youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

sportbox.py (3421B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     determine_ext,
      9     int_or_none,
     10     js_to_json,
     11     merge_dicts,
     12 )
     13 
     14 
     15 class SportBoxIE(InfoExtractor):
     16     _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
     17     _TESTS = [{
     18         'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
     19         'info_dict': {
     20             'id': '109158',
     21             'ext': 'mp4',
     22             'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
     23             'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
     24             'thumbnail': r're:^https?://.*\.jpg$',
     25             'duration': 292,
     26             'view_count': int,
     27             'timestamp': 1426237001,
     28             'upload_date': '20150313',
     29         },
     30         'params': {
     31             # m3u8 download
     32             'skip_download': True,
     33         },
     34     }, {
     35         'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
     36         'only_matching': True,
     37     }, {
     38         'url': 'https://news.sportbox.ru/vdl/player/media/193095',
     39         'only_matching': True,
     40     }, {
     41         'url': 'https://news.sportbox.ru/vdl/player/media/109158',
     42         'only_matching': True,
     43     }, {
     44         'url': 'https://matchtv.ru/vdl/player/media/109158',
     45         'only_matching': True,
     46     }]
     47 
     48     @staticmethod
     49     def _extract_urls(webpage):
     50         return re.findall(
     51             r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
     52             webpage)
     53 
     54     def _real_extract(self, url):
     55         video_id = self._match_id(url)
     56 
     57         webpage = self._download_webpage(url, video_id)
     58 
     59         sources = self._parse_json(
     60             self._search_regex(
     61                 r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
     62                 webpage, 'sources'),
     63             video_id, transform_source=js_to_json)
     64 
     65         formats = []
     66         for source in sources:
     67             src = source.get('src')
     68             if not src:
     69                 continue
     70             if determine_ext(src) == 'm3u8':
     71                 formats.extend(self._extract_m3u8_formats(
     72                     src, video_id, 'mp4', entry_protocol='m3u8_native',
     73                     m3u8_id='hls', fatal=False))
     74             else:
     75                 formats.append({
     76                     'url': src,
     77                 })
     78         self._sort_formats(formats)
     79 
     80         player = self._parse_json(
     81             self._search_regex(
     82                 r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
     83                 'player options', default='{}'),
     84             video_id, transform_source=js_to_json)
     85         media_id = player['mediaId']
     86 
     87         info = self._search_json_ld(webpage, media_id, default={})
     88 
     89         view_count = int_or_none(self._search_regex(
     90             r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
     91 
     92         return merge_dicts(info, {
     93             'id': media_id,
     94             'title': self._og_search_title(webpage, default=None) or media_id,
     95             'thumbnail': player.get('poster'),
     96             'duration': int_or_none(player.get('duration')),
     97             'view_count': view_count,
     98             'formats': formats,
     99         })