sportbox.py (3421B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 determine_ext, 9 int_or_none, 10 js_to_json, 11 merge_dicts, 12 ) 13 14 15 class SportBoxIE(InfoExtractor): 16 _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' 17 _TESTS = [{ 18 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 19 'info_dict': { 20 'id': '109158', 21 'ext': 'mp4', 22 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 23 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 24 'thumbnail': r're:^https?://.*\.jpg$', 25 'duration': 292, 26 'view_count': int, 27 'timestamp': 1426237001, 28 'upload_date': '20150313', 29 }, 30 'params': { 31 # m3u8 download 32 'skip_download': True, 33 }, 34 }, { 35 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', 36 'only_matching': True, 37 }, { 38 'url': 'https://news.sportbox.ru/vdl/player/media/193095', 39 'only_matching': True, 40 }, { 41 'url': 'https://news.sportbox.ru/vdl/player/media/109158', 42 'only_matching': True, 43 }, { 44 'url': 'https://matchtv.ru/vdl/player/media/109158', 45 'only_matching': True, 46 }] 47 48 @staticmethod 49 def _extract_urls(webpage): 50 return re.findall( 51 r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', 52 webpage) 53 54 def _real_extract(self, url): 55 video_id = self._match_id(url) 56 57 webpage = self._download_webpage(url, video_id) 58 59 sources = self._parse_json( 60 self._search_regex( 61 r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', 62 webpage, 'sources'), 63 video_id, transform_source=js_to_json) 64 65 formats = [] 66 for source in sources: 67 src = source.get('src') 68 if not src: 69 continue 70 if determine_ext(src) == 'm3u8': 71 formats.extend(self._extract_m3u8_formats( 72 src, video_id, 'mp4', entry_protocol='m3u8_native', 73 m3u8_id='hls', fatal=False)) 74 else: 75 formats.append({ 76 'url': src, 77 }) 78 self._sort_formats(formats) 79 80 player = self._parse_json( 81 self._search_regex( 82 r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage, 83 'player options', default='{}'), 84 video_id, transform_source=js_to_json) 85 media_id = player['mediaId'] 86 87 info = self._search_json_ld(webpage, media_id, default={}) 88 89 view_count = int_or_none(self._search_regex( 90 r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) 91 92 return merge_dicts(info, { 93 'id': media_id, 94 'title': self._og_search_title(webpage, default=None) or media_id, 95 'thumbnail': player.get('poster'), 96 'duration': int_or_none(player.get('duration')), 97 'view_count': view_count, 98 'formats': formats, 99 })