yandexvideo.py (5258B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    try_get,
    url_or_none,
)


class YandexVideoIE(InfoExtractor):
    """Extractor for yandex.ru ``stream_id`` URLs and
    frontend.vh.yandex.ru player URLs.

    Metadata is requested from the ``frontend.vh.yandex.ru`` GraphQL
    endpoint first; when that yields nothing usable, the ``v23`` player
    JSON endpoint is queried instead.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=|
                            frontend\.vh\.yandex\.ru/player/
                        )
                        (?P<id>(?:[\da-f]{32}|[\w-]{12}))
                    '''
    _TESTS = [{
        'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
        'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4',
        'info_dict': {
            'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
            'ext': 'mp4',
            'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь',
            'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa',
            'thumbnail': r're:^https?://',
            'timestamp': 1549972939,
            'duration': 5575,
            'age_limit': 18,
            'upload_date': '20190212',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
    }, {
        'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
        'only_matching': True,
    }, {
        'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
        'only_matching': True,
    }, {
        'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
        'only_matching': True,
    }, {
        # vod-episode, series episode
        'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
        'only_matching': True,
    }, {
        # episode, sports
        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
        'only_matching': True,
    }, {
        # DASH with DRM
        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
        'only_matching': True,
    }, {
        'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
        'only_matching': True,
    }]

    def _extract_formats(self, content, video_id):
        """Build the (unsorted) format list for *content*.

        Candidate URLs are every entry of ``content['streams']`` followed
        by ``content['content_url']``.  Invalid URLs, non-dict stream
        entries, ``.ismc`` manifests and URLs that were already processed
        are skipped.
        """
        streams = content.get('streams')
        # Copy instead of appending in place so the parsed response is
        # not modified; anything that is not a list is ignored.
        candidates = list(streams) if isinstance(streams, list) else []
        candidates.append({'url': content.get('content_url')})

        formats = []
        seen_urls = set()
        for stream in candidates:
            if not isinstance(stream, dict):
                continue
            stream_url = url_or_none(stream.get('url'))
            # content_url may repeat one of the stream URLs; processing
            # it again would re-download the manifest and duplicate formats.
            if not stream_url or stream_url in seen_urls:
                continue
            seen_urls.add(stream_url)

            ext = determine_ext(stream_url)
            if ext == 'ismc':
                continue
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    stream_url, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({'url': stream_url})
        return formats

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Raises ``KeyError`` when the player data has no ``content`` entry
        or when neither ``title`` nor ``computed_title`` is available.
        """
        video_id = self._match_id(url)

        # Non-fatal: a failed GraphQL request falls through to the JSON
        # endpoint below.
        player = try_get((self._download_json(
            'https://frontend.vh.yandex.ru/graphql', video_id, data=('''{
  player(content_id: "%s") {
    computed_title
    content_url
    description
    dislikes
    duration
    likes
    program_title
    release_date
    release_date_ut
    release_year
    restriction_age
    season
    start_time
    streams
    thumbnail
    title
    views_count
  }
}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content'])
        # isinstance guard: a non-dict value would otherwise fail on .get().
        if not isinstance(player, dict) or player.get('error'):
            player = self._download_json(
                'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
                video_id, query={
                    'stream_options': 'hires',
                    'disable_trackings': 1,
                })
        content = player['content']

        title = content.get('title') or content['computed_title']

        formats = self._extract_formats(content, video_id)
        self._sort_formats(formats)

        # First usable value wins: release_date, release_date_ut, start_time.
        timestamp = (int_or_none(content.get('release_date'))
                     or int_or_none(content.get('release_date_ut'))
                     or int_or_none(content.get('start_time')))

        # ``season`` is requested without a sub-selection, so it may not be
        # an object; only a dict is read for season metadata.
        season = content.get('season')
        if not isinstance(season, dict):
            season = {}

        return {
            'id': video_id,
            'title': title,
            'description': content.get('description'),
            'thumbnail': content.get('thumbnail'),
            'timestamp': timestamp,
            'duration': int_or_none(content.get('duration')),
            'series': content.get('program_title'),
            'age_limit': int_or_none(content.get('restriction_age')),
            'view_count': int_or_none(content.get('views_count')),
            'like_count': int_or_none(content.get('likes')),
            'dislike_count': int_or_none(content.get('dislikes')),
            'season_number': int_or_none(season.get('season_number')),
            'season_id': season.get('id'),
            'release_year': int_or_none(content.get('release_year')),
            'formats': formats,
        }