vrv.py (11111B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import base64 5 import json 6 import hashlib 7 import hmac 8 import random 9 import string 10 import time 11 12 from .common import InfoExtractor 13 from ..compat import ( 14 compat_HTTPError, 15 compat_urllib_parse_urlencode, 16 compat_urllib_parse, 17 ) 18 from ..utils import ( 19 ExtractorError, 20 float_or_none, 21 int_or_none, 22 ) 23 24 25 class VRVBaseIE(InfoExtractor): 26 _API_DOMAIN = None 27 _API_PARAMS = {} 28 _CMS_SIGNING = {} 29 _TOKEN = None 30 _TOKEN_SECRET = '' 31 32 def _call_api(self, path, video_id, note, data=None): 33 # https://tools.ietf.org/html/rfc5849#section-3 34 base_url = self._API_DOMAIN + '/core/' + path 35 query = [ 36 ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), 37 ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), 38 ('oauth_signature_method', 'HMAC-SHA1'), 39 ('oauth_timestamp', int(time.time())), 40 ] 41 if self._TOKEN: 42 query.append(('oauth_token', self._TOKEN)) 43 encoded_query = compat_urllib_parse_urlencode(query) 44 headers = self.geo_verification_headers() 45 if data: 46 data = json.dumps(data).encode() 47 headers['Content-Type'] = 'application/json' 48 base_string = '&'.join([ 49 'POST' if data else 'GET', 50 compat_urllib_parse.quote(base_url, ''), 51 compat_urllib_parse.quote(encoded_query, '')]) 52 oauth_signature = base64.b64encode(hmac.new( 53 (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'), 54 base_string.encode(), hashlib.sha1).digest()).decode() 55 encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '') 56 try: 57 return self._download_json( 58 '?'.join([base_url, encoded_query]), video_id, 59 note='Downloading %s JSON metadata' % note, headers=headers, data=data) 60 except ExtractorError as e: 61 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: 62 raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True) 63 raise 64 65 def _call_cms(self, path, video_id, note): 66 if not self._CMS_SIGNING: 67 index = self._call_api('index', video_id, 'CMS Signing') 68 self._CMS_SIGNING = index.get('cms_signing') or {} 69 if not self._CMS_SIGNING: 70 for signing_policy in index.get('signing_policies', []): 71 signing_path = signing_policy.get('path') 72 if signing_path and signing_path.startswith('/cms/'): 73 name, value = signing_policy.get('name'), signing_policy.get('value') 74 if name and value: 75 self._CMS_SIGNING[name] = value 76 return self._download_json( 77 self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, 78 note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) 79 80 def _get_cms_resource(self, resource_key, video_id): 81 return self._call_api( 82 'cms_resource', video_id, 'resource path', data={ 83 'resource_key': resource_key, 84 })['__links__']['cms_resource']['href'] 85 86 def _real_initialize(self): 87 webpage = self._download_webpage( 88 'https://vrv.co/', None, headers=self.geo_verification_headers()) 89 self._API_PARAMS = self._parse_json(self._search_regex( 90 [ 91 r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)', 92 r'window\.__APP_CONFIG__\s*=\s*({.+})' 93 ], webpage, 'app config'), None)['cxApiParams'] 94 self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') 95 96 97 class VRVIE(VRVBaseIE): 98 IE_NAME = 'vrv' 99 _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)' 100 _TESTS = [{ 101 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', 102 'info_dict': { 103 'id': 'GR9PNZ396', 104 'ext': 'mp4', 105 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT', 106 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f', 107 'uploader_id': 'seeso', 108 }, 109 'params': { 110 # m3u8 download 111 'skip_download': True, 112 }, 113 }, { 114 # movie listing 115 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT', 116 'info_dict': { 117 'id': 'G6NQXZ1J6', 118 'title': 'Lily C.A.T', 119 'description': 'md5:988b031e7809a6aeb60968be4af7db07', 120 }, 121 'playlist_count': 2, 122 }] 123 _NETRC_MACHINE = 'vrv' 124 125 def _real_initialize(self): 126 super(VRVIE, self)._real_initialize() 127 128 email, password = self._get_login_info() 129 if email is None: 130 return 131 132 token_credentials = self._call_api( 133 'authenticate/by:credentials', None, 'Token Credentials', data={ 134 'email': email, 135 'password': password, 136 }) 137 self._TOKEN = token_credentials['oauth_token'] 138 self._TOKEN_SECRET = token_credentials['oauth_token_secret'] 139 140 def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): 141 if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'): 142 return [] 143 stream_id_list = [] 144 if audio_lang: 145 stream_id_list.append('audio-%s' % audio_lang) 146 if hardsub_lang: 147 stream_id_list.append('hardsub-%s' % hardsub_lang) 148 format_id = stream_format 149 if stream_id_list: 150 format_id += '-' + '-'.join(stream_id_list) 151 if 'hls' in stream_format: 152 adaptive_formats = self._extract_m3u8_formats( 153 url, video_id, 'mp4', m3u8_id=format_id, 154 note='Downloading %s information' % format_id, 155 fatal=False) 156 elif stream_format == 'dash': 157 adaptive_formats = self._extract_mpd_formats( 158 url, video_id, mpd_id=format_id, 159 note='Downloading %s information' % format_id, 160 fatal=False) 161 if audio_lang: 162 for f in adaptive_formats: 163 if f.get('acodec') != 'none': 164 f['language'] = audio_lang 165 return adaptive_formats 166 167 def _real_extract(self, url): 168 video_id = self._match_id(url) 169 170 object_data = self._call_cms(self._get_cms_resource( 171 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0] 172 resource_path = object_data['__links__']['resource']['href'] 173 video_data = self._call_cms(resource_path, video_id, 'video') 174 title = video_data['title'] 175 description = video_data.get('description') 176 177 if video_data.get('__class__') == 'movie_listing': 178 items = self._call_cms( 179 video_data['__links__']['movie_listing/movies']['href'], 180 video_id, 'movie listing').get('items') or [] 181 if len(items) != 1: 182 entries = [] 183 for item in items: 184 item_id = item.get('id') 185 if not item_id: 186 continue 187 entries.append(self.url_result( 188 'https://vrv.co/watch/' + item_id, 189 self.ie_key(), item_id, item.get('title'))) 190 return self.playlist_result(entries, video_id, title, description) 191 video_data = items[0] 192 193 streams_path = video_data['__links__'].get('streams', {}).get('href') 194 if not streams_path: 195 self.raise_login_required() 196 streams_json = self._call_cms(streams_path, video_id, 'streams') 197 198 audio_locale = streams_json.get('audio_locale') 199 formats = [] 200 for stream_type, streams in streams_json.get('streams', {}).items(): 201 if stream_type in ('adaptive_hls', 'adaptive_dash'): 202 for stream in streams.values(): 203 formats.extend(self._extract_vrv_formats( 204 stream.get('url'), video_id, stream_type.split('_')[1], 205 audio_locale, stream.get('hardsub_locale'))) 206 self._sort_formats(formats) 207 208 subtitles = {} 209 for k in ('captions', 'subtitles'): 210 for subtitle in streams_json.get(k, {}).values(): 211 subtitle_url = subtitle.get('url') 212 if not subtitle_url: 213 continue 214 subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({ 215 'url': subtitle_url, 216 'ext': subtitle.get('format', 'ass'), 217 }) 218 219 thumbnails = [] 220 for thumbnail in video_data.get('images', {}).get('thumbnails', []): 221 thumbnail_url = thumbnail.get('source') 222 if not thumbnail_url: 223 continue 224 thumbnails.append({ 225 'url': thumbnail_url, 226 'width': int_or_none(thumbnail.get('width')), 227 'height': int_or_none(thumbnail.get('height')), 228 }) 229 230 return { 231 'id': video_id, 232 'title': title, 233 'formats': formats, 234 'subtitles': subtitles, 235 'thumbnails': thumbnails, 236 'description': description, 237 'duration': float_or_none(video_data.get('duration_ms'), 1000), 238 'uploader_id': video_data.get('channel_id'), 239 'series': video_data.get('series_title'), 240 'season': video_data.get('season_title'), 241 'season_number': int_or_none(video_data.get('season_number')), 242 'season_id': video_data.get('season_id'), 243 'episode': title, 244 'episode_number': int_or_none(video_data.get('episode_number')), 245 'episode_id': video_data.get('production_episode_id'), 246 } 247 248 249 class VRVSeriesIE(VRVBaseIE): 250 IE_NAME = 'vrv:series' 251 _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)' 252 _TEST = { 253 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider', 254 'info_dict': { 255 'id': 'G68VXG3G6', 256 }, 257 'playlist_mincount': 11, 258 } 259 260 def _real_extract(self, url): 261 series_id = self._match_id(url) 262 263 seasons_path = self._get_cms_resource( 264 'cms:/seasons?series_id=' + series_id, series_id) 265 seasons_data = self._call_cms(seasons_path, series_id, 'seasons') 266 267 entries = [] 268 for season in seasons_data.get('items', []): 269 episodes_path = season['__links__']['season/episodes']['href'] 270 episodes = self._call_cms(episodes_path, series_id, 'episodes') 271 for episode in episodes.get('items', []): 272 episode_id = episode['id'] 273 entries.append(self.url_result( 274 'https://vrv.co/watch/' + episode_id, 275 'VRV', episode_id, episode.get('title'))) 276 277 return self.playlist_result(entries, series_id)