viki.py (15610B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import base64 5 import hashlib 6 import hmac 7 import itertools 8 import json 9 import re 10 import time 11 12 from .common import InfoExtractor 13 from ..compat import ( 14 compat_parse_qs, 15 compat_urllib_parse_urlparse, 16 ) 17 from ..utils import ( 18 ExtractorError, 19 int_or_none, 20 parse_age_limit, 21 parse_iso8601, 22 sanitized_Request, 23 std_headers, 24 try_get, 25 ) 26 27 28 class VikiBaseIE(InfoExtractor): 29 _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/' 30 _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com' 31 _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s' 32 33 _APP = '100005a' 34 _APP_VERSION = '6.0.0' 35 _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad' 36 37 _GEO_BYPASS = False 38 _NETRC_MACHINE = 'viki' 39 40 _token = None 41 42 _ERRORS = { 43 'geo': 'Sorry, this content is not available in your region.', 44 'upcoming': 'Sorry, this content is not yet available.', 45 'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers', 46 } 47 48 def _prepare_call(self, path, timestamp=None, post_data=None): 49 path += '?' if '?' not in path else '&' 50 if not timestamp: 51 timestamp = int(time.time()) 52 query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp) 53 if self._token: 54 query += '&token=%s' % self._token 55 sig = hmac.new( 56 self._APP_SECRET.encode('ascii'), 57 query.encode('ascii'), 58 hashlib.sha1 59 ).hexdigest() 60 url = self._API_URL_TEMPLATE % (query, sig) 61 return sanitized_Request( 62 url, json.dumps(post_data).encode('utf-8')) if post_data else url 63 64 def _call_api(self, path, video_id, note, timestamp=None, post_data=None): 65 resp = self._download_json( 66 self._prepare_call(path, timestamp, post_data), video_id, note, 67 headers={'x-viki-app-ver': self._APP_VERSION}) 68 69 error = resp.get('error') 70 if error: 71 if error == 'invalid timestamp': 72 resp = self._download_json( 73 self._prepare_call(path, int(resp['current_timestamp']), post_data), 74 video_id, '%s (retry)' % note) 75 error = resp.get('error') 76 if error: 77 self._raise_error(resp['error']) 78 79 return resp 80 81 def _raise_error(self, error): 82 raise ExtractorError( 83 '%s returned error: %s' % (self.IE_NAME, error), 84 expected=True) 85 86 def _check_errors(self, data): 87 for reason, status in (data.get('blocking') or {}).items(): 88 if status and reason in self._ERRORS: 89 message = self._ERRORS[reason] 90 if reason == 'geo': 91 self.raise_geo_restricted(msg=message) 92 elif reason == 'paywall': 93 self.raise_login_required(message) 94 raise ExtractorError('%s said: %s' % ( 95 self.IE_NAME, message), expected=True) 96 97 def _real_initialize(self): 98 self._login() 99 100 def _login(self): 101 username, password = self._get_login_info() 102 if username is None: 103 return 104 105 login_form = { 106 'login_id': username, 107 'password': password, 108 } 109 110 login = self._call_api( 111 'sessions.json', None, 112 'Logging in', post_data=login_form) 113 114 self._token = login.get('token') 115 if not self._token: 116 self.report_warning('Unable to get session token, login has probably failed') 117 118 @staticmethod 119 def dict_selection(dict_obj, preferred_key, allow_fallback=True): 120 if preferred_key in dict_obj: 121 return dict_obj.get(preferred_key) 122 123 if not allow_fallback: 124 return 125 126 filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()])) 127 return filtered_dict[0] if filtered_dict else None 128 129 130 class VikiIE(VikiBaseIE): 131 IE_NAME = 'viki' 132 _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE 133 _TESTS = [{ 134 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 135 'info_dict': { 136 'id': '1023585v', 137 'ext': 'mp4', 138 'title': 'Heirs - Episode 14', 139 'uploader': 'SBS Contents Hub', 140 'timestamp': 1385047627, 141 'upload_date': '20131121', 142 'age_limit': 13, 143 'duration': 3570, 144 'episode_number': 14, 145 }, 146 'params': { 147 'format': 'bestvideo', 148 }, 149 'skip': 'Blocked in the US', 150 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], 151 }, { 152 # clip 153 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', 154 'md5': '86c0b5dbd4d83a6611a79987cc7a1989', 155 'info_dict': { 156 'id': '1067139v', 157 'ext': 'mp4', 158 'title': "'The Avengers: Age of Ultron' Press Conference", 159 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea', 160 'duration': 352, 161 'timestamp': 1430380829, 162 'upload_date': '20150430', 163 'uploader': 'Arirang TV', 164 'like_count': int, 165 'age_limit': 0, 166 }, 167 'skip': 'Sorry. There was an error loading this video', 168 }, { 169 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi', 170 'info_dict': { 171 'id': '1048879v', 172 'ext': 'mp4', 173 'title': 'Ankhon Dekhi', 174 'duration': 6512, 175 'timestamp': 1408532356, 176 'upload_date': '20140820', 177 'uploader': 'Spuul', 178 'like_count': int, 179 'age_limit': 13, 180 }, 181 'skip': 'Blocked in the US', 182 }, { 183 # episode 184 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 185 'md5': '0a53dc252e6e690feccd756861495a8c', 186 'info_dict': { 187 'id': '44699v', 188 'ext': 'mp4', 189 'title': 'Boys Over Flowers - Episode 1', 190 'description': 'md5:b89cf50038b480b88b5b3c93589a9076', 191 'duration': 4172, 192 'timestamp': 1270496524, 193 'upload_date': '20100405', 194 'uploader': 'group8', 195 'like_count': int, 196 'age_limit': 13, 197 'episode_number': 1, 198 }, 199 'params': { 200 'format': 'bestvideo', 201 }, 202 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], 203 }, { 204 # youtube external 205 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', 206 'md5': '63f8600c1da6f01b7640eee7eca4f1da', 207 'info_dict': { 208 'id': '50562v', 209 'ext': 'webm', 210 'title': 'Poor Nastya [COMPLETE] - Episode 1', 211 'description': '', 212 'duration': 606, 213 'timestamp': 1274949505, 214 'upload_date': '20101213', 215 'uploader': 'ad14065n', 216 'uploader_id': 'ad14065n', 217 'like_count': int, 218 'age_limit': 13, 219 }, 220 'skip': 'Page not found!', 221 }, { 222 'url': 'http://www.viki.com/player/44699v', 223 'only_matching': True, 224 }, { 225 # non-English description 226 'url': 'http://www.viki.com/videos/158036v-love-in-magic', 227 'md5': '41faaba0de90483fb4848952af7c7d0d', 228 'info_dict': { 229 'id': '158036v', 230 'ext': 'mp4', 231 'uploader': 'I Planet Entertainment', 232 'upload_date': '20111122', 233 'timestamp': 1321985454, 234 'description': 'md5:44b1e46619df3a072294645c770cef36', 235 'title': 'Love In Magic', 236 'age_limit': 13, 237 }, 238 'params': { 239 'format': 'bestvideo', 240 }, 241 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], 242 }] 243 244 def _real_extract(self, url): 245 video_id = self._match_id(url) 246 247 resp = self._download_json( 248 'https://www.viki.com/api/videos/' + video_id, 249 video_id, 'Downloading video JSON', headers={ 250 'x-client-user-agent': std_headers['User-Agent'], 251 'x-viki-app-ver': '3.0.0', 252 }) 253 video = resp['video'] 254 255 self._check_errors(video) 256 257 title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False) 258 episode_number = int_or_none(video.get('number')) 259 if not title: 260 title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id 261 container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {} 262 container_title = self.dict_selection(container_titles, 'en') 263 title = '%s - %s' % (container_title, title) 264 265 description = self.dict_selection(video.get('descriptions', {}), 'en') 266 267 like_count = int_or_none(try_get(video, lambda x: x['likes']['count'])) 268 269 thumbnails = [] 270 for thumbnail_id, thumbnail in (video.get('images') or {}).items(): 271 thumbnails.append({ 272 'id': thumbnail_id, 273 'url': thumbnail.get('url'), 274 }) 275 276 subtitles = {} 277 for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items(): 278 subtitles[subtitle_lang] = [{ 279 'ext': subtitles_format, 280 'url': self._prepare_call( 281 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)), 282 } for subtitles_format in ('srt', 'vtt')] 283 284 result = { 285 'id': video_id, 286 'title': title, 287 'description': description, 288 'duration': int_or_none(video.get('duration')), 289 'timestamp': parse_iso8601(video.get('created_at')), 290 'uploader': video.get('author'), 291 'uploader_url': video.get('author_url'), 292 'like_count': like_count, 293 'age_limit': parse_age_limit(video.get('rating')), 294 'thumbnails': thumbnails, 295 'subtitles': subtitles, 296 'episode_number': episode_number, 297 } 298 299 formats = [] 300 301 def add_format(format_id, format_dict, protocol='http'): 302 # rtmps URLs does not seem to work 303 if protocol == 'rtmps': 304 return 305 format_url = format_dict.get('url') 306 if not format_url: 307 return 308 qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query) 309 stream = qs.get('stream', [None])[0] 310 if stream: 311 format_url = base64.b64decode(stream).decode() 312 if format_id in ('m3u8', 'hls'): 313 m3u8_formats = self._extract_m3u8_formats( 314 format_url, video_id, 'mp4', 315 entry_protocol='m3u8_native', 316 m3u8_id='m3u8-%s' % protocol, fatal=False) 317 # Despite CODECS metadata in m3u8 all video-only formats 318 # are actually video+audio 319 for f in m3u8_formats: 320 if '_drm/index_' in f['url']: 321 continue 322 if f.get('acodec') == 'none' and f.get('vcodec') != 'none': 323 f['acodec'] = None 324 formats.append(f) 325 elif format_id in ('mpd', 'dash'): 326 formats.extend(self._extract_mpd_formats( 327 format_url, video_id, 'mpd-%s' % protocol, fatal=False)) 328 elif format_url.startswith('rtmp'): 329 mobj = re.search( 330 r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', 331 format_url) 332 if not mobj: 333 return 334 formats.append({ 335 'format_id': 'rtmp-%s' % format_id, 336 'ext': 'flv', 337 'url': mobj.group('url'), 338 'play_path': mobj.group('playpath'), 339 'app': mobj.group('app'), 340 'page_url': url, 341 }) 342 else: 343 formats.append({ 344 'url': format_url, 345 'format_id': '%s-%s' % (format_id, protocol), 346 'height': int_or_none(self._search_regex( 347 r'^(\d+)[pP]$', format_id, 'height', default=None)), 348 }) 349 350 for format_id, format_dict in (resp.get('streams') or {}).items(): 351 add_format(format_id, format_dict) 352 if not formats: 353 streams = self._call_api( 354 'videos/%s/streams.json' % video_id, video_id, 355 'Downloading video streams JSON') 356 357 if 'external' in streams: 358 result.update({ 359 '_type': 'url_transparent', 360 'url': streams['external']['url'], 361 }) 362 return result 363 364 for format_id, stream_dict in streams.items(): 365 for protocol, format_dict in stream_dict.items(): 366 add_format(format_id, format_dict, protocol) 367 self._sort_formats(formats) 368 369 result['formats'] = formats 370 return result 371 372 373 class VikiChannelIE(VikiBaseIE): 374 IE_NAME = 'viki:channel' 375 _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE 376 _TESTS = [{ 377 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', 378 'info_dict': { 379 'id': '50c', 380 'title': 'Boys Over Flowers', 381 'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59', 382 }, 383 'playlist_mincount': 71, 384 }, { 385 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete', 386 'info_dict': { 387 'id': '1354c', 388 'title': 'Poor Nastya [COMPLETE]', 389 'description': 'md5:05bf5471385aa8b21c18ad450e350525', 390 }, 391 'playlist_count': 127, 392 'skip': 'Page not found', 393 }, { 394 'url': 'http://www.viki.com/news/24569c-showbiz-korea', 395 'only_matching': True, 396 }, { 397 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005', 398 'only_matching': True, 399 }, { 400 'url': 'http://www.viki.com/artists/2141c-shinee', 401 'only_matching': True, 402 }] 403 404 _PER_PAGE = 25 405 406 def _real_extract(self, url): 407 channel_id = self._match_id(url) 408 409 channel = self._call_api( 410 'containers/%s.json' % channel_id, channel_id, 411 'Downloading channel JSON') 412 413 self._check_errors(channel) 414 415 title = self.dict_selection(channel['titles'], 'en') 416 417 description = self.dict_selection(channel['descriptions'], 'en') 418 419 entries = [] 420 for video_type in ('episodes', 'clips', 'movies'): 421 for page_num in itertools.count(1): 422 page = self._call_api( 423 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d' 424 % (channel_id, video_type, self._PER_PAGE, page_num), channel_id, 425 'Downloading %s JSON page #%d' % (video_type, page_num)) 426 for video in page['response']: 427 video_id = video['id'] 428 entries.append(self.url_result( 429 'https://www.viki.com/videos/%s' % video_id, 'Viki')) 430 if not page['pagination']['next']: 431 break 432 433 return self.playlist_result(entries, channel_id, title, description)