vimeo.py (47270B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import base64 5 import functools 6 import re 7 import itertools 8 9 from .common import InfoExtractor 10 from ..compat import ( 11 compat_kwargs, 12 compat_HTTPError, 13 compat_str, 14 compat_urlparse, 15 ) 16 from ..utils import ( 17 clean_html, 18 determine_ext, 19 ExtractorError, 20 get_element_by_class, 21 js_to_json, 22 int_or_none, 23 merge_dicts, 24 OnDemandPagedList, 25 parse_filesize, 26 parse_iso8601, 27 sanitized_Request, 28 smuggle_url, 29 std_headers, 30 str_or_none, 31 try_get, 32 unified_timestamp, 33 unsmuggle_url, 34 urlencode_postdata, 35 urljoin, 36 unescapeHTML, 37 ) 38 39 40 class VimeoBaseInfoExtractor(InfoExtractor): 41 _NETRC_MACHINE = 'vimeo' 42 _LOGIN_REQUIRED = False 43 _LOGIN_URL = 'https://vimeo.com/log_in' 44 45 def _login(self): 46 username, password = self._get_login_info() 47 if username is None: 48 if self._LOGIN_REQUIRED: 49 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) 50 return 51 webpage = self._download_webpage( 52 self._LOGIN_URL, None, 'Downloading login page') 53 token, vuid = self._extract_xsrft_and_vuid(webpage) 54 data = { 55 'action': 'login', 56 'email': username, 57 'password': password, 58 'service': 'vimeo', 59 'token': token, 60 } 61 self._set_vimeo_cookie('vuid', vuid) 62 try: 63 self._download_webpage( 64 self._LOGIN_URL, None, 'Logging in', 65 data=urlencode_postdata(data), headers={ 66 'Content-Type': 'application/x-www-form-urlencoded', 67 'Referer': self._LOGIN_URL, 68 }) 69 except ExtractorError as e: 70 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418: 71 raise ExtractorError( 72 'Unable to log in: bad username or password', 73 expected=True) 74 raise ExtractorError('Unable to log in') 75 76 def _get_video_password(self): 77 password = self._downloader.params.get('videopassword') 78 if password is None: 79 raise ExtractorError( 80 'This video is protected by a password, use the --video-password option', 81 expected=True) 82 return password 83 84 def _verify_video_password(self, url, video_id, password, token, vuid): 85 if url.startswith('http://'): 86 # vimeo only supports https now, but the user can give an http url 87 url = url.replace('http://', 'https://') 88 self._set_vimeo_cookie('vuid', vuid) 89 return self._download_webpage( 90 url + '/password', video_id, 'Verifying the password', 91 'Wrong password', data=urlencode_postdata({ 92 'password': password, 93 'token': token, 94 }), headers={ 95 'Content-Type': 'application/x-www-form-urlencoded', 96 'Referer': url, 97 }) 98 99 def _extract_xsrft_and_vuid(self, webpage): 100 xsrft = self._search_regex( 101 r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', 102 webpage, 'login token', group='xsrft') 103 vuid = self._search_regex( 104 r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1', 105 webpage, 'vuid', group='vuid') 106 return xsrft, vuid 107 108 def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): 109 vimeo_config = self._search_regex( 110 r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', 111 webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) 112 if vimeo_config: 113 return self._parse_json(vimeo_config, video_id) 114 115 def _set_vimeo_cookie(self, name, value): 116 self._set_cookie('vimeo.com', name, value) 117 118 def _vimeo_sort_formats(self, formats): 119 # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps 120 # at the same time without actual units specified. This lead to wrong sorting. 121 self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) 122 123 def _parse_config(self, config, video_id): 124 video_data = config['video'] 125 video_title = video_data['title'] 126 live_event = video_data.get('live_event') or {} 127 is_live = live_event.get('status') == 'started' 128 request = config.get('request') or {} 129 130 formats = [] 131 config_files = video_data.get('files') or request.get('files') or {} 132 for f in (config_files.get('progressive') or []): 133 video_url = f.get('url') 134 if not video_url: 135 continue 136 formats.append({ 137 'url': video_url, 138 'format_id': 'http-%s' % f.get('quality'), 139 'width': int_or_none(f.get('width')), 140 'height': int_or_none(f.get('height')), 141 'fps': int_or_none(f.get('fps')), 142 'tbr': int_or_none(f.get('bitrate')), 143 }) 144 145 # TODO: fix handling of 308 status code returned for live archive manifest requests 146 sep_pattern = r'/sep/video/' 147 for files_type in ('hls', 'dash'): 148 for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items(): 149 manifest_url = cdn_data.get('url') 150 if not manifest_url: 151 continue 152 format_id = '%s-%s' % (files_type, cdn_name) 153 sep_manifest_urls = [] 154 if re.search(sep_pattern, manifest_url): 155 for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): 156 sep_manifest_urls.append((format_id + suffix, re.sub( 157 sep_pattern, '/%s/' % repl, manifest_url))) 158 else: 159 sep_manifest_urls = [(format_id, manifest_url)] 160 for f_id, m_url in sep_manifest_urls: 161 if files_type == 'hls': 162 formats.extend(self._extract_m3u8_formats( 163 m_url, video_id, 'mp4', 164 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id, 165 note='Downloading %s m3u8 information' % cdn_name, 166 fatal=False)) 167 elif files_type == 'dash': 168 if 'json=1' in m_url: 169 real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url') 170 if real_m_url: 171 m_url = real_m_url 172 mpd_formats = self._extract_mpd_formats( 173 m_url.replace('/master.json', '/master.mpd'), video_id, f_id, 174 'Downloading %s MPD information' % cdn_name, 175 fatal=False) 176 formats.extend(mpd_formats) 177 178 live_archive = live_event.get('archive') or {} 179 live_archive_source_url = live_archive.get('source_url') 180 if live_archive_source_url and live_archive.get('status') == 'done': 181 formats.append({ 182 'format_id': 'live-archive-source', 183 'url': live_archive_source_url, 184 'preference': 1, 185 }) 186 187 for f in formats: 188 if f.get('vcodec') == 'none': 189 f['preference'] = -50 190 elif f.get('acodec') == 'none': 191 f['preference'] = -40 192 193 subtitles = {} 194 for tt in (request.get('text_tracks') or []): 195 subtitles[tt['lang']] = [{ 196 'ext': 'vtt', 197 'url': urljoin('https://vimeo.com', tt['url']), 198 }] 199 200 thumbnails = [] 201 if not is_live: 202 for key, thumb in (video_data.get('thumbs') or {}).items(): 203 thumbnails.append({ 204 'id': key, 205 'width': int_or_none(key), 206 'url': thumb, 207 }) 208 thumbnail = video_data.get('thumbnail') 209 if thumbnail: 210 thumbnails.append({ 211 'url': thumbnail, 212 }) 213 214 owner = video_data.get('owner') or {} 215 video_uploader_url = owner.get('url') 216 217 return { 218 'id': str_or_none(video_data.get('id')) or video_id, 219 'title': self._live_title(video_title) if is_live else video_title, 220 'uploader': owner.get('name'), 221 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, 222 'uploader_url': video_uploader_url, 223 'thumbnails': thumbnails, 224 'duration': int_or_none(video_data.get('duration')), 225 'formats': formats, 226 'subtitles': subtitles, 227 'is_live': is_live, 228 } 229 230 def _extract_original_format(self, url, video_id, unlisted_hash=None): 231 query = {'action': 'load_download_config'} 232 if unlisted_hash: 233 query['unlisted_hash'] = unlisted_hash 234 download_data = self._download_json( 235 url, video_id, fatal=False, query=query, 236 headers={'X-Requested-With': 'XMLHttpRequest'}) 237 if download_data: 238 source_file = download_data.get('source_file') 239 if isinstance(source_file, dict): 240 download_url = source_file.get('download_url') 241 if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): 242 source_name = source_file.get('public_name', 'Original') 243 if self._is_valid_url(download_url, video_id, '%s video' % source_name): 244 ext = (try_get( 245 source_file, lambda x: x['extension'], 246 compat_str) or determine_ext( 247 download_url, None) or 'mp4').lower() 248 return { 249 'url': download_url, 250 'ext': ext, 251 'width': int_or_none(source_file.get('width')), 252 'height': int_or_none(source_file.get('height')), 253 'filesize': parse_filesize(source_file.get('size')), 254 'format_id': source_name, 255 'preference': 1, 256 } 257 258 259 class VimeoIE(VimeoBaseInfoExtractor): 260 """Information extractor for vimeo.com.""" 261 262 # _VALID_URL matches Vimeo URLs 263 _VALID_URL = r'''(?x) 264 https?:// 265 (?: 266 (?: 267 www| 268 player 269 ) 270 \. 271 )? 272 vimeo(?:pro)?\.com/ 273 (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) 274 (?:.*?/)? 275 (?: 276 (?: 277 play_redirect_hls| 278 moogaloop\.swf)\?clip_id= 279 )? 280 (?:videos?/)? 281 (?P<id>[0-9]+) 282 (?:/(?P<unlisted_hash>[\da-f]{10}))? 283 /?(?:[?&].*)?(?:[#].*)?$ 284 ''' 285 IE_NAME = 'vimeo' 286 _TESTS = [ 287 { 288 'url': 'http://vimeo.com/56015672#at=0', 289 'md5': '8879b6cc097e987f02484baf890129e5', 290 'info_dict': { 291 'id': '56015672', 292 'ext': 'mp4', 293 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 294 'description': 'md5:2d3305bad981a06ff79f027f19865021', 295 'timestamp': 1355990239, 296 'upload_date': '20121220', 297 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', 298 'uploader_id': 'user7108434', 299 'uploader': 'Filippo Valsorda', 300 'duration': 10, 301 'license': 'by-sa', 302 }, 303 'params': { 304 'format': 'best[protocol=https]', 305 }, 306 }, 307 { 308 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 309 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 310 'note': 'Vimeo Pro video (#1197)', 311 'info_dict': { 312 'id': '68093876', 313 'ext': 'mp4', 314 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus', 315 'uploader_id': 'openstreetmapus', 316 'uploader': 'OpenStreetMap US', 317 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 318 'description': 'md5:2c362968038d4499f4d79f88458590c1', 319 'duration': 1595, 320 'upload_date': '20130610', 321 'timestamp': 1370893156, 322 'license': 'by', 323 }, 324 'params': { 325 'format': 'best[protocol=https]', 326 }, 327 }, 328 { 329 'url': 'http://player.vimeo.com/video/54469442', 330 'md5': '619b811a4417aa4abe78dc653becf511', 331 'note': 'Videos that embed the url in the player page', 332 'info_dict': { 333 'id': '54469442', 334 'ext': 'mp4', 335 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 336 'uploader': 'Business of Software', 337 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware', 338 'uploader_id': 'businessofsoftware', 339 'duration': 3610, 340 'description': None, 341 }, 342 'params': { 343 'format': 'best[protocol=https]', 344 }, 345 'expected_warnings': ['Unable to download JSON metadata'], 346 }, 347 { 348 'url': 'http://vimeo.com/68375962', 349 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 350 'note': 'Video protected with password', 351 'info_dict': { 352 'id': '68375962', 353 'ext': 'mp4', 354 'title': 'youtube-dl password protected test video', 355 'timestamp': 1371200155, 356 'upload_date': '20130614', 357 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 358 'uploader_id': 'user18948128', 359 'uploader': 'Jaime Marquínez Ferrándiz', 360 'duration': 10, 361 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', 362 }, 363 'params': { 364 'format': 'best[protocol=https]', 365 'videopassword': 'youtube-dl', 366 }, 367 }, 368 { 369 'url': 'http://vimeo.com/channels/keypeele/75629013', 370 'md5': '2f86a05afe9d7abc0b9126d229bbe15d', 371 'info_dict': { 372 'id': '75629013', 373 'ext': 'mp4', 374 'title': 'Key & Peele: Terrorist Interrogation', 375 'description': 'md5:8678b246399b070816b12313e8b4eb5c', 376 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 377 'uploader_id': 'atencio', 378 'uploader': 'Peter Atencio', 379 'channel_id': 'keypeele', 380 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele', 381 'timestamp': 1380339469, 382 'upload_date': '20130928', 383 'duration': 187, 384 }, 385 'expected_warnings': ['Unable to download JSON metadata'], 386 }, 387 { 388 'url': 'http://vimeo.com/76979871', 389 'note': 'Video with subtitles', 390 'info_dict': { 391 'id': '76979871', 392 'ext': 'mp4', 393 'title': 'The New Vimeo Player (You Know, For Videos)', 394 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 395 'timestamp': 1381846109, 396 'upload_date': '20131015', 397 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff', 398 'uploader_id': 'staff', 399 'uploader': 'Vimeo Staff', 400 'duration': 62, 401 'subtitles': { 402 'de': [{'ext': 'vtt'}], 403 'en': [{'ext': 'vtt'}], 404 'es': [{'ext': 'vtt'}], 405 'fr': [{'ext': 'vtt'}], 406 }, 407 } 408 }, 409 { 410 # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ 411 'url': 'https://player.vimeo.com/video/98044508', 412 'note': 'The js code contains assignments to the same variable as the config', 413 'info_dict': { 414 'id': '98044508', 415 'ext': 'mp4', 416 'title': 'Pier Solar OUYA Official Trailer', 417 'uploader': 'Tulio Gonçalves', 418 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593', 419 'uploader_id': 'user28849593', 420 }, 421 }, 422 { 423 # contains original format 424 'url': 'https://vimeo.com/33951933', 425 'md5': '53c688fa95a55bf4b7293d37a89c5c53', 426 'info_dict': { 427 'id': '33951933', 428 'ext': 'mp4', 429 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute', 430 'uploader': 'The DMCI', 431 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci', 432 'uploader_id': 'dmci', 433 'timestamp': 1324343742, 434 'upload_date': '20111220', 435 'description': 'md5:ae23671e82d05415868f7ad1aec21147', 436 }, 437 }, 438 { 439 # only available via https://vimeo.com/channels/tributes/6213729 and 440 # not via https://vimeo.com/6213729 441 'url': 'https://vimeo.com/channels/tributes/6213729', 442 'info_dict': { 443 'id': '6213729', 444 'ext': 'mp4', 445 'title': 'Vimeo Tribute: The Shining', 446 'uploader': 'Casey Donahue', 447 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 448 'uploader_id': 'caseydonahue', 449 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes', 450 'channel_id': 'tributes', 451 'timestamp': 1250886430, 452 'upload_date': '20090821', 453 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', 454 }, 455 'params': { 456 'skip_download': True, 457 }, 458 'expected_warnings': ['Unable to download JSON metadata'], 459 }, 460 { 461 # redirects to ondemand extractor and should be passed through it 462 # for successful extraction 463 'url': 'https://vimeo.com/73445910', 464 'info_dict': { 465 'id': '73445910', 466 'ext': 'mp4', 467 'title': 'The Reluctant Revolutionary', 468 'uploader': '10Ft Films', 469 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms', 470 'uploader_id': 'tenfootfilms', 471 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384', 472 'upload_date': '20130830', 473 'timestamp': 1377853339, 474 }, 475 'params': { 476 'skip_download': True, 477 }, 478 'expected_warnings': ['Unable to download JSON metadata'], 479 'skip': 'this page is no longer available.', 480 }, 481 { 482 'url': 'http://player.vimeo.com/video/68375962', 483 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 484 'info_dict': { 485 'id': '68375962', 486 'ext': 'mp4', 487 'title': 'youtube-dl password protected test video', 488 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 489 'uploader_id': 'user18948128', 490 'uploader': 'Jaime Marquínez Ferrándiz', 491 'duration': 10, 492 }, 493 'params': { 494 'format': 'best[protocol=https]', 495 'videopassword': 'youtube-dl', 496 }, 497 }, 498 { 499 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 500 'only_matching': True, 501 }, 502 { 503 'url': 'https://vimeo.com/109815029', 504 'note': 'Video not completely processed, "failed" seed status', 505 'only_matching': True, 506 }, 507 { 508 'url': 'https://vimeo.com/groups/travelhd/videos/22439234', 509 'only_matching': True, 510 }, 511 { 512 'url': 'https://vimeo.com/album/2632481/video/79010983', 513 'only_matching': True, 514 }, 515 { 516 # source file returns 403: Forbidden 517 'url': 'https://vimeo.com/7809605', 518 'only_matching': True, 519 }, 520 { 521 'url': 'https://vimeo.com/160743502/abd0e13fb4', 522 'only_matching': True, 523 }, 524 { 525 # requires passing unlisted_hash(a52724358e) to load_download_config request 526 'url': 'https://vimeo.com/392479337/a52724358e', 527 'only_matching': True, 528 } 529 # https://gettingthingsdone.com/workflowmap/ 530 # vimeo embed with check-password page protected by Referer header 531 ] 532 533 @staticmethod 534 def _smuggle_referrer(url, referrer_url): 535 return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) 536 537 @staticmethod 538 def _extract_urls(url, webpage): 539 urls = [] 540 # Look for embedded (iframe) Vimeo player 541 for mobj in re.finditer( 542 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1', 543 webpage): 544 urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) 545 PLAIN_EMBED_RE = ( 546 # Look for embedded (swf embed) Vimeo player 547 r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', 548 # Look more for non-standard embedded Vimeo player 549 r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', 550 ) 551 for embed_re in PLAIN_EMBED_RE: 552 for mobj in re.finditer(embed_re, webpage): 553 urls.append(mobj.group('url')) 554 return urls 555 556 @staticmethod 557 def _extract_url(url, webpage): 558 urls = VimeoIE._extract_urls(url, webpage) 559 return urls[0] if urls else None 560 561 def _verify_player_video_password(self, url, video_id, headers): 562 password = self._get_video_password() 563 data = urlencode_postdata({ 564 'password': base64.b64encode(password.encode()), 565 }) 566 headers = merge_dicts(headers, { 567 'Content-Type': 'application/x-www-form-urlencoded', 568 }) 569 checked = self._download_json( 570 url + '/check-password', video_id, 571 'Verifying the password', data=data, headers=headers) 572 if checked is False: 573 raise ExtractorError('Wrong video password', expected=True) 574 return checked 575 576 def _real_initialize(self): 577 self._login() 578 579 def _extract_from_api(self, video_id, unlisted_hash=None): 580 token = self._download_json( 581 'https://vimeo.com/_rv/jwt', video_id, headers={ 582 'X-Requested-With': 'XMLHttpRequest' 583 })['token'] 584 api_url = 'https://api.vimeo.com/videos/' + video_id 585 if unlisted_hash: 586 api_url += ':' + unlisted_hash 587 video = self._download_json( 588 api_url, video_id, headers={ 589 'Authorization': 'jwt ' + token, 590 }, query={ 591 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', 592 }) 593 info = self._parse_config(self._download_json( 594 video['config_url'], video_id), video_id) 595 self._vimeo_sort_formats(info['formats']) 596 get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) 597 info.update({ 598 'description': video.get('description'), 599 'license': video.get('license'), 600 'release_timestamp': get_timestamp('release'), 601 'timestamp': get_timestamp('created'), 602 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])), 603 }) 604 connections = try_get( 605 video, lambda x: x['metadata']['connections'], dict) or {} 606 for k in ('comment', 'like'): 607 info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total'])) 608 return info 609 610 def _real_extract(self, url): 611 url, data = unsmuggle_url(url, {}) 612 headers = std_headers.copy() 613 if 'http_headers' in data: 614 headers.update(data['http_headers']) 615 if 'Referer' not in headers: 616 headers['Referer'] = url 617 618 mobj = re.match(self._VALID_URL, url).groupdict() 619 video_id, unlisted_hash = mobj['id'], mobj.get('unlisted_hash') 620 if unlisted_hash: 621 return self._extract_from_api(video_id, unlisted_hash) 622 623 orig_url = url 624 is_pro = 'vimeopro.com/' in url 625 if is_pro: 626 # some videos require portfolio_id to be present in player url 627 # https://github.com/ytdl-org/youtube-dl/issues/20070 628 url = self._extract_url(url, self._download_webpage(url, video_id)) 629 if not url: 630 url = 'https://vimeo.com/' + video_id 631 elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): 632 url = 'https://vimeo.com/' + video_id 633 634 try: 635 # Retrieve video webpage to extract further information 636 webpage, urlh = self._download_webpage_handle( 637 url, video_id, headers=headers) 638 redirect_url = urlh.geturl() 639 except ExtractorError as ee: 640 if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: 641 errmsg = ee.cause.read() 642 if b'Because of its privacy settings, this video cannot be played here' in errmsg: 643 raise ExtractorError( 644 'Cannot download embed-only video without embedding ' 645 'URL. Please call youtube-dl with the URL of the page ' 646 'that embeds this video.', 647 expected=True) 648 raise 649 650 if '//player.vimeo.com/video/' in url: 651 config = self._parse_json(self._search_regex( 652 r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) 653 if config.get('view') == 4: 654 config = self._verify_player_video_password( 655 redirect_url, video_id, headers) 656 info = self._parse_config(config, video_id) 657 self._vimeo_sort_formats(info['formats']) 658 return info 659 660 if re.search(r'<form[^>]+?id="pw_form"', webpage): 661 video_password = self._get_video_password() 662 token, vuid = self._extract_xsrft_and_vuid(webpage) 663 webpage = self._verify_video_password( 664 redirect_url, video_id, video_password, token, vuid) 665 666 vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) 667 if vimeo_config: 668 seed_status = vimeo_config.get('seed_status') or {} 669 if seed_status.get('state') == 'failed': 670 raise ExtractorError( 671 '%s said: %s' % (self.IE_NAME, seed_status['title']), 672 expected=True) 673 674 cc_license = None 675 timestamp = None 676 video_description = None 677 info_dict = {} 678 679 channel_id = self._search_regex( 680 r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) 681 if channel_id: 682 config_url = self._html_search_regex( 683 r'\bdata-config-url="([^"]+)"', webpage, 'config URL') 684 video_description = clean_html(get_element_by_class('description', webpage)) 685 info_dict.update({ 686 'channel_id': channel_id, 687 'channel_url': 'https://vimeo.com/channels/' + channel_id, 688 }) 689 else: 690 page_config = self._parse_json(self._search_regex( 691 r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', 692 webpage, 'page config', default='{}'), video_id, fatal=False) 693 if not page_config: 694 return self._extract_from_api(video_id) 695 config_url = page_config['player']['config_url'] 696 cc_license = page_config.get('cc_license') 697 clip = page_config.get('clip') or {} 698 timestamp = clip.get('uploaded_on') 699 video_description = clean_html( 700 clip.get('description') or page_config.get('description_html_escaped')) 701 config = self._download_json(config_url, video_id) 702 video = config.get('video') or {} 703 vod = video.get('vod') or {} 704 705 def is_rented(): 706 if '>You rented this title.<' in webpage: 707 return True 708 if try_get(config, lambda x: x['user']['purchased']): 709 return True 710 for purchase_option in (vod.get('purchase_options') or []): 711 if purchase_option.get('purchased'): 712 return True 713 label = purchase_option.get('label_string') 714 if label and (label.startswith('You rented this') or label.endswith(' remaining')): 715 return True 716 return False 717 718 if is_rented() and vod.get('is_trailer'): 719 feature_id = vod.get('feature_id') 720 if feature_id and not data.get('force_feature_id', False): 721 return self.url_result(smuggle_url( 722 'https://player.vimeo.com/player/%s' % feature_id, 723 {'force_feature_id': True}), 'Vimeo') 724 725 if not video_description: 726 video_description = self._html_search_meta( 727 ['description', 'og:description', 'twitter:description'], 728 webpage, default=None) 729 if not video_description and is_pro: 730 orig_webpage = self._download_webpage( 731 orig_url, video_id, 732 note='Downloading webpage for description', 733 fatal=False) 734 if orig_webpage: 735 video_description = self._html_search_meta( 736 'description', orig_webpage, default=None) 737 if not video_description: 738 self._downloader.report_warning('Cannot find video description') 739 740 if not timestamp: 741 timestamp = self._search_regex( 742 r'<time[^>]+datetime="([^"]+)"', webpage, 743 'timestamp', default=None) 744 745 formats = [] 746 747 source_format = self._extract_original_format( 748 'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash')) 749 if source_format: 750 formats.append(source_format) 751 752 info_dict_config = self._parse_config(config, video_id) 753 formats.extend(info_dict_config['formats']) 754 self._vimeo_sort_formats(formats) 755 756 json_ld = self._search_json_ld(webpage, video_id, default={}) 757 758 if not cc_license: 759 cc_license = self._search_regex( 760 r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1', 761 webpage, 'license', default=None, group='license') 762 763 info_dict.update({ 764 'formats': formats, 765 'timestamp': unified_timestamp(timestamp), 766 'description': video_description, 767 'webpage_url': url, 768 'license': cc_license, 769 }) 770 771 return merge_dicts(info_dict, info_dict_config, json_ld) 772 773 774 class VimeoOndemandIE(VimeoIE): 775 IE_NAME = 'vimeo:ondemand' 776 _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)' 777 _TESTS = [{ 778 # ondemand video not available via https://vimeo.com/id 779 'url': 'https://vimeo.com/ondemand/20704', 780 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35', 781 'info_dict': { 782 'id': '105442900', 783 'ext': 'mp4', 784 'title': 'המעבדה - במאי יותם פלדמן', 785 'uploader': 'גם סרטים', 786 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 787 'uploader_id': 'gumfilms', 788 'description': 'md5:4c027c965e439de4baab621e48b60791', 789 'upload_date': '20140906', 790 'timestamp': 1410032453, 791 }, 792 'params': { 793 'format': 'best[protocol=https]', 794 }, 795 'expected_warnings': ['Unable to download JSON metadata'], 796 }, { 797 # requires Referer to be passed along with og:video:url 798 'url': 'https://vimeo.com/ondemand/36938/126682985', 799 'info_dict': { 800 'id': '126584684', 801 'ext': 'mp4', 802 'title': 'Rävlock, rätt läte på rätt plats', 803 'uploader': 'Lindroth & Norin', 804 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin', 805 'uploader_id': 'lindrothnorin', 806 'description': 'md5:c3c46a90529612c8279fb6af803fc0df', 807 'upload_date': '20150502', 808 'timestamp': 1430586422, 809 }, 810 'params': { 811 'skip_download': True, 812 }, 813 'expected_warnings': ['Unable to download JSON metadata'], 814 }, { 815 'url': 'https://vimeo.com/ondemand/nazmaalik', 816 'only_matching': True, 817 }, { 818 'url': 'https://vimeo.com/ondemand/141692381', 819 'only_matching': True, 820 }, { 821 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832', 822 'only_matching': True, 823 }] 824 825 826 class VimeoChannelIE(VimeoBaseInfoExtractor): 827 IE_NAME = 'vimeo:channel' 828 _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])' 829 _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' 830 _TITLE = None 831 _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' 832 _TESTS = [{ 833 'url': 'https://vimeo.com/channels/tributes', 834 'info_dict': { 835 'id': 'tributes', 836 'title': 'Vimeo Tributes', 837 }, 838 'playlist_mincount': 25, 839 }] 840 _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s' 841 842 def _page_url(self, base_url, pagenum): 843 return '%s/videos/page:%d/' % (base_url, pagenum) 844 845 def _extract_list_title(self, webpage): 846 return self._TITLE or self._html_search_regex( 847 self._TITLE_RE, webpage, 'list title', fatal=False) 848 849 def _title_and_entries(self, list_id, base_url): 850 for pagenum in itertools.count(1): 851 page_url = self._page_url(base_url, pagenum) 852 webpage = self._download_webpage( 853 page_url, list_id, 854 'Downloading page %s' % pagenum) 855 856 if pagenum == 1: 857 yield self._extract_list_title(webpage) 858 859 # Try extracting href first since not all videos are available via 860 # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) 861 clips = re.findall( 862 r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage) 863 if clips: 864 for video_id, video_url, video_title in clips: 865 yield self.url_result( 866 compat_urlparse.urljoin(base_url, video_url), 867 VimeoIE.ie_key(), video_id=video_id, video_title=video_title) 868 # More relaxed fallback 869 else: 870 for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): 871 yield self.url_result( 872 'https://vimeo.com/%s' % video_id, 873 VimeoIE.ie_key(), video_id=video_id) 874 875 if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: 876 break 877 878 def _extract_videos(self, list_id, base_url): 879 title_and_entries = self._title_and_entries(list_id, base_url) 880 list_title = next(title_and_entries) 881 return self.playlist_result(title_and_entries, list_id, list_title) 882 883 def _real_extract(self, url): 884 channel_id = self._match_id(url) 885 return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id) 886 887 888 class VimeoUserIE(VimeoChannelIE): 889 IE_NAME = 'vimeo:user' 890 _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)' 891 _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>' 892 _TESTS = [{ 893 'url': 'https://vimeo.com/nkistudio/videos', 894 'info_dict': { 895 'title': 'Nki', 896 'id': 'nkistudio', 897 }, 898 'playlist_mincount': 66, 899 }] 900 _BASE_URL_TEMPL = 'https://vimeo.com/%s' 901 902 903 class VimeoAlbumIE(VimeoBaseInfoExtractor): 904 IE_NAME = 'vimeo:album' 905 _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))' 906 _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' 907 _TESTS = [{ 908 'url': 'https://vimeo.com/album/2632481', 909 'info_dict': { 910 'id': '2632481', 911 'title': 'Staff Favorites: November 2013', 912 }, 913 'playlist_mincount': 13, 914 }, { 915 'note': 'Password-protected album', 916 'url': 'https://vimeo.com/album/3253534', 917 'info_dict': { 918 'title': 'test', 919 'id': '3253534', 920 }, 921 'playlist_count': 1, 922 'params': { 923 'videopassword': 'youtube-dl', 924 } 925 }] 926 _PAGE_SIZE = 100 927 928 def _fetch_page(self, album_id, authorization, hashed_pass, page): 929 api_page = page + 1 930 query = { 931 'fields': 'link,uri', 932 'page': api_page, 933 'per_page': self._PAGE_SIZE, 934 } 935 if hashed_pass: 936 query['_hashed_pass'] = hashed_pass 937 try: 938 videos = self._download_json( 939 'https://api.vimeo.com/albums/%s/videos' % album_id, 940 album_id, 'Downloading page %d' % api_page, query=query, headers={ 941 'Authorization': 'jwt ' + authorization, 942 })['data'] 943 except ExtractorError as e: 944 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: 945 return 946 for video in videos: 947 link = video.get('link') 948 if not link: 949 continue 950 uri = video.get('uri') 951 video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None 952 yield self.url_result(link, VimeoIE.ie_key(), video_id) 953 954 def _real_extract(self, url): 955 album_id = self._match_id(url) 956 viewer = self._download_json( 957 'https://vimeo.com/_rv/viewer', album_id, fatal=False) 958 if not viewer: 959 webpage = self._download_webpage(url, album_id) 960 viewer = self._parse_json(self._search_regex( 961 r'bootstrap_data\s*=\s*({.+?})</script>', 962 webpage, 'bootstrap data'), album_id)['viewer'] 963 jwt = viewer['jwt'] 964 album = self._download_json( 965 'https://api.vimeo.com/albums/' + album_id, 966 album_id, headers={'Authorization': 'jwt ' + jwt}, 967 query={'fields': 'description,name,privacy'}) 968 hashed_pass = None 969 if try_get(album, lambda x: x['privacy']['view']) == 'password': 970 password = self._downloader.params.get('videopassword') 971 if not password: 972 raise ExtractorError( 973 'This album is protected by a password, use the --video-password option', 974 expected=True) 975 self._set_vimeo_cookie('vuid', viewer['vuid']) 976 try: 977 hashed_pass = self._download_json( 978 'https://vimeo.com/showcase/%s/auth' % album_id, 979 album_id, 'Verifying the password', data=urlencode_postdata({ 980 'password': password, 981 'token': viewer['xsrft'], 982 }), headers={ 983 'X-Requested-With': 'XMLHttpRequest', 984 })['hashed_pass'] 985 except ExtractorError as e: 986 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: 987 raise ExtractorError('Wrong password', expected=True) 988 raise 989 entries = OnDemandPagedList(functools.partial( 990 self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE) 991 return self.playlist_result( 992 entries, album_id, album.get('name'), album.get('description')) 993 994 995 class VimeoGroupsIE(VimeoChannelIE): 996 IE_NAME = 'vimeo:group' 997 _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)' 998 _TESTS = [{ 999 'url': 'https://vimeo.com/groups/kattykay', 1000 'info_dict': { 1001 'id': 'kattykay', 1002 'title': 'Katty Kay', 1003 }, 1004 'playlist_mincount': 27, 1005 }] 1006 _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s' 1007 1008 1009 class VimeoReviewIE(VimeoBaseInfoExtractor): 1010 IE_NAME = 'vimeo:review' 1011 IE_DESC = 'Review pages on vimeo' 1012 _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})' 1013 _TESTS = [{ 1014 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 1015 'md5': 'c507a72f780cacc12b2248bb4006d253', 1016 'info_dict': { 1017 'id': '75524534', 1018 'ext': 'mp4', 1019 'title': "DICK HARDWICK 'Comedian'", 1020 'uploader': 'Richard Hardwick', 1021 'uploader_id': 'user21297594', 1022 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks", 1023 }, 1024 'expected_warnings': ['Unable to download JSON metadata'], 1025 }, { 1026 'note': 'video player needs Referer', 1027 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', 1028 'md5': '6295fdab8f4bf6a002d058b2c6dce276', 1029 'info_dict': { 1030 'id': '91613211', 1031 'ext': 'mp4', 1032 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn', 1033 'uploader': 'DevWeek Events', 1034 'duration': 2773, 1035 'thumbnail': r're:^https?://.*\.jpg$', 1036 'uploader_id': 'user22258446', 1037 }, 1038 'skip': 'video gone', 1039 }, { 1040 'note': 'Password protected', 1041 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde', 1042 'info_dict': { 1043 'id': '138823582', 1044 'ext': 'mp4', 1045 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1', 1046 'uploader': 'TMB', 1047 'uploader_id': 'user37284429', 1048 }, 1049 'params': { 1050 'videopassword': 'holygrail', 1051 }, 1052 'skip': 'video gone', 1053 }] 1054 1055 def _real_initialize(self): 1056 self._login() 1057 1058 def _real_extract(self, url): 1059 page_url, video_id = re.match(self._VALID_URL, url).groups() 1060 data = self._download_json( 1061 page_url.replace('/review/', '/review/data/'), video_id) 1062 if data.get('isLocked') is True: 1063 video_password = self._get_video_password() 1064 viewer = self._download_json( 1065 'https://vimeo.com/_rv/viewer', video_id) 1066 webpage = self._verify_video_password( 1067 'https://vimeo.com/' + video_id, video_id, 1068 video_password, viewer['xsrft'], viewer['vuid']) 1069 clip_page_config = self._parse_json(self._search_regex( 1070 r'window\.vimeo\.clip_page_config\s*=\s*({.+?});', 1071 webpage, 'clip page config'), video_id) 1072 config_url = clip_page_config['player']['config_url'] 1073 clip_data = clip_page_config.get('clip') or {} 1074 else: 1075 clip_data = data['clipData'] 1076 config_url = clip_data['configUrl'] 1077 config = self._download_json(config_url, video_id) 1078 info_dict = self._parse_config(config, video_id) 1079 source_format = self._extract_original_format( 1080 page_url + '/action', video_id) 1081 if source_format: 1082 info_dict['formats'].append(source_format) 1083 self._vimeo_sort_formats(info_dict['formats']) 1084 info_dict['description'] = clean_html(clip_data.get('description')) 1085 return info_dict 1086 1087 1088 class VimeoWatchLaterIE(VimeoChannelIE): 1089 IE_NAME = 'vimeo:watchlater' 1090 IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' 1091 _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater' 1092 _TITLE = 'Watch Later' 1093 _LOGIN_REQUIRED = True 1094 _TESTS = [{ 1095 'url': 'https://vimeo.com/watchlater', 1096 'only_matching': True, 1097 }] 1098 1099 def _real_initialize(self): 1100 self._login() 1101 1102 def _page_url(self, base_url, pagenum): 1103 url = '%s/page:%d/' % (base_url, pagenum) 1104 request = sanitized_Request(url) 1105 # Set the header to get a partial html page with the ids, 1106 # the normal page doesn't contain them. 1107 request.add_header('X-Requested-With', 'XMLHttpRequest') 1108 return request 1109 1110 def _real_extract(self, url): 1111 return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') 1112 1113 1114 class VimeoLikesIE(VimeoChannelIE): 1115 _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)' 1116 IE_NAME = 'vimeo:likes' 1117 IE_DESC = 'Vimeo user likes' 1118 _TESTS = [{ 1119 'url': 'https://vimeo.com/user755559/likes/', 1120 'playlist_mincount': 293, 1121 'info_dict': { 1122 'id': 'user755559', 1123 'title': 'urza’s Likes', 1124 }, 1125 }, { 1126 'url': 'https://vimeo.com/stormlapse/likes', 1127 'only_matching': True, 1128 }] 1129 1130 def _page_url(self, base_url, pagenum): 1131 return '%s/page:%d/' % (base_url, pagenum) 1132 1133 def _real_extract(self, url): 1134 user_id = self._match_id(url) 1135 return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id) 1136 1137 1138 class VHXEmbedIE(VimeoBaseInfoExtractor): 1139 IE_NAME = 'vhx:embed' 1140 _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' 1141 1142 @staticmethod 1143 def _extract_url(webpage): 1144 mobj = re.search( 1145 r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage) 1146 return unescapeHTML(mobj.group(1)) if mobj else None 1147 1148 def _real_extract(self, url): 1149 video_id = self._match_id(url) 1150 webpage = self._download_webpage(url, video_id) 1151 config_url = self._parse_json(self._search_regex( 1152 r'window\.OTTData\s*=\s*({.+})', webpage, 1153 'ott data'), video_id, js_to_json)['config_url'] 1154 config = self._download_json(config_url, video_id) 1155 info = self._parse_config(config, video_id) 1156 info['id'] = video_id 1157 self._vimeo_sort_formats(info['formats']) 1158 return info