# youtube.py (142971B)
1 # coding: utf-8 2 3 from __future__ import unicode_literals 4 5 import itertools 6 import json 7 import os.path 8 import random 9 import re 10 import traceback 11 12 from .common import InfoExtractor, SearchInfoExtractor 13 from ..compat import ( 14 compat_chr, 15 compat_HTTPError, 16 compat_parse_qs, 17 compat_str, 18 compat_urllib_parse_unquote_plus, 19 compat_urllib_parse_urlencode, 20 compat_urllib_parse_urlparse, 21 compat_urlparse, 22 ) 23 from ..jsinterp import JSInterpreter 24 from ..utils import ( 25 ExtractorError, 26 clean_html, 27 dict_get, 28 float_or_none, 29 int_or_none, 30 mimetype2ext, 31 parse_codecs, 32 parse_duration, 33 qualities, 34 remove_start, 35 smuggle_url, 36 str_or_none, 37 str_to_int, 38 try_get, 39 unescapeHTML, 40 unified_strdate, 41 unsmuggle_url, 42 update_url_query, 43 url_or_none, 44 urlencode_postdata, 45 urljoin, 46 ) 47 48 49 def parse_qs(url): 50 return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 51 52 53 class YoutubeBaseInfoExtractor(InfoExtractor): 54 """Provide base functions for Youtube extractors""" 55 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' 56 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' 57 58 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' 59 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' 60 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' 61 62 _NETRC_MACHINE = 'youtube' 63 # If True it will raise an error if no login info is provided 64 _LOGIN_REQUIRED = False 65 66 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)' 67 68 def _login(self): 69 """ 70 Attempt to log in to YouTube. 71 True is returned if successful or skipped. 72 False is returned if login failed. 73 74 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. 
75 """ 76 username, password = self._get_login_info() 77 # No authentication to be performed 78 if username is None: 79 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None: 80 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) 81 return True 82 83 login_page = self._download_webpage( 84 self._LOGIN_URL, None, 85 note='Downloading login page', 86 errnote='unable to fetch login page', fatal=False) 87 if login_page is False: 88 return 89 90 login_form = self._hidden_inputs(login_page) 91 92 def req(url, f_req, note, errnote): 93 data = login_form.copy() 94 data.update({ 95 'pstMsg': 1, 96 'checkConnection': 'youtube', 97 'checkedDomains': 'youtube', 98 'hl': 'en', 99 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]', 100 'f.req': json.dumps(f_req), 101 'flowName': 'GlifWebSignIn', 102 'flowEntry': 'ServiceLogin', 103 # TODO: reverse actual botguard identifier generation algo 104 'bgRequest': '["identifier",""]', 105 }) 106 return self._download_json( 107 url, None, note=note, errnote=errnote, 108 transform_source=lambda s: re.sub(r'^[^[]*', '', s), 109 fatal=False, 110 data=urlencode_postdata(data), headers={ 111 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', 112 'Google-Accounts-XSRF': 1, 113 }) 114 115 def warn(message): 116 self._downloader.report_warning(message) 117 118 lookup_req = [ 119 username, 120 None, [], None, 'US', None, None, 2, False, True, 121 [ 122 None, None, 123 [2, 1, None, 1, 124 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', 125 None, [], 4], 126 1, [None, None, []], None, None, None, True 127 ], 128 username, 129 ] 130 131 lookup_results = req( 132 self._LOOKUP_URL, 
lookup_req, 133 'Looking up account info', 'Unable to look up account info') 134 135 if lookup_results is False: 136 return False 137 138 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str) 139 if not user_hash: 140 warn('Unable to extract user hash') 141 return False 142 143 challenge_req = [ 144 user_hash, 145 None, 1, None, [1, None, None, None, [password, None, True]], 146 [ 147 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4], 148 1, [None, None, []], None, None, None, True 149 ]] 150 151 challenge_results = req( 152 self._CHALLENGE_URL, challenge_req, 153 'Logging in', 'Unable to log in') 154 155 if challenge_results is False: 156 return 157 158 login_res = try_get(challenge_results, lambda x: x[0][5], list) 159 if login_res: 160 login_msg = try_get(login_res, lambda x: x[5], compat_str) 161 warn( 162 'Unable to login: %s' % 'Invalid password' 163 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg) 164 return False 165 166 res = try_get(challenge_results, lambda x: x[0][-1], list) 167 if not res: 168 warn('Unable to extract result entry') 169 return False 170 171 login_challenge = try_get(res, lambda x: x[0][0], list) 172 if login_challenge: 173 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str) 174 if challenge_str == 'TWO_STEP_VERIFICATION': 175 # SEND_SUCCESS - TFA code has been successfully sent to phone 176 # QUOTA_EXCEEDED - reached the limit of TFA codes 177 status = try_get(login_challenge, lambda x: x[5], compat_str) 178 if status == 'QUOTA_EXCEEDED': 179 warn('Exceeded the limit of TFA codes, try later') 180 return False 181 182 tl = try_get(challenge_results, lambda x: x[1][2], compat_str) 183 if not tl: 184 warn('Unable to extract 
TL') 185 return False 186 187 tfa_code = self._get_tfa_info('2-step verification code') 188 189 if not tfa_code: 190 warn( 191 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' 192 '(Note that only TOTP (Google Authenticator App) codes work at this time.)') 193 return False 194 195 tfa_code = remove_start(tfa_code, 'G-') 196 197 tfa_req = [ 198 user_hash, None, 2, None, 199 [ 200 9, None, None, None, None, None, None, None, 201 [None, tfa_code, True, 2] 202 ]] 203 204 tfa_results = req( 205 self._TFA_URL.format(tl), tfa_req, 206 'Submitting TFA code', 'Unable to submit TFA code') 207 208 if tfa_results is False: 209 return False 210 211 tfa_res = try_get(tfa_results, lambda x: x[0][5], list) 212 if tfa_res: 213 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str) 214 warn( 215 'Unable to finish TFA: %s' % 'Invalid TFA code' 216 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg) 217 return False 218 219 check_cookie_url = try_get( 220 tfa_results, lambda x: x[0][-1][2], compat_str) 221 else: 222 CHALLENGES = { 223 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.", 224 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.', 225 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.", 226 } 227 challenge = CHALLENGES.get( 228 challenge_str, 229 '%s returned error %s.' % (self.IE_NAME, challenge_str)) 230 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' 
% challenge) 231 return False 232 else: 233 check_cookie_url = try_get(res, lambda x: x[2], compat_str) 234 235 if not check_cookie_url: 236 warn('Unable to extract CheckCookie URL') 237 return False 238 239 check_cookie_results = self._download_webpage( 240 check_cookie_url, None, 'Checking cookie', fatal=False) 241 242 if check_cookie_results is False: 243 return False 244 245 if 'https://myaccount.google.com/' not in check_cookie_results: 246 warn('Unable to log in') 247 return False 248 249 return True 250 251 def _initialize_consent(self): 252 cookies = self._get_cookies('https://www.youtube.com/') 253 if cookies.get('__Secure-3PSID'): 254 return 255 consent_id = None 256 consent = cookies.get('CONSENT') 257 if consent: 258 if 'YES' in consent.value: 259 return 260 consent_id = self._search_regex( 261 r'PENDING\+(\d+)', consent.value, 'consent', default=None) 262 if not consent_id: 263 consent_id = random.randint(100, 999) 264 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id) 265 266 def _real_initialize(self): 267 self._initialize_consent() 268 if self._downloader is None: 269 return 270 if not self._login(): 271 return 272 273 _DEFAULT_API_DATA = { 274 'context': { 275 'client': { 276 'clientName': 'WEB', 277 'clientVersion': '2.20201021.03.00', 278 } 279 }, 280 } 281 282 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' 283 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' 284 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)' 285 286 def _call_api(self, ep, query, video_id, fatal=True): 287 data = self._DEFAULT_API_DATA.copy() 288 data.update(query) 289 290 return self._download_json( 291 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id, 292 note='Downloading API JSON', errnote='Unable to download API page', 293 data=json.dumps(data).encode('utf8'), fatal=fatal, 294 headers={'content-type': 'application/json'}, 295 
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'}) 296 297 def _extract_yt_initial_data(self, video_id, webpage): 298 return self._parse_json( 299 self._search_regex( 300 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), 301 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), 302 video_id) 303 304 def _extract_ytcfg(self, video_id, webpage): 305 return self._parse_json( 306 self._search_regex( 307 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', 308 default='{}'), video_id, fatal=False) or {} 309 310 def _extract_video(self, renderer): 311 video_id = renderer['videoId'] 312 title = try_get( 313 renderer, 314 (lambda x: x['title']['runs'][0]['text'], 315 lambda x: x['title']['simpleText']), compat_str) 316 description = try_get( 317 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], 318 compat_str) 319 duration = parse_duration(try_get( 320 renderer, lambda x: x['lengthText']['simpleText'], compat_str)) 321 view_count_text = try_get( 322 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' 323 view_count = str_to_int(self._search_regex( 324 r'^([\d,]+)', re.sub(r'\s', '', view_count_text), 325 'view count', default=None)) 326 uploader = try_get( 327 renderer, 328 (lambda x: x['ownerText']['runs'][0]['text'], 329 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str) 330 return { 331 '_type': 'url', 332 'ie_key': YoutubeIE.ie_key(), 333 'id': video_id, 334 'url': video_id, 335 'title': title, 336 'description': description, 337 'duration': duration, 338 'view_count': view_count, 339 'uploader': uploader, 340 } 341 342 343 class YoutubeIE(YoutubeBaseInfoExtractor): 344 IE_DESC = 'YouTube.com' 345 _INVIDIOUS_SITES = ( 346 # invidious-redirect websites 347 r'(?:www\.)?redirect\.invidious\.io', 348 r'(?:(?:www|dev)\.)?invidio\.us', 349 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md 350 r'(?:(?:www|no)\.)?invidiou\.sh', 351 
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', 352 r'(?:www\.)?invidious\.kabi\.tk', 353 r'(?:www\.)?invidious\.13ad\.de', 354 r'(?:www\.)?invidious\.mastodon\.host', 355 r'(?:www\.)?invidious\.zapashcanon\.fr', 356 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks', 357 r'(?:www\.)?invidious\.tinfoil-hat\.net', 358 r'(?:www\.)?invidious\.himiko\.cloud', 359 r'(?:www\.)?invidious\.reallyancient\.tech', 360 r'(?:www\.)?invidious\.tube', 361 r'(?:www\.)?invidiou\.site', 362 r'(?:www\.)?invidious\.site', 363 r'(?:www\.)?invidious\.xyz', 364 r'(?:www\.)?invidious\.nixnet\.xyz', 365 r'(?:www\.)?invidious\.048596\.xyz', 366 r'(?:www\.)?invidious\.drycat\.fr', 367 r'(?:www\.)?inv\.skyn3t\.in', 368 r'(?:www\.)?tube\.poal\.co', 369 r'(?:www\.)?tube\.connect\.cafe', 370 r'(?:www\.)?vid\.wxzm\.sx', 371 r'(?:www\.)?vid\.mint\.lgbt', 372 r'(?:www\.)?vid\.puffyan\.us', 373 r'(?:www\.)?yewtu\.be', 374 r'(?:www\.)?yt\.elukerio\.org', 375 r'(?:www\.)?yt\.lelux\.fi', 376 r'(?:www\.)?invidious\.ggc-project\.de', 377 r'(?:www\.)?yt\.maisputain\.ovh', 378 r'(?:www\.)?ytprivate\.com', 379 r'(?:www\.)?invidious\.13ad\.de', 380 r'(?:www\.)?invidious\.toot\.koeln', 381 r'(?:www\.)?invidious\.fdn\.fr', 382 r'(?:www\.)?watch\.nettohikari\.com', 383 r'(?:www\.)?invidious\.namazso\.eu', 384 r'(?:www\.)?invidious\.silkky\.cloud', 385 r'(?:www\.)?invidious\.exonip\.de', 386 r'(?:www\.)?invidious\.riverside\.rocks', 387 r'(?:www\.)?invidious\.blamefran\.net', 388 r'(?:www\.)?invidious\.moomoo\.de', 389 r'(?:www\.)?ytb\.trom\.tf', 390 r'(?:www\.)?yt\.cyberhost\.uk', 391 r'(?:www\.)?kgg2m7yk5aybusll\.onion', 392 r'(?:www\.)?qklhadlycap4cnod\.onion', 393 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', 394 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion', 395 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion', 396 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', 397 
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', 398 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', 399 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', 400 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', 401 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', 402 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', 403 ) 404 _VALID_URL = r"""(?x)^ 405 ( 406 (?:https?://|//) # http(s):// or protocol-independent URL 407 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com| 408 (?:www\.)?deturl\.com/www\.youtube\.com| 409 (?:www\.)?pwnyoutube\.com| 410 (?:www\.)?hooktube\.com| 411 (?:www\.)?yourepeat\.com| 412 tube\.majestyc\.net| 413 %(invidious)s| 414 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains 415 (?:.*?\#/)? # handle anchor (#/) redirect urls 416 (?: # the various things that can precede the ID: 417 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/ 418 |(?: # or the v= param in all its forms 419 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) 420 (?:\?|\#!?) # the params delimiter ? or # or #! 421 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY) 422 v= 423 ) 424 )) 425 |(?: 426 youtu\.be| # just youtu.be/xxxx 427 vid\.plus| # or vid.plus/xxxx 428 zwearz\.com/watch| # or zwearz.com/watch/xxxx 429 %(invidious)s 430 )/ 431 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= 432 ) 433 )? # all until now is optional -> you can pass the naked ID 434 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID 435 (?(1).+)? 
# if we found the ID, everything can follow 436 $""" % { 437 'invidious': '|'.join(_INVIDIOUS_SITES), 438 } 439 _PLAYER_INFO_RE = ( 440 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', 441 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', 442 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', 443 ) 444 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') 445 446 _GEO_BYPASS = False 447 448 IE_NAME = 'youtube' 449 _TESTS = [ 450 { 451 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', 452 'info_dict': { 453 'id': 'BaW_jenozKc', 454 'ext': 'mp4', 455 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 456 'uploader': 'Philipp Hagemeister', 457 'uploader_id': 'phihag', 458 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 459 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 460 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 461 'upload_date': '20121002', 462 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 463 'categories': ['Science & Technology'], 464 'tags': ['youtube-dl'], 465 'duration': 10, 466 'view_count': int, 467 'like_count': int, 468 'dislike_count': int, 469 'start_time': 1, 470 'end_time': 9, 471 } 472 }, 473 { 474 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', 475 'note': 'Embed-only video (#1746)', 476 'info_dict': { 477 'id': 'yZIXLfi8CZQ', 478 'ext': 'mp4', 479 'upload_date': '20120608', 480 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', 481 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 482 'uploader': 'SET India', 483 'uploader_id': 'setindia', 484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', 485 'age_limit': 18, 486 }, 487 'skip': 'Private video', 488 }, 489 { 490 'url': 
'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ', 491 'note': 'Use the first video ID in the URL', 492 'info_dict': { 493 'id': 'BaW_jenozKc', 494 'ext': 'mp4', 495 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 496 'uploader': 'Philipp Hagemeister', 497 'uploader_id': 'phihag', 498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 499 'upload_date': '20121002', 500 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 501 'categories': ['Science & Technology'], 502 'tags': ['youtube-dl'], 503 'duration': 10, 504 'view_count': int, 505 'like_count': int, 506 'dislike_count': int, 507 }, 508 'params': { 509 'skip_download': True, 510 }, 511 }, 512 { 513 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', 514 'note': '256k DASH audio (format 141) via DASH manifest', 515 'info_dict': { 516 'id': 'a9LDPn-MO4I', 517 'ext': 'm4a', 518 'upload_date': '20121002', 519 'uploader_id': '8KVIDEO', 520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 521 'description': '', 522 'uploader': '8KVIDEO', 523 'title': 'UHDTV TEST 8K VIDEO.mp4' 524 }, 525 'params': { 526 'youtube_include_dash_manifest': True, 527 'format': '141', 528 }, 529 'skip': 'format 141 not served anymore', 530 }, 531 # DASH manifest with encrypted signature 532 { 533 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', 534 'info_dict': { 535 'id': 'IB3lcPjvWLA', 536 'ext': 'm4a', 537 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. 
Spree Wilson', 538 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', 539 'duration': 244, 540 'uploader': 'AfrojackVEVO', 541 'uploader_id': 'AfrojackVEVO', 542 'upload_date': '20131011', 543 'abr': 129.495, 544 }, 545 'params': { 546 'youtube_include_dash_manifest': True, 547 'format': '141/bestaudio[ext=m4a]', 548 }, 549 }, 550 # Controversy video 551 { 552 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8', 553 'info_dict': { 554 'id': 'T4XJQO3qol8', 555 'ext': 'mp4', 556 'duration': 219, 557 'upload_date': '20100909', 558 'uploader': 'Amazing Atheist', 559 'uploader_id': 'TheAmazingAtheist', 560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', 561 'title': 'Burning Everyone\'s Koran', 562 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', 563 } 564 }, 565 # Normal age-gate video (No vevo, embed allowed), available via embed page 566 { 567 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 568 'info_dict': { 569 'id': 'HtVdAasjOgU', 570 'ext': 'mp4', 571 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', 572 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 573 'duration': 142, 574 'uploader': 'The Witcher', 575 'uploader_id': 'WitcherGame', 576 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 577 'upload_date': '20140605', 578 'age_limit': 18, 579 }, 580 }, 581 { 582 # Age-gated video only available with authentication (unavailable 583 # via embed page workaround) 584 'url': 'XgnwCQzjau8', 585 'only_matching': True, 586 }, 587 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) 588 # YouTube Red ad is not captured for creator 589 { 590 'url': '__2ABJjxzNo', 591 'info_dict': { 592 'id': '__2ABJjxzNo', 593 'ext': 'mp4', 594 'duration': 266, 595 'upload_date': '20100430', 
596 'uploader_id': 'deadmau5', 597 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', 598 'creator': 'deadmau5', 599 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336', 600 'uploader': 'deadmau5', 601 'title': 'Deadmau5 - Some Chords (HD)', 602 'alt_title': 'Some Chords', 603 }, 604 'expected_warnings': [ 605 'DASH manifest missing', 606 ] 607 }, 608 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) 609 { 610 'url': 'lqQg6PlCWgI', 611 'info_dict': { 612 'id': 'lqQg6PlCWgI', 613 'ext': 'mp4', 614 'duration': 6085, 615 'upload_date': '20150827', 616 'uploader_id': 'olympic', 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', 618 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 619 'uploader': 'Olympic', 620 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', 621 }, 622 'params': { 623 'skip_download': 'requires avconv', 624 } 625 }, 626 # Non-square pixels 627 { 628 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0', 629 'info_dict': { 630 'id': '_b-2C3KPAM0', 631 'ext': 'mp4', 632 'stretched_ratio': 16 / 9., 633 'duration': 85, 634 'upload_date': '20110310', 635 'uploader_id': 'AllenMeow', 636 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 637 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 638 'uploader': '孫ᄋᄅ', 639 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', 640 }, 641 }, 642 # url_encoded_fmt_stream_map is empty string 643 { 644 'url': 'qEJwOuvDf7I', 645 'info_dict': { 646 'id': 'qEJwOuvDf7I', 647 'ext': 'webm', 648 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', 649 'description': '', 650 'upload_date': '20150404', 651 'uploader_id': 'spbelect', 652 'uploader': 'Наблюдатели Петербурга', 653 }, 654 'params': { 655 'skip_download': 'requires avconv', 656 }, 657 'skip': 'This live event has ended.', 658 }, 659 # Extraction from multiple 
DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097) 660 { 661 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', 662 'info_dict': { 663 'id': 'FIl7x6_3R5Y', 664 'ext': 'webm', 665 'title': 'md5:7b81415841e02ecd4313668cde88737a', 666 'description': 'md5:116377fd2963b81ec4ce64b542173306', 667 'duration': 220, 668 'upload_date': '20150625', 669 'uploader_id': 'dorappi2000', 670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 671 'uploader': 'dorappi2000', 672 'formats': 'mincount:31', 673 }, 674 'skip': 'not actual anymore', 675 }, 676 # DASH manifest with segment_list 677 { 678 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', 679 'md5': '8ce563a1d667b599d21064e982ab9e31', 680 'info_dict': { 681 'id': 'CsmdDsKjzN8', 682 'ext': 'mp4', 683 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510 684 'uploader': 'Airtek', 685 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', 686 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', 687 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', 688 }, 689 'params': { 690 'youtube_include_dash_manifest': True, 691 'format': '135', # bestvideo 692 }, 693 'skip': 'This live event has ended.', 694 }, 695 { 696 # Multifeed videos (multiple cameras), URL is for Main Camera 697 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg', 698 'info_dict': { 699 'id': 'jvGDaLqkpTg', 700 'title': 'Tom Clancy Free Weekend Rainbow Whatever', 701 'description': 'md5:e03b909557865076822aa169218d6a5d', 702 }, 703 'playlist': [{ 704 'info_dict': { 705 'id': 'jvGDaLqkpTg', 706 'ext': 'mp4', 707 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)', 708 'description': 'md5:e03b909557865076822aa169218d6a5d', 709 'duration': 10643, 710 'upload_date': '20161111', 711 'uploader': 'Team PGP', 712 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 713 'uploader_url': 
r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 714 }, 715 }, { 716 'info_dict': { 717 'id': '3AKt1R1aDnw', 718 'ext': 'mp4', 719 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)', 720 'description': 'md5:e03b909557865076822aa169218d6a5d', 721 'duration': 10991, 722 'upload_date': '20161111', 723 'uploader': 'Team PGP', 724 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 726 }, 727 }, { 728 'info_dict': { 729 'id': 'RtAMM00gpVc', 730 'ext': 'mp4', 731 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)', 732 'description': 'md5:e03b909557865076822aa169218d6a5d', 733 'duration': 10995, 734 'upload_date': '20161111', 735 'uploader': 'Team PGP', 736 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 737 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 738 }, 739 }, { 740 'info_dict': { 741 'id': '6N2fdlP3C5U', 742 'ext': 'mp4', 743 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)', 744 'description': 'md5:e03b909557865076822aa169218d6a5d', 745 'duration': 10990, 746 'upload_date': '20161111', 747 'uploader': 'Team PGP', 748 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 749 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 750 }, 751 }], 752 'params': { 753 'skip_download': True, 754 }, 755 }, 756 { 757 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) 758 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', 759 'info_dict': { 760 'id': 'gVfLd0zydlo', 761 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', 762 }, 763 'playlist_count': 2, 764 'skip': 'Not multifeed anymore', 765 }, 766 { 767 'url': 'https://vid.plus/FlRa-iH7PGw', 768 'only_matching': True, 769 }, 770 { 771 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', 772 'only_matching': True, 773 
}, 774 { 775 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) 776 # Also tests cut-off URL expansion in video description (see 777 # https://github.com/ytdl-org/youtube-dl/issues/1892, 778 # https://github.com/ytdl-org/youtube-dl/issues/8164) 779 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 780 'info_dict': { 781 'id': 'lsguqyKfVQg', 782 'ext': 'mp4', 783 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 784 'alt_title': 'Dark Walk - Position Music', 785 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 786 'duration': 133, 787 'upload_date': '20151119', 788 'uploader_id': 'IronSoulElf', 789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 790 'uploader': 'IronSoulElf', 791 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 792 'track': 'Dark Walk - Position Music', 793 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 794 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', 795 }, 796 'params': { 797 'skip_download': True, 798 }, 799 }, 800 { 801 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) 802 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 803 'only_matching': True, 804 }, 805 { 806 # Video with yt:stretch=17:0 807 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM', 808 'info_dict': { 809 'id': 'Q39EVAstoRM', 810 'ext': 'mp4', 811 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4', 812 'description': 'md5:ee18a25c350637c8faff806845bddee9', 813 'upload_date': '20151107', 814 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA', 815 'uploader': 'CH GAMER DROID', 816 }, 817 'params': { 818 'skip_download': True, 819 }, 820 'skip': 'This video does not exist.', 821 }, 822 { 823 # Video with incomplete 'yt:stretch=16:' 824 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI', 825 'only_matching': True, 826 }, 827 { 828 # Video licensed under Creative Commons 829 'url': 
'https://www.youtube.com/watch?v=M4gD1WSo5mA', 830 'info_dict': { 831 'id': 'M4gD1WSo5mA', 832 'ext': 'mp4', 833 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 834 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 835 'duration': 721, 836 'upload_date': '20150127', 837 'uploader_id': 'BerkmanCenter', 838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 839 'uploader': 'The Berkman Klein Center for Internet & Society', 840 'license': 'Creative Commons Attribution license (reuse allowed)', 841 }, 842 'params': { 843 'skip_download': True, 844 }, 845 }, 846 { 847 # Channel-like uploader_url 848 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg', 849 'info_dict': { 850 'id': 'eQcmzGIKrzg', 851 'ext': 'mp4', 852 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 853 'description': 'md5:13a2503d7b5904ef4b223aa101628f39', 854 'duration': 4060, 855 'upload_date': '20151119', 856 'uploader': 'Bernie Sanders', 857 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', 858 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', 859 'license': 'Creative Commons Attribution license (reuse allowed)', 860 }, 861 'params': { 862 'skip_download': True, 863 }, 864 }, 865 { 866 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', 867 'only_matching': True, 868 }, 869 { 870 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) 871 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 872 'only_matching': True, 873 }, 874 { 875 # Rental video preview 876 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', 877 'info_dict': { 878 'id': 'uGpuVWrhIzE', 879 'ext': 'mp4', 880 'title': 'Piku - Trailer', 881 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', 882 'upload_date': '20150811', 883 'uploader': 'FlixMatrix', 884 'uploader_id': 'FlixMatrixKaravan', 885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', 886 'license': 'Standard 
YouTube License', 887 }, 888 'params': { 889 'skip_download': True, 890 }, 891 'skip': 'This video is not available.', 892 }, 893 { 894 # YouTube Red video with episode data 895 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', 896 'info_dict': { 897 'id': 'iqKdEhx-dD4', 898 'ext': 'mp4', 899 'title': 'Isolation - Mind Field (Ep 1)', 900 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd', 901 'duration': 2085, 902 'upload_date': '20170118', 903 'uploader': 'Vsauce', 904 'uploader_id': 'Vsauce', 905 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', 906 'series': 'Mind Field', 907 'season_number': 1, 908 'episode_number': 1, 909 }, 910 'params': { 911 'skip_download': True, 912 }, 913 'expected_warnings': [ 914 'Skipping DASH manifest', 915 ], 916 }, 917 { 918 # The following content has been identified by the YouTube community 919 # as inappropriate or offensive to some audiences. 920 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI', 921 'info_dict': { 922 'id': '6SJNVb0GnPI', 923 'ext': 'mp4', 924 'title': 'Race Differences in Intelligence', 925 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1', 926 'duration': 965, 927 'upload_date': '20140124', 928 'uploader': 'New Century Foundation', 929 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', 930 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', 931 }, 932 'params': { 933 'skip_download': True, 934 }, 935 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.', 936 }, 937 { 938 # itag 212 939 'url': '1t24XAntNCY', 940 'only_matching': True, 941 }, 942 { 943 # geo restricted to JP 944 'url': 'sJL6WA-aGkQ', 945 'only_matching': True, 946 }, 947 { 948 'url': 'https://invidio.us/watch?v=BaW_jenozKc', 949 'only_matching': True, 950 }, 951 { 952 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc', 953 'only_matching': True, 954 }, 955 { 956 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m 957 'url': 
'https://redirect.invidious.io/Yh0AhrY9GjA', 958 'only_matching': True, 959 }, 960 { 961 # DRM protected 962 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', 963 'only_matching': True, 964 }, 965 { 966 # Video with unsupported adaptive stream type formats 967 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U', 968 'info_dict': { 969 'id': 'Z4Vy8R84T1U', 970 'ext': 'mp4', 971 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta', 972 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 973 'duration': 433, 974 'upload_date': '20130923', 975 'uploader': 'Amelia Putri Harwita', 976 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q', 977 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q', 978 'formats': 'maxcount:10', 979 }, 980 'params': { 981 'skip_download': True, 982 'youtube_include_dash_manifest': False, 983 }, 984 'skip': 'not actual anymore', 985 }, 986 { 987 # Youtube Music Auto-generated description 988 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', 989 'info_dict': { 990 'id': 'MgNrAu2pzNs', 991 'ext': 'mp4', 992 'title': 'Voyeur Girl', 993 'description': 'md5:7ae382a65843d6df2685993e90a8628f', 994 'upload_date': '20190312', 995 'uploader': 'Stephen - Topic', 996 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', 997 'artist': 'Stephen', 998 'track': 'Voyeur Girl', 999 'album': 'it\'s too much love to know my dear', 1000 'release_date': '20190313', 1001 'release_year': 2019, 1002 }, 1003 'params': { 1004 'skip_download': True, 1005 }, 1006 }, 1007 { 1008 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', 1009 'only_matching': True, 1010 }, 1011 { 1012 # invalid -> valid video id redirection 1013 'url': 'DJztXj2GPfl', 1014 'info_dict': { 1015 'id': 'DJztXj2GPfk', 1016 'ext': 'mp4', 1017 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', 1018 'description': 'md5:bf577a41da97918e94fa9798d9228825', 1019 'upload_date': '20090125', 1020 'uploader': 'Prochorowka', 1021 
'uploader_id': 'Prochorowka', 1022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', 1023 'artist': 'Panjabi MC', 1024 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', 1025 'album': 'Beware of the Boys (Mundian To Bach Ke)', 1026 }, 1027 'params': { 1028 'skip_download': True, 1029 }, 1030 'skip': 'Video unavailable', 1031 }, 1032 { 1033 # empty description results in an empty string 1034 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', 1035 'info_dict': { 1036 'id': 'x41yOUIvK2k', 1037 'ext': 'mp4', 1038 'title': 'IMG 3456', 1039 'description': '', 1040 'upload_date': '20170613', 1041 'uploader_id': 'ElevageOrVert', 1042 'uploader': 'ElevageOrVert', 1043 }, 1044 'params': { 1045 'skip_download': True, 1046 }, 1047 }, 1048 { 1049 # with '};' inside yt initial data (see [1]) 1050 # see [2] for an example with '};' inside ytInitialPlayerResponse 1051 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093 1052 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216 1053 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no', 1054 'info_dict': { 1055 'id': 'CHqg6qOn4no', 1056 'ext': 'mp4', 1057 'title': 'Part 77 Sort a list of simple types in c#', 1058 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc', 1059 'upload_date': '20130831', 1060 'uploader_id': 'kudvenkat', 1061 'uploader': 'kudvenkat', 1062 }, 1063 'params': { 1064 'skip_download': True, 1065 }, 1066 }, 1067 { 1068 # another example of '};' in ytInitialData 1069 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY', 1070 'only_matching': True, 1071 }, 1072 { 1073 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ', 1074 'only_matching': True, 1075 }, 1076 { 1077 # https://github.com/ytdl-org/youtube-dl/pull/28094 1078 'url': 'OtqTfy26tG0', 1079 'info_dict': { 1080 'id': 'OtqTfy26tG0', 1081 'ext': 'mp4', 1082 'title': 'Burn Out', 1083 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131', 1084 'upload_date': '20141120', 1085 'uploader': 'The 
Cinematic Orchestra - Topic', 1086 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw', 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw', 1088 'artist': 'The Cinematic Orchestra', 1089 'track': 'Burn Out', 1090 'album': 'Every Day', 1091 'release_data': None, 1092 'release_year': None, 1093 }, 1094 'params': { 1095 'skip_download': True, 1096 }, 1097 }, 1098 { 1099 # controversial video, only works with bpctr when authenticated with cookies 1100 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg', 1101 'only_matching': True, 1102 }, 1103 { 1104 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 1105 'url': 'cBvYw8_A0vQ', 1106 'info_dict': { 1107 'id': 'cBvYw8_A0vQ', 1108 'ext': 'mp4', 1109 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き', 1110 'description': 'md5:ea770e474b7cd6722b4c95b833c03630', 1111 'upload_date': '20201120', 1112 'uploader': 'Walk around Japan', 1113 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw', 1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', 1115 }, 1116 'params': { 1117 'skip_download': True, 1118 }, 1119 }, 1120 ] 1121 _formats = { 1122 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, 1123 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, 1124 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, 1125 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, 1126 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, 1127 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, 1128 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1129 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1130 # itag 36 videos are either 320x180 
(BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well 1131 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, 1132 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, 1133 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, 1134 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, 1135 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, 1136 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, 1137 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, 1138 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1139 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1140 1141 1142 # 3D videos 1143 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, 1144 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, 1145 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, 1146 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, 1147 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, 1148 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, 1149 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, 1150 1151 # Apple HTTP Live Streaming 1152 '91': {'ext': 'mp4', 'height': 144, 
'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 1153 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 1154 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, 1155 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, 1156 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, 1157 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, 1158 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 1159 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, 1160 1161 # DASH mp4 video 1162 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1163 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1164 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1165 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1166 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1167 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) 1168 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1169 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1170 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1171 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, 1172 
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, 1173 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1174 1175 # Dash mp4 audio 1176 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, 1177 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, 1178 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, 1179 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, 1180 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, 1181 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, 1182 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, 1183 1184 # Dash webm 1185 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1186 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1187 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1188 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1189 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1190 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1191 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, 1192 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1193 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 
'vcodec': 'vp9'}, 1194 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1195 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1196 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1197 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1198 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1199 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1200 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) 1201 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1202 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1203 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1204 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1205 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1206 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1207 1208 # Dash webm audio 1209 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, 1210 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, 1211 1212 # Dash webm audio with opus inside 1213 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, 1214 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, 1215 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, 1216 1217 # RTMP (unnamed) 1218 '_rtmp': {'protocol': 'rtmp'}, 1219 1220 # av01 video only formats sometimes served with "unknown" codecs 1221 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, 1222 '395': {'acodec': 'none', 'vcodec': 
'av01.0.05M.08'}, 1223 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, 1224 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, 1225 } 1226 1227 @classmethod 1228 def suitable(cls, url): 1229 # Hack for lazy extractors until more generic solution is implemented 1230 # (see #28780) 1231 from .youtube import parse_qs 1232 qs = parse_qs(url) 1233 if qs.get('list', [None])[0]: 1234 return False 1235 return super(YoutubeIE, cls).suitable(url) 1236 1237 def __init__(self, *args, **kwargs): 1238 super(YoutubeIE, self).__init__(*args, **kwargs) 1239 self._code_cache = {} 1240 self._player_cache = {} 1241 1242 def _signature_cache_id(self, example_sig): 1243 """ Return a string representation of a signature """ 1244 return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) 1245 1246 @classmethod 1247 def _extract_player_info(cls, player_url): 1248 for player_re in cls._PLAYER_INFO_RE: 1249 id_m = re.search(player_re, player_url) 1250 if id_m: 1251 break 1252 else: 1253 raise ExtractorError('Cannot identify player %r' % player_url) 1254 return id_m.group('id') 1255 1256 def _extract_signature_function(self, video_id, player_url, example_sig): 1257 player_id = self._extract_player_info(player_url) 1258 1259 # Read from filesystem cache 1260 func_id = 'js_%s_%s' % ( 1261 player_id, self._signature_cache_id(example_sig)) 1262 assert os.path.basename(func_id) == func_id 1263 1264 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) 1265 if cache_spec is not None: 1266 return lambda s: ''.join(s[i] for i in cache_spec) 1267 1268 if player_id not in self._code_cache: 1269 self._code_cache[player_id] = self._download_webpage( 1270 player_url, video_id, 1271 note='Downloading player ' + player_id, 1272 errnote='Download of %s failed' % player_url) 1273 code = self._code_cache[player_id] 1274 res = self._parse_sig_js(code) 1275 1276 test_string = ''.join(map(compat_chr, range(len(example_sig)))) 1277 cache_res = res(test_string) 1278 
cache_spec = [ord(c) for c in cache_res] 1279 1280 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) 1281 return res 1282 1283 def _print_sig_code(self, func, example_sig): 1284 def gen_sig_code(idxs): 1285 def _genslice(start, end, step): 1286 starts = '' if start == 0 else str(start) 1287 ends = (':%d' % (end + step)) if end + step >= 0 else ':' 1288 steps = '' if step == 1 else (':%d' % step) 1289 return 's[%s%s%s]' % (starts, ends, steps) 1290 1291 step = None 1292 # Quelch pyflakes warnings - start will be set when step is set 1293 start = '(Never used)' 1294 for i, prev in zip(idxs[1:], idxs[:-1]): 1295 if step is not None: 1296 if i - prev == step: 1297 continue 1298 yield _genslice(start, prev, step) 1299 step = None 1300 continue 1301 if i - prev in [-1, 1]: 1302 step = i - prev 1303 start = prev 1304 continue 1305 else: 1306 yield 's[%d]' % prev 1307 if step is None: 1308 yield 's[%d]' % i 1309 else: 1310 yield _genslice(start, i, step) 1311 1312 test_string = ''.join(map(compat_chr, range(len(example_sig)))) 1313 cache_res = func(test_string) 1314 cache_spec = [ord(c) for c in cache_res] 1315 expr_code = ' + '.join(gen_sig_code(cache_spec)) 1316 signature_id_tuple = '(%s)' % ( 1317 ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) 1318 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' 1319 ' return %s\n') % (signature_id_tuple, expr_code) 1320 self.to_screen('Extracted signature function:\n' + code) 1321 1322 def _parse_sig_js(self, jscode): 1323 funcname = self._search_regex( 1324 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1325 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1326 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', 1327 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', 1328 
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)', 1329 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', 1330 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', 1331 # Obsolete patterns 1332 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1333 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', 1334 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1335 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1336 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1337 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1338 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1339 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), 1340 jscode, 'Initial JS player signature function name', group='sig') 1341 1342 jsi = JSInterpreter(jscode) 1343 initial_function = jsi.extract_function(funcname) 1344 return lambda s: initial_function([s]) 1345 1346 def _decrypt_signature(self, s, video_id, player_url): 1347 """Turn the encrypted s field into a working signature""" 1348 1349 if player_url is None: 1350 raise ExtractorError('Cannot decrypt signature without player_url') 1351 1352 if player_url.startswith('//'): 1353 player_url = 'https:' + player_url 1354 elif not re.match(r'https?://', player_url): 1355 player_url = compat_urlparse.urljoin( 1356 'https://www.youtube.com', player_url) 1357 try: 1358 player_id = (player_url, self._signature_cache_id(s)) 1359 if player_id not in self._player_cache: 1360 func = self._extract_signature_function( 1361 video_id, player_url, s 1362 ) 1363 self._player_cache[player_id] = func 1364 func = self._player_cache[player_id] 
1365 if self._downloader.params.get('youtube_print_sig_code'): 1366 self._print_sig_code(func, s) 1367 return func(s) 1368 except Exception as e: 1369 tb = traceback.format_exc() 1370 raise ExtractorError( 1371 'Signature extraction failed: ' + tb, cause=e) 1372 1373 def _mark_watched(self, video_id, player_response): 1374 playback_url = url_or_none(try_get( 1375 player_response, 1376 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])) 1377 if not playback_url: 1378 return 1379 parsed_playback_url = compat_urlparse.urlparse(playback_url) 1380 qs = compat_urlparse.parse_qs(parsed_playback_url.query) 1381 1382 # cpn generation algorithm is reverse engineered from base.js. 1383 # In fact it works even with dummy cpn. 1384 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' 1385 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) 1386 1387 qs.update({ 1388 'ver': ['2'], 1389 'cpn': [cpn], 1390 }) 1391 playback_url = compat_urlparse.urlunparse( 1392 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) 1393 1394 self._download_webpage( 1395 playback_url, video_id, 'Marking watched', 1396 'Unable to mark watched', fatal=False) 1397 1398 @staticmethod 1399 def _extract_urls(webpage): 1400 # Embedded YouTube player 1401 entries = [ 1402 unescapeHTML(mobj.group('url')) 1403 for mobj in re.finditer(r'''(?x) 1404 (?: 1405 <iframe[^>]+?src=| 1406 data-video-url=| 1407 <embed[^>]+?src=| 1408 embedSWF\(?:\s*| 1409 <object[^>]+data=| 1410 new\s+SWFObject\( 1411 ) 1412 (["\']) 1413 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ 1414 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) 
1415 \1''', webpage)] 1416 1417 # lazyYT YouTube embed 1418 entries.extend(list(map( 1419 unescapeHTML, 1420 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) 1421 1422 # Wordpress "YouTube Video Importer" plugin 1423 matches = re.findall(r'''(?x)<div[^>]+ 1424 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ 1425 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) 1426 entries.extend(m[-1] for m in matches) 1427 1428 return entries 1429 1430 @staticmethod 1431 def _extract_url(webpage): 1432 urls = YoutubeIE._extract_urls(webpage) 1433 return urls[0] if urls else None 1434 1435 @classmethod 1436 def extract_id(cls, url): 1437 mobj = re.match(cls._VALID_URL, url, re.VERBOSE) 1438 if mobj is None: 1439 raise ExtractorError('Invalid URL: %s' % url) 1440 video_id = mobj.group(2) 1441 return video_id 1442 1443 def _extract_chapters_from_json(self, data, video_id, duration): 1444 chapters_list = try_get( 1445 data, 1446 lambda x: x['playerOverlays'] 1447 ['playerOverlayRenderer'] 1448 ['decoratedPlayerBarRenderer'] 1449 ['decoratedPlayerBarRenderer'] 1450 ['playerBar'] 1451 ['chapteredPlayerBarRenderer'] 1452 ['chapters'], 1453 list) 1454 if not chapters_list: 1455 return 1456 1457 def chapter_time(chapter): 1458 return float_or_none( 1459 try_get( 1460 chapter, 1461 lambda x: x['chapterRenderer']['timeRangeStartMillis'], 1462 int), 1463 scale=1000) 1464 chapters = [] 1465 for next_num, chapter in enumerate(chapters_list, start=1): 1466 start_time = chapter_time(chapter) 1467 if start_time is None: 1468 continue 1469 end_time = (chapter_time(chapters_list[next_num]) 1470 if next_num < len(chapters_list) else duration) 1471 if end_time is None: 1472 continue 1473 title = try_get( 1474 chapter, lambda x: x['chapterRenderer']['title']['simpleText'], 1475 compat_str) 1476 chapters.append({ 1477 'start_time': start_time, 1478 'end_time': end_time, 1479 'title': title, 1480 }) 1481 return chapters 1482 1483 def 
_extract_yt_initial_variable(self, webpage, regex, video_id, name): 1484 return self._parse_json(self._search_regex( 1485 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), 1486 regex), webpage, name, default='{}'), video_id, fatal=False) 1487 1488 def _real_extract(self, url): 1489 url, smuggled_data = unsmuggle_url(url, {}) 1490 video_id = self._match_id(url) 1491 base_url = self.http_scheme() + '//www.youtube.com/' 1492 webpage_url = base_url + 'watch?v=' + video_id 1493 webpage = self._download_webpage( 1494 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) 1495 1496 player_response = None 1497 if webpage: 1498 player_response = self._extract_yt_initial_variable( 1499 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, 1500 video_id, 'initial player response') 1501 if not player_response: 1502 player_response = self._call_api( 1503 'player', {'videoId': video_id}, video_id) 1504 1505 playability_status = player_response.get('playabilityStatus') or {} 1506 if playability_status.get('reason') == 'Sign in to confirm your age': 1507 video_info = self._download_webpage( 1508 base_url + 'get_video_info', video_id, 1509 'Refetching age-gated info webpage', 1510 'unable to download video info webpage', query={ 1511 'video_id': video_id, 1512 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 1513 'html5': 1, 1514 # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 1515 'c': 'TVHTML5', 1516 'cver': '6.20180913', 1517 }, fatal=False) 1518 if video_info: 1519 pr = self._parse_json( 1520 try_get( 1521 compat_parse_qs(video_info), 1522 lambda x: x['player_response'][0], compat_str) or '{}', 1523 video_id, fatal=False) 1524 if pr and isinstance(pr, dict): 1525 player_response = pr 1526 1527 trailer_video_id = try_get( 1528 playability_status, 1529 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'], 1530 compat_str) 1531 if trailer_video_id: 1532 return self.url_result( 1533 
trailer_video_id, self.ie_key(), trailer_video_id) 1534 1535 def get_text(x): 1536 if not x: 1537 return 1538 text = x.get('simpleText') 1539 if text and isinstance(text, compat_str): 1540 return text 1541 runs = x.get('runs') 1542 if not isinstance(runs, list): 1543 return 1544 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)]) 1545 1546 search_meta = ( 1547 lambda x: self._html_search_meta(x, webpage, default=None)) \ 1548 if webpage else lambda x: None 1549 1550 video_details = player_response.get('videoDetails') or {} 1551 microformat = try_get( 1552 player_response, 1553 lambda x: x['microformat']['playerMicroformatRenderer'], 1554 dict) or {} 1555 video_title = video_details.get('title') \ 1556 or get_text(microformat.get('title')) \ 1557 or search_meta(['og:title', 'twitter:title', 'title']) 1558 video_description = video_details.get('shortDescription') 1559 1560 if not smuggled_data.get('force_singlefeed', False): 1561 if not self._downloader.params.get('noplaylist'): 1562 multifeed_metadata_list = try_get( 1563 player_response, 1564 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'], 1565 compat_str) 1566 if multifeed_metadata_list: 1567 entries = [] 1568 feed_ids = [] 1569 for feed in multifeed_metadata_list.split(','): 1570 # Unquote should take place before split on comma (,) since textual 1571 # fields may contain comma as well (see 1572 # https://github.com/ytdl-org/youtube-dl/issues/8536) 1573 feed_data = compat_parse_qs( 1574 compat_urllib_parse_unquote_plus(feed)) 1575 1576 def feed_entry(name): 1577 return try_get( 1578 feed_data, lambda x: x[name][0], compat_str) 1579 1580 feed_id = feed_entry('id') 1581 if not feed_id: 1582 continue 1583 feed_title = feed_entry('title') 1584 title = video_title 1585 if feed_title: 1586 title += ' (%s)' % feed_title 1587 entries.append({ 1588 '_type': 'url_transparent', 1589 'ie_key': 'Youtube', 1590 'url': smuggle_url( 1591 base_url + 'watch?v=' + 
feed_data['id'][0], 1592 {'force_singlefeed': True}), 1593 'title': title, 1594 }) 1595 feed_ids.append(feed_id) 1596 self.to_screen( 1597 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' 1598 % (', '.join(feed_ids), video_id)) 1599 return self.playlist_result( 1600 entries, video_id, video_title, video_description) 1601 else: 1602 self.to_screen('Downloading just video %s because of --no-playlist' % video_id) 1603 1604 formats = [] 1605 itags = [] 1606 itag_qualities = {} 1607 player_url = None 1608 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres']) 1609 streaming_data = player_response.get('streamingData') or {} 1610 streaming_formats = streaming_data.get('formats') or [] 1611 streaming_formats.extend(streaming_data.get('adaptiveFormats') or []) 1612 for fmt in streaming_formats: 1613 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): 1614 continue 1615 1616 itag = str_or_none(fmt.get('itag')) 1617 quality = fmt.get('quality') 1618 if itag and quality: 1619 itag_qualities[itag] = quality 1620 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment 1621 # (adding `&sq=0` to the URL) and parsing emsg box to determine the 1622 # number of fragment that would subsequently requested with (`&sq=N`) 1623 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF': 1624 continue 1625 1626 fmt_url = fmt.get('url') 1627 if not fmt_url: 1628 sc = compat_parse_qs(fmt.get('signatureCipher')) 1629 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) 1630 encrypted_sig = try_get(sc, lambda x: x['s'][0]) 1631 if not (sc and fmt_url and encrypted_sig): 1632 continue 1633 if not player_url: 1634 if not webpage: 1635 continue 1636 player_url = self._search_regex( 1637 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"', 1638 webpage, 'player URL', fatal=False) 1639 if not player_url: 1640 continue 1641 signature = self._decrypt_signature(sc['s'][0], video_id, player_url) 1642 sp = 
try_get(sc, lambda x: x['sp'][0]) or 'signature' 1643 fmt_url += '&' + sp + '=' + signature 1644 1645 if itag: 1646 itags.append(itag) 1647 tbr = float_or_none( 1648 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) 1649 dct = { 1650 'asr': int_or_none(fmt.get('audioSampleRate')), 1651 'filesize': int_or_none(fmt.get('contentLength')), 1652 'format_id': itag, 1653 'format_note': fmt.get('qualityLabel') or quality, 1654 'fps': int_or_none(fmt.get('fps')), 1655 'height': int_or_none(fmt.get('height')), 1656 'quality': q(quality), 1657 'tbr': tbr, 1658 'url': fmt_url, 1659 'width': fmt.get('width'), 1660 } 1661 mimetype = fmt.get('mimeType') 1662 if mimetype: 1663 mobj = re.match( 1664 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype) 1665 if mobj: 1666 dct['ext'] = mimetype2ext(mobj.group(1)) 1667 dct.update(parse_codecs(mobj.group(2))) 1668 no_audio = dct.get('acodec') == 'none' 1669 no_video = dct.get('vcodec') == 'none' 1670 if no_audio: 1671 dct['vbr'] = tbr 1672 if no_video: 1673 dct['abr'] = tbr 1674 if no_audio or no_video: 1675 dct['downloader_options'] = { 1676 # Youtube throttles chunks >~10M 1677 'http_chunk_size': 10485760, 1678 } 1679 if dct.get('ext'): 1680 dct['container'] = dct['ext'] + '_dash' 1681 formats.append(dct) 1682 1683 hls_manifest_url = streaming_data.get('hlsManifestUrl') 1684 if hls_manifest_url: 1685 for f in self._extract_m3u8_formats( 1686 hls_manifest_url, video_id, 'mp4', fatal=False): 1687 itag = self._search_regex( 1688 r'/itag/(\d+)', f['url'], 'itag', default=None) 1689 if itag: 1690 f['format_id'] = itag 1691 formats.append(f) 1692 1693 if self._downloader.params.get('youtube_include_dash_manifest', True): 1694 dash_manifest_url = streaming_data.get('dashManifestUrl') 1695 if dash_manifest_url: 1696 for f in self._extract_mpd_formats( 1697 dash_manifest_url, video_id, fatal=False): 1698 itag = f['format_id'] 1699 if itag in itags: 1700 continue 1701 if itag in itag_qualities: 1702 f['quality'] = 
q(itag_qualities[itag]) 1703 filesize = int_or_none(self._search_regex( 1704 r'/clen/(\d+)', f.get('fragment_base_url') 1705 or f['url'], 'file size', default=None)) 1706 if filesize: 1707 f['filesize'] = filesize 1708 formats.append(f) 1709 1710 if not formats: 1711 if streaming_data.get('licenseInfos'): 1712 raise ExtractorError( 1713 'This video is DRM protected.', expected=True) 1714 pemr = try_get( 1715 playability_status, 1716 lambda x: x['errorScreen']['playerErrorMessageRenderer'], 1717 dict) or {} 1718 reason = get_text(pemr.get('reason')) or playability_status.get('reason') 1719 subreason = pemr.get('subreason') 1720 if subreason: 1721 subreason = clean_html(get_text(subreason)) 1722 if subreason == 'The uploader has not made this video available in your country.': 1723 countries = microformat.get('availableCountries') 1724 if not countries: 1725 regions_allowed = search_meta('regionsAllowed') 1726 countries = regions_allowed.split(',') if regions_allowed else None 1727 self.raise_geo_restricted( 1728 subreason, countries) 1729 reason += '\n' + subreason 1730 if reason: 1731 raise ExtractorError(reason, expected=True) 1732 1733 self._sort_formats(formats) 1734 1735 keywords = video_details.get('keywords') or [] 1736 if not keywords and webpage: 1737 keywords = [ 1738 unescapeHTML(m.group('content')) 1739 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)] 1740 for keyword in keywords: 1741 if keyword.startswith('yt:stretch='): 1742 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword) 1743 if mobj: 1744 # NB: float is intentional for forcing float division 1745 w, h = (float(v) for v in mobj.groups()) 1746 if w > 0 and h > 0: 1747 ratio = w / h 1748 for f in formats: 1749 if f.get('vcodec') != 'none': 1750 f['stretched_ratio'] = ratio 1751 break 1752 1753 thumbnails = [] 1754 for container in (video_details, microformat): 1755 for thumbnail in (try_get( 1756 container, 1757 lambda x: x['thumbnail']['thumbnails'], list) or []): 1758 thumbnail_url = 
thumbnail.get('url') 1759 if not thumbnail_url: 1760 continue 1761 thumbnails.append({ 1762 'height': int_or_none(thumbnail.get('height')), 1763 'url': thumbnail_url, 1764 'width': int_or_none(thumbnail.get('width')), 1765 }) 1766 if thumbnails: 1767 break 1768 else: 1769 thumbnail = search_meta(['og:image', 'twitter:image']) 1770 if thumbnail: 1771 thumbnails = [{'url': thumbnail}] 1772 1773 category = microformat.get('category') or search_meta('genre') 1774 channel_id = video_details.get('channelId') \ 1775 or microformat.get('externalChannelId') \ 1776 or search_meta('channelId') 1777 duration = int_or_none( 1778 video_details.get('lengthSeconds') 1779 or microformat.get('lengthSeconds')) \ 1780 or parse_duration(search_meta('duration')) 1781 is_live = video_details.get('isLive') 1782 owner_profile_url = microformat.get('ownerProfileUrl') 1783 1784 info = { 1785 'id': video_id, 1786 'title': self._live_title(video_title) if is_live else video_title, 1787 'formats': formats, 1788 'thumbnails': thumbnails, 1789 'description': video_description, 1790 'upload_date': unified_strdate( 1791 microformat.get('uploadDate') 1792 or search_meta('uploadDate')), 1793 'uploader': video_details['author'], 1794 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None, 1795 'uploader_url': owner_profile_url, 1796 'channel_id': channel_id, 1797 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None, 1798 'duration': duration, 1799 'view_count': int_or_none( 1800 video_details.get('viewCount') 1801 or microformat.get('viewCount') 1802 or search_meta('interactionCount')), 1803 'average_rating': float_or_none(video_details.get('averageRating')), 1804 'age_limit': 18 if ( 1805 microformat.get('isFamilySafe') is False 1806 or search_meta('isFamilyFriendly') == 'false' 1807 or search_meta('og:restrictions:age') == '18+') else 0, 1808 'webpage_url': webpage_url, 1809 'categories': 
[category] if category else None, 1810 'tags': keywords, 1811 'is_live': is_live, 1812 } 1813 1814 pctr = try_get( 1815 player_response, 1816 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict) 1817 if pctr: 1818 def process_language(container, base_url, lang_code, query): 1819 lang_subs = [] 1820 for fmt in self._SUBTITLE_FORMATS: 1821 query.update({ 1822 'fmt': fmt, 1823 }) 1824 lang_subs.append({ 1825 'ext': fmt, 1826 'url': update_url_query(base_url, query), 1827 }) 1828 container[lang_code] = lang_subs 1829 1830 subtitles = {} 1831 for caption_track in (pctr.get('captionTracks') or []): 1832 base_url = caption_track.get('baseUrl') 1833 if not base_url: 1834 continue 1835 if caption_track.get('kind') != 'asr': 1836 lang_code = caption_track.get('languageCode') 1837 if not lang_code: 1838 continue 1839 process_language( 1840 subtitles, base_url, lang_code, {}) 1841 continue 1842 automatic_captions = {} 1843 for translation_language in (pctr.get('translationLanguages') or []): 1844 translation_language_code = translation_language.get('languageCode') 1845 if not translation_language_code: 1846 continue 1847 process_language( 1848 automatic_captions, base_url, translation_language_code, 1849 {'tlang': translation_language_code}) 1850 info['automatic_captions'] = automatic_captions 1851 info['subtitles'] = subtitles 1852 1853 parsed_url = compat_urllib_parse_urlparse(url) 1854 for component in [parsed_url.fragment, parsed_url.query]: 1855 query = compat_parse_qs(component) 1856 for k, v in query.items(): 1857 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: 1858 d_k += '_time' 1859 if d_k not in info and k in s_ks: 1860 info[d_k] = parse_duration(query[k][0]) 1861 1862 if video_description: 1863 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated 
by YouTube\.\s*$', video_description) 1864 if mobj: 1865 release_year = mobj.group('release_year') 1866 release_date = mobj.group('release_date') 1867 if release_date: 1868 release_date = release_date.replace('-', '') 1869 if not release_year: 1870 release_year = release_date[:4] 1871 info.update({ 1872 'album': mobj.group('album'.strip()), 1873 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 1874 'track': mobj.group('track').strip(), 1875 'release_date': release_date, 1876 'release_year': int_or_none(release_year), 1877 }) 1878 1879 initial_data = None 1880 if webpage: 1881 initial_data = self._extract_yt_initial_variable( 1882 webpage, self._YT_INITIAL_DATA_RE, video_id, 1883 'yt initial data') 1884 if not initial_data: 1885 initial_data = self._call_api( 1886 'next', {'videoId': video_id}, video_id, fatal=False) 1887 1888 if initial_data: 1889 chapters = self._extract_chapters_from_json( 1890 initial_data, video_id, duration) 1891 if not chapters: 1892 for engagment_pannel in (initial_data.get('engagementPanels') or []): 1893 contents = try_get( 1894 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'], 1895 list) 1896 if not contents: 1897 continue 1898 1899 def chapter_time(mmlir): 1900 return parse_duration( 1901 get_text(mmlir.get('timeDescription'))) 1902 1903 chapters = [] 1904 for next_num, content in enumerate(contents, start=1): 1905 mmlir = content.get('macroMarkersListItemRenderer') or {} 1906 start_time = chapter_time(mmlir) 1907 end_time = chapter_time(try_get( 1908 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \ 1909 if next_num < len(contents) else duration 1910 if start_time is None or end_time is None: 1911 continue 1912 chapters.append({ 1913 'start_time': start_time, 1914 'end_time': end_time, 1915 'title': get_text(mmlir.get('title')), 1916 }) 1917 if chapters: 1918 break 1919 if chapters: 1920 
info['chapters'] = chapters 1921 1922 contents = try_get( 1923 initial_data, 1924 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], 1925 list) or [] 1926 for content in contents: 1927 vpir = content.get('videoPrimaryInfoRenderer') 1928 if vpir: 1929 stl = vpir.get('superTitleLink') 1930 if stl: 1931 stl = get_text(stl) 1932 if try_get( 1933 vpir, 1934 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': 1935 info['location'] = stl 1936 else: 1937 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) 1938 if mobj: 1939 info.update({ 1940 'series': mobj.group(1), 1941 'season_number': int(mobj.group(2)), 1942 'episode_number': int(mobj.group(3)), 1943 }) 1944 for tlb in (try_get( 1945 vpir, 1946 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'], 1947 list) or []): 1948 tbr = tlb.get('toggleButtonRenderer') or {} 1949 for getter, regex in [( 1950 lambda x: x['defaultText']['accessibility']['accessibilityData'], 1951 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([ 1952 lambda x: x['accessibility'], 1953 lambda x: x['accessibilityData']['accessibilityData'], 1954 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]: 1955 label = (try_get(tbr, getter, dict) or {}).get('label') 1956 if label: 1957 mobj = re.match(regex, label) 1958 if mobj: 1959 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count')) 1960 break 1961 sbr_tooltip = try_get( 1962 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) 1963 if sbr_tooltip: 1964 like_count, dislike_count = sbr_tooltip.split(' / ') 1965 info.update({ 1966 'like_count': str_to_int(like_count), 1967 'dislike_count': str_to_int(dislike_count), 1968 }) 1969 vsir = content.get('videoSecondaryInfoRenderer') 1970 if vsir: 1971 info['channel'] = get_text(try_get( 1972 vsir, 1973 lambda x: x['owner']['videoOwnerRenderer']['title'], 1974 dict)) 1975 rows = try_get( 1976 vsir, 1977 lambda x: 
x['metadataRowContainer']['metadataRowContainerRenderer']['rows'], 1978 list) or [] 1979 multiple_songs = False 1980 for row in rows: 1981 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: 1982 multiple_songs = True 1983 break 1984 for row in rows: 1985 mrr = row.get('metadataRowRenderer') or {} 1986 mrr_title = mrr.get('title') 1987 if not mrr_title: 1988 continue 1989 mrr_title = get_text(mrr['title']) 1990 mrr_contents_text = get_text(mrr['contents'][0]) 1991 if mrr_title == 'License': 1992 info['license'] = mrr_contents_text 1993 elif not multiple_songs: 1994 if mrr_title == 'Album': 1995 info['album'] = mrr_contents_text 1996 elif mrr_title == 'Artist': 1997 info['artist'] = mrr_contents_text 1998 elif mrr_title == 'Song': 1999 info['track'] = mrr_contents_text 2000 2001 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: 2002 v = info.get(s_k) 2003 if v: 2004 info[d_k] = v 2005 2006 self.mark_watched(video_id, player_response) 2007 2008 return info 2009 2010 2011 class YoutubeTabIE(YoutubeBaseInfoExtractor): 2012 IE_DESC = 'YouTube.com tab' 2013 _VALID_URL = r'''(?x) 2014 https?:// 2015 (?:\w+\.)? 
2016 (?: 2017 youtube(?:kids)?\.com| 2018 invidio\.us 2019 )/ 2020 (?: 2021 (?:channel|c|user|feed|hashtag)/| 2022 (?:playlist|watch)\?.*?\blist=| 2023 (?!(?:watch|embed|v|e)\b) 2024 ) 2025 (?P<id>[^/?\#&]+) 2026 ''' 2027 IE_NAME = 'youtube:tab' 2028 2029 _TESTS = [{ 2030 # playlists, multipage 2031 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', 2032 'playlist_mincount': 94, 2033 'info_dict': { 2034 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 2035 'title': 'Игорь Клейнер - Playlists', 2036 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 2037 }, 2038 }, { 2039 # playlists, multipage, different order 2040 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', 2041 'playlist_mincount': 94, 2042 'info_dict': { 2043 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 2044 'title': 'Игорь Клейнер - Playlists', 2045 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 2046 }, 2047 }, { 2048 # playlists, series 2049 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 2050 'playlist_mincount': 5, 2051 'info_dict': { 2052 'id': 'UCYO_jab_esuFRV4b17AJtAw', 2053 'title': '3Blue1Brown - Playlists', 2054 'description': 'md5:e1384e8a133307dd10edee76e875d62f', 2055 }, 2056 }, { 2057 # playlists, singlepage 2058 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', 2059 'playlist_mincount': 4, 2060 'info_dict': { 2061 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 2062 'title': 'ThirstForScience - Playlists', 2063 'description': 'md5:609399d937ea957b0f53cbffb747a14c', 2064 } 2065 }, { 2066 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 2067 'only_matching': True, 2068 }, { 2069 # basic, single video playlist 2070 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 2071 'info_dict': { 2072 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 2073 'uploader': 'Sergey M.', 2074 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 2075 'title': 'youtube-dl public playlist', 2076 }, 2077 
'playlist_count': 1, 2078 }, { 2079 # empty playlist 2080 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 2081 'info_dict': { 2082 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 2083 'uploader': 'Sergey M.', 2084 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 2085 'title': 'youtube-dl empty playlist', 2086 }, 2087 'playlist_count': 0, 2088 }, { 2089 # Home tab 2090 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', 2091 'info_dict': { 2092 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2093 'title': 'lex will - Home', 2094 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2095 }, 2096 'playlist_mincount': 2, 2097 }, { 2098 # Videos tab 2099 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', 2100 'info_dict': { 2101 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2102 'title': 'lex will - Videos', 2103 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2104 }, 2105 'playlist_mincount': 975, 2106 }, { 2107 # Videos tab, sorted by popular 2108 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', 2109 'info_dict': { 2110 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2111 'title': 'lex will - Videos', 2112 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2113 }, 2114 'playlist_mincount': 199, 2115 }, { 2116 # Playlists tab 2117 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', 2118 'info_dict': { 2119 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2120 'title': 'lex will - Playlists', 2121 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2122 }, 2123 'playlist_mincount': 17, 2124 }, { 2125 # Community tab 2126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 2127 'info_dict': { 2128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2129 'title': 'lex will - Community', 2130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2131 }, 2132 'playlist_mincount': 18, 2133 }, { 2134 # Channels tab 2135 'url': 
'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', 2136 'info_dict': { 2137 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2138 'title': 'lex will - Channels', 2139 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2140 }, 2141 'playlist_mincount': 138, 2142 }, { 2143 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 2144 'only_matching': True, 2145 }, { 2146 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 2147 'only_matching': True, 2148 }, { 2149 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 2150 'only_matching': True, 2151 }, { 2152 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 2153 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 2154 'info_dict': { 2155 'title': '29C3: Not my department', 2156 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 2157 'uploader': 'Christiaan008', 2158 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', 2159 }, 2160 'playlist_count': 96, 2161 }, { 2162 'note': 'Large playlist', 2163 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', 2164 'info_dict': { 2165 'title': 'Uploads from Cauchemar', 2166 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', 2167 'uploader': 'Cauchemar', 2168 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', 2169 }, 2170 'playlist_mincount': 1123, 2171 }, { 2172 # even larger playlist, 8832 videos 2173 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', 2174 'only_matching': True, 2175 }, { 2176 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 2177 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 2178 'info_dict': { 2179 'title': 'Uploads from Interstellar Movie', 2180 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', 2181 'uploader': 'Interstellar Movie', 2182 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', 2183 }, 2184 'playlist_mincount': 21, 2185 }, { 2186 # https://github.com/ytdl-org/youtube-dl/issues/21844 
2187 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 2188 'info_dict': { 2189 'title': 'Data Analysis with Dr Mike Pound', 2190 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 2191 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', 2192 'uploader': 'Computerphile', 2193 }, 2194 'playlist_mincount': 11, 2195 }, { 2196 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 2197 'only_matching': True, 2198 }, { 2199 # Playlist URL that does not actually serve a playlist 2200 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', 2201 'info_dict': { 2202 'id': 'FqZTN594JQw', 2203 'ext': 'webm', 2204 'title': "Smiley's People 01 detective, Adventure Series, Action", 2205 'uploader': 'STREEM', 2206 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', 2207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', 2208 'upload_date': '20150526', 2209 'license': 'Standard YouTube License', 2210 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 2211 'categories': ['People & Blogs'], 2212 'tags': list, 2213 'view_count': int, 2214 'like_count': int, 2215 'dislike_count': int, 2216 }, 2217 'params': { 2218 'skip_download': True, 2219 }, 2220 'skip': 'This video is not available.', 2221 'add_ie': [YoutubeIE.ie_key()], 2222 }, { 2223 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', 2224 'only_matching': True, 2225 }, { 2226 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 2227 'only_matching': True, 2228 }, { 2229 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 2230 'info_dict': { 2231 'id': '9Auq9mYxFEE', 2232 'ext': 'mp4', 2233 'title': 'Watch Sky News live', 2234 'uploader': 'Sky News', 2235 'uploader_id': 'skynews', 2236 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', 2237 'upload_date': '20191102', 2238 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662', 2239 
'categories': ['News & Politics'], 2240 'tags': list, 2241 'like_count': int, 2242 'dislike_count': int, 2243 }, 2244 'params': { 2245 'skip_download': True, 2246 }, 2247 }, { 2248 'url': 'https://www.youtube.com/user/TheYoungTurks/live', 2249 'info_dict': { 2250 'id': 'a48o2S1cPoo', 2251 'ext': 'mp4', 2252 'title': 'The Young Turks - Live Main Show', 2253 'uploader': 'The Young Turks', 2254 'uploader_id': 'TheYoungTurks', 2255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', 2256 'upload_date': '20150715', 2257 'license': 'Standard YouTube License', 2258 'description': 'md5:438179573adcdff3c97ebb1ee632b891', 2259 'categories': ['News & Politics'], 2260 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], 2261 'like_count': int, 2262 'dislike_count': int, 2263 }, 2264 'params': { 2265 'skip_download': True, 2266 }, 2267 'only_matching': True, 2268 }, { 2269 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 2270 'only_matching': True, 2271 }, { 2272 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', 2273 'only_matching': True, 2274 }, { 2275 'url': 'https://www.youtube.com/feed/trending', 2276 'only_matching': True, 2277 }, { 2278 # needs auth 2279 'url': 'https://www.youtube.com/feed/library', 2280 'only_matching': True, 2281 }, { 2282 # needs auth 2283 'url': 'https://www.youtube.com/feed/history', 2284 'only_matching': True, 2285 }, { 2286 # needs auth 2287 'url': 'https://www.youtube.com/feed/subscriptions', 2288 'only_matching': True, 2289 }, { 2290 # needs auth 2291 'url': 'https://www.youtube.com/feed/watch_later', 2292 'only_matching': True, 2293 }, { 2294 # no longer available? 
2295 'url': 'https://www.youtube.com/feed/recommended', 2296 'only_matching': True, 2297 }, { 2298 # inline playlist with not always working continuations 2299 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', 2300 'only_matching': True, 2301 }, { 2302 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8', 2303 'only_matching': True, 2304 }, { 2305 'url': 'https://www.youtube.com/course', 2306 'only_matching': True, 2307 }, { 2308 'url': 'https://www.youtube.com/zsecurity', 2309 'only_matching': True, 2310 }, { 2311 'url': 'http://www.youtube.com/NASAgovVideo/videos', 2312 'only_matching': True, 2313 }, { 2314 'url': 'https://www.youtube.com/TheYoungTurks/live', 2315 'only_matching': True, 2316 }, { 2317 'url': 'https://www.youtube.com/hashtag/cctv9', 2318 'info_dict': { 2319 'id': 'cctv9', 2320 'title': '#cctv9', 2321 }, 2322 'playlist_mincount': 350, 2323 }, { 2324 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', 2325 'only_matching': True, 2326 }] 2327 2328 @classmethod 2329 def suitable(cls, url): 2330 return False if YoutubeIE.suitable(url) else super( 2331 YoutubeTabIE, cls).suitable(url) 2332 2333 def _extract_channel_id(self, webpage): 2334 channel_id = self._html_search_meta( 2335 'channelId', webpage, 'channel id', default=None) 2336 if channel_id: 2337 return channel_id 2338 channel_url = self._html_search_meta( 2339 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', 2340 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', 2341 'twitter:app:url:googleplay'), webpage, 'channel url') 2342 return self._search_regex( 2343 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', 2344 channel_url, 'channel id') 2345 2346 @staticmethod 2347 def _extract_grid_item_renderer(item): 2348 assert isinstance(item, dict) 2349 for key, renderer in item.items(): 2350 if not key.startswith('grid') or not key.endswith('Renderer'): 2351 
continue 2352 if not isinstance(renderer, dict): 2353 continue 2354 return renderer 2355 2356 def _grid_entries(self, grid_renderer): 2357 for item in grid_renderer['items']: 2358 if not isinstance(item, dict): 2359 continue 2360 renderer = self._extract_grid_item_renderer(item) 2361 if not isinstance(renderer, dict): 2362 continue 2363 title = try_get( 2364 renderer, (lambda x: x['title']['runs'][0]['text'], 2365 lambda x: x['title']['simpleText']), compat_str) 2366 # playlist 2367 playlist_id = renderer.get('playlistId') 2368 if playlist_id: 2369 yield self.url_result( 2370 'https://www.youtube.com/playlist?list=%s' % playlist_id, 2371 ie=YoutubeTabIE.ie_key(), video_id=playlist_id, 2372 video_title=title) 2373 continue 2374 # video 2375 video_id = renderer.get('videoId') 2376 if video_id: 2377 yield self._extract_video(renderer) 2378 continue 2379 # channel 2380 channel_id = renderer.get('channelId') 2381 if channel_id: 2382 title = try_get( 2383 renderer, lambda x: x['title']['simpleText'], compat_str) 2384 yield self.url_result( 2385 'https://www.youtube.com/channel/%s' % channel_id, 2386 ie=YoutubeTabIE.ie_key(), video_title=title) 2387 continue 2388 # generic endpoint URL support 2389 ep_url = urljoin('https://www.youtube.com/', try_get( 2390 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], 2391 compat_str)) 2392 if ep_url: 2393 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE): 2394 if ie.suitable(ep_url): 2395 yield self.url_result( 2396 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title) 2397 break 2398 2399 def _shelf_entries_from_content(self, shelf_renderer): 2400 content = shelf_renderer.get('content') 2401 if not isinstance(content, dict): 2402 return 2403 renderer = content.get('gridRenderer') 2404 if renderer: 2405 # TODO: add support for nested playlists so each shelf is processed 2406 # as separate playlist 2407 # TODO: this includes only first N items 2408 for entry in 
self._grid_entries(renderer): 2409 yield entry 2410 renderer = content.get('horizontalListRenderer') 2411 if renderer: 2412 # TODO 2413 pass 2414 2415 def _shelf_entries(self, shelf_renderer, skip_channels=False): 2416 ep = try_get( 2417 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], 2418 compat_str) 2419 shelf_url = urljoin('https://www.youtube.com', ep) 2420 if shelf_url: 2421 # Skipping links to another channels, note that checking for 2422 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL 2423 # will not work 2424 if skip_channels and '/channels?' in shelf_url: 2425 return 2426 title = try_get( 2427 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) 2428 yield self.url_result(shelf_url, video_title=title) 2429 # Shelf may not contain shelf URL, fallback to extraction from content 2430 for entry in self._shelf_entries_from_content(shelf_renderer): 2431 yield entry 2432 2433 def _playlist_entries(self, video_list_renderer): 2434 for content in video_list_renderer['contents']: 2435 if not isinstance(content, dict): 2436 continue 2437 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer') 2438 if not isinstance(renderer, dict): 2439 continue 2440 video_id = renderer.get('videoId') 2441 if not video_id: 2442 continue 2443 yield self._extract_video(renderer) 2444 2445 def _video_entry(self, video_renderer): 2446 video_id = video_renderer.get('videoId') 2447 if video_id: 2448 return self._extract_video(video_renderer) 2449 2450 def _post_thread_entries(self, post_thread_renderer): 2451 post_renderer = try_get( 2452 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict) 2453 if not post_renderer: 2454 return 2455 # video attachment 2456 video_renderer = try_get( 2457 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) 2458 video_id = None 2459 if video_renderer: 2460 entry = 
self._video_entry(video_renderer) 2461 if entry: 2462 yield entry 2463 # inline video links 2464 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] 2465 for run in runs: 2466 if not isinstance(run, dict): 2467 continue 2468 ep_url = try_get( 2469 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str) 2470 if not ep_url: 2471 continue 2472 if not YoutubeIE.suitable(ep_url): 2473 continue 2474 ep_video_id = YoutubeIE._match_id(ep_url) 2475 if video_id == ep_video_id: 2476 continue 2477 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id) 2478 2479 def _post_thread_continuation_entries(self, post_thread_continuation): 2480 contents = post_thread_continuation.get('contents') 2481 if not isinstance(contents, list): 2482 return 2483 for content in contents: 2484 renderer = content.get('backstagePostThreadRenderer') 2485 if not isinstance(renderer, dict): 2486 continue 2487 for entry in self._post_thread_entries(renderer): 2488 yield entry 2489 2490 def _rich_grid_entries(self, contents): 2491 for content in contents: 2492 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict) 2493 if video_renderer: 2494 entry = self._video_entry(video_renderer) 2495 if entry: 2496 yield entry 2497 2498 @staticmethod 2499 def _build_continuation_query(continuation, ctp=None): 2500 query = { 2501 'ctoken': continuation, 2502 'continuation': continuation, 2503 } 2504 if ctp: 2505 query['itct'] = ctp 2506 return query 2507 2508 @staticmethod 2509 def _extract_next_continuation_data(renderer): 2510 next_continuation = try_get( 2511 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) 2512 if not next_continuation: 2513 return 2514 continuation = next_continuation.get('continuation') 2515 if not continuation: 2516 return 2517 ctp = next_continuation.get('clickTrackingParams') 2518 return YoutubeTabIE._build_continuation_query(continuation, ctp) 2519 2520 
@classmethod
    def _extract_continuation(cls, renderer):
        # Prefer the old-style nextContinuationData; otherwise scan the
        # renderer's 'contents'/'items' for a continuationItemRenderer and
        # build the query from its continuationCommand token.
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation
        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])
        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                dict)
            if not continuation_ep:
                continue
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                continue
            ctp = continuation_ep.get('clickTrackingParams')
            return YoutubeTabIE._build_continuation_query(continuation, ctp)

    def _entries(self, tab, item_id, webpage):
        """
        Yield all entries of the selected tab, following continuations.

        First walks the initial tab content (sectionListRenderer or
        richGridRenderer), then pages through the InnerTube /browse API
        using the continuation tokens collected along the way.
        """
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        if slr_renderer:
            is_channels_tab = tab.get('title') == 'Channels'
            continuation = None
            slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
            for slr_content in slr_contents:
                if not isinstance(slr_content, dict):
                    continue
                is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    # Dispatch on whichever renderer type this item carries;
                    # each branch also refreshes the continuation token.
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # On non-Channels tabs, skip shelves linking to other channels
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation:
                    continuation = self._extract_continuation(is_renderer)
            if not continuation:
                continuation = self._extract_continuation(slr_renderer)
        else:
            rich_grid_renderer = tab_content.get('richGridRenderer')
            if not rich_grid_renderer:
                return
            for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
                yield entry
            continuation = self._extract_continuation(rich_grid_renderer)

        ytcfg = self._extract_ytcfg(item_id, webpage)
        client_version = try_get(
            ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': client_version,
            'content-type': 'application/json',
        }

        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
            'client': {
                'clientName': 'WEB',
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        identity_token = self._extract_identity_token(ytcfg, webpage)
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        data = {
            'context': context,
        }

        for page_num in itertools.count(1):
            if not continuation:
                break
            if visitor_data:
                headers['x-goog-visitor-id'] = visitor_data
            data['continuation'] = continuation['continuation']
            data['clickTracking'] = {
                'clickTrackingParams': continuation['itct']
            }
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    response = self._download_json(
                        'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                        None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, data=json.dumps(data).encode('utf8'))
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not response:
                break

            # Carry forward the visitorData returned by the API, if any
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Legacy continuation response shape
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue

            # Newer continuation response shape: appendContinuationItemsAction.
            # The first item's renderer type decides how the whole batch is parsed.
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): 'continue' here re-requests the same
                    # continuation token next iteration — could loop if the
                    # server keeps returning the same malformed page; verify.
                    continue
                renderer = self._extract_grid_item_renderer(continuation_item)
                if renderer:
                    grid_renderer = {'items': continuation_items}
                    for entry in self._grid_entries(grid_renderer):
                        yield entry
                    continuation = self._extract_continuation(grid_renderer)
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
                renderer = continuation_item.get('backstagePostThreadRenderer')
                if renderer:
                    continuation_renderer = {'contents': continuation_items}
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                renderer = continuation_item.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_grid_entries(continuation_items):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    continue

            break

    @staticmethod
    def _extract_selected_tab(tabs):
        # Return the tabRenderer marked selected; for/else raises when the
        # loop finds none (no tab has selected == True).
        for tab in tabs:
            if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
                return tab['tabRenderer']
        else:
            raise ExtractorError('Unable to find selected tab')

    @staticmethod
    def _extract_uploader(data):
        # Collect uploader name/id/url from the playlist sidebar, returning
        # a (possibly empty) dict suitable for info-dict update().
        uploader = {}
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if sidebar_renderer:
            for item in sidebar_renderer:
                if not isinstance(item, dict):
                    continue
                renderer = item.get('playlistSidebarSecondaryInfoRenderer')
                if not isinstance(renderer, dict):
                    continue
                owner = try_get(
                    renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
                if owner:
                    uploader['uploader'] = owner.get('text')
                    uploader['uploader_id'] = try_get(
                        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
                    uploader['uploader_url'] = urljoin(
                        'https://www.youtube.com/',
                        try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
        return uploader

    @staticmethod
    def _extract_alert(data):
        # Join the text of all alertRenderers (either simpleText or the
        # first text run) into one newline-separated string.
        alerts = []
        for alert in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert, dict):
                continue
            alert_text = try_get(
                alert, lambda x: x['alertRenderer']['text'], dict)
            if not alert_text:
                continue
            text = try_get(
                alert_text,
                (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
                compat_str)
            if text:
                alerts.append(text)
        return '\n'.join(alerts)

    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """
        Build a playlist result from a tabbed (channel/playlist/hashtag) page.

        Title/description/id come from channelMetadataRenderer when present,
        then playlistMetadataRenderer, then hashtagHeaderRenderer; entries
        come from the selected tab via _entries().
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = item_id
        title = description = None
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            # channel_title already falls back to item_id, so the second
            # 'or item_id' below is redundant but harmless
            title = channel_title or item_id
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
            if renderer:
                title = renderer.get('title')
            else:
                renderer = try_get(
                    data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
                if renderer:
                    title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
        playlist = self.playlist_result(
            self._entries(selected_tab, item_id, webpage),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist

    def _extract_from_playlist(self, item_id, url, data, playlist):
        # Build a result from an inline (watch-page) playlist renderer,
        # delegating to the regular playlist URL when one is available.
        title = playlist.get('title') or try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
        playlist_id = playlist.get('playlistId') or item_id
        # Inline playlist rendition continuation does not always work
        # at Youtube side, so delegating regular tab-based playlist URL
        # processing whenever possible.
        playlist_url = urljoin(url, try_get(
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if playlist_url and playlist_url != url:
            return self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)

    def _extract_identity_token(self, ytcfg, webpage):
        # ID_TOKEN from ytcfg when available, else regex fallback on the page.
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    def _real_extract(self, url):
        item_id = self._match_id(url)
        # Normalize host so subdomain/consent variants hit www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Capture and output alerts
        alert = self._extract_alert(data)
        if alert:
            raise ExtractorError(alert, expected=True)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')


class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Matches bare playlist ids and list= query URLs; delegates actual
    # extraction to YoutubeTabIE via a canonical /playlist URL.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for anything it matches, and refuse URLs
        # that carry a v= parameter (those are watch URLs, not playlists).
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        qs = parse_qs(url)
        if not qs:
            # bare playlist id given — synthesize the list= query
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)


class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a list= parameter.
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Re-issue as a full watch URL so YoutubeTabIE handles the
        # video-vs-playlist decision (including --no-playlist).
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)


class YoutubeYtUserIE(InfoExtractor):
    # 'ytuser:NAME' shorthand — delegates to the /user/ channel page.
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        return self.url_result(
            'https://www.youtube.com/user/%s' % user_id,
            ie=YoutubeTabIE.ie_key(), video_id=user_id)


class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Favourites are exposed as the special 'LL' (liked) playlist.
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())


class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # extra InnerTube search params (e.g. sort order); set by subclasses
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query via the InnerTube search API."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # first page and continuation pages use different response shapes
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            for slr_content in slr_contents:
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue
                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return
            # continuation token lives in the last section-list item
            token = try_get(
                slr_contents,
                lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)


class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # InnerTube params blob selecting upload-date sort order
    _SEARCH_PARAMS = 'CAI%3D'


r"""
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
"""


class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())


class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch Later is the special 'WL' playlist.
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())


class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r':ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }]


class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r':ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]


class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]


class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs whose v= parameter was eaten by the shell and
    # raises a helpful error instead of a confusing extraction failure.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)


class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the required 11 chars.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)