# youtube.py (142971B)
1 # coding: utf-8 2 3 from __future__ import unicode_literals 4 5 import itertools 6 import json 7 import os.path 8 import random 9 import re 10 import traceback 11 12 from .common import InfoExtractor, SearchInfoExtractor 13 from ..compat import ( 14 compat_chr, 15 compat_HTTPError, 16 compat_parse_qs, 17 compat_str, 18 compat_urllib_parse_unquote_plus, 19 compat_urllib_parse_urlencode, 20 compat_urllib_parse_urlparse, 21 compat_urlparse, 22 ) 23 from ..jsinterp import JSInterpreter 24 from ..utils import ( 25 ExtractorError, 26 clean_html, 27 dict_get, 28 float_or_none, 29 int_or_none, 30 mimetype2ext, 31 parse_codecs, 32 parse_duration, 33 qualities, 34 remove_start, 35 smuggle_url, 36 str_or_none, 37 str_to_int, 38 try_get, 39 unescapeHTML, 40 unified_strdate, 41 unsmuggle_url, 42 update_url_query, 43 url_or_none, 44 urlencode_postdata, 45 urljoin, 46 ) 47 48 49 def parse_qs(url): 50 return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 51 52 53 class YoutubeBaseInfoExtractor(InfoExtractor): 54 """Provide base functions for Youtube extractors""" 55 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' 56 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' 57 58 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' 59 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' 60 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' 61 62 _NETRC_MACHINE = 'youtube' 63 # If True it will raise an error if no login info is provided 64 _LOGIN_REQUIRED = False 65 66 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)' 67 68 def _login(self): 69 """ 70 Attempt to log in to YouTube. 71 True is returned if successful or skipped. 72 False is returned if login failed. 73 74 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. 
75 """ 76 username, password = self._get_login_info() 77 # No authentication to be performed 78 if username is None: 79 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None: 80 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) 81 return True 82 83 login_page = self._download_webpage( 84 self._LOGIN_URL, None, 85 note='Downloading login page', 86 errnote='unable to fetch login page', fatal=False) 87 if login_page is False: 88 return 89 90 login_form = self._hidden_inputs(login_page) 91 92 def req(url, f_req, note, errnote): 93 data = login_form.copy() 94 data.update({ 95 'pstMsg': 1, 96 'checkConnection': 'youtube', 97 'checkedDomains': 'youtube', 98 'hl': 'en', 99 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]', 100 'f.req': json.dumps(f_req), 101 'flowName': 'GlifWebSignIn', 102 'flowEntry': 'ServiceLogin', 103 # TODO: reverse actual botguard identifier generation algo 104 'bgRequest': '["identifier",""]', 105 }) 106 return self._download_json( 107 url, None, note=note, errnote=errnote, 108 transform_source=lambda s: re.sub(r'^[^[]*', '', s), 109 fatal=False, 110 data=urlencode_postdata(data), headers={ 111 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', 112 'Google-Accounts-XSRF': 1, 113 }) 114 115 def warn(message): 116 self._downloader.report_warning(message) 117 118 lookup_req = [ 119 username, 120 None, [], None, 'US', None, None, 2, False, True, 121 [ 122 None, None, 123 [2, 1, None, 1, 124 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', 125 None, [], 4], 126 1, [None, None, []], None, None, None, True 127 ], 128 username, 129 ] 130 131 lookup_results = req( 132 self._LOOKUP_URL, 
lookup_req, 133 'Looking up account info', 'Unable to look up account info') 134 135 if lookup_results is False: 136 return False 137 138 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str) 139 if not user_hash: 140 warn('Unable to extract user hash') 141 return False 142 143 challenge_req = [ 144 user_hash, 145 None, 1, None, [1, None, None, None, [password, None, True]], 146 [ 147 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4], 148 1, [None, None, []], None, None, None, True 149 ]] 150 151 challenge_results = req( 152 self._CHALLENGE_URL, challenge_req, 153 'Logging in', 'Unable to log in') 154 155 if challenge_results is False: 156 return 157 158 login_res = try_get(challenge_results, lambda x: x[0][5], list) 159 if login_res: 160 login_msg = try_get(login_res, lambda x: x[5], compat_str) 161 warn( 162 'Unable to login: %s' % 'Invalid password' 163 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg) 164 return False 165 166 res = try_get(challenge_results, lambda x: x[0][-1], list) 167 if not res: 168 warn('Unable to extract result entry') 169 return False 170 171 login_challenge = try_get(res, lambda x: x[0][0], list) 172 if login_challenge: 173 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str) 174 if challenge_str == 'TWO_STEP_VERIFICATION': 175 # SEND_SUCCESS - TFA code has been successfully sent to phone 176 # QUOTA_EXCEEDED - reached the limit of TFA codes 177 status = try_get(login_challenge, lambda x: x[5], compat_str) 178 if status == 'QUOTA_EXCEEDED': 179 warn('Exceeded the limit of TFA codes, try later') 180 return False 181 182 tl = try_get(challenge_results, lambda x: x[1][2], compat_str) 183 if not tl: 184 warn('Unable to extract 
TL') 185 return False 186 187 tfa_code = self._get_tfa_info('2-step verification code') 188 189 if not tfa_code: 190 warn( 191 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' 192 '(Note that only TOTP (Google Authenticator App) codes work at this time.)') 193 return False 194 195 tfa_code = remove_start(tfa_code, 'G-') 196 197 tfa_req = [ 198 user_hash, None, 2, None, 199 [ 200 9, None, None, None, None, None, None, None, 201 [None, tfa_code, True, 2] 202 ]] 203 204 tfa_results = req( 205 self._TFA_URL.format(tl), tfa_req, 206 'Submitting TFA code', 'Unable to submit TFA code') 207 208 if tfa_results is False: 209 return False 210 211 tfa_res = try_get(tfa_results, lambda x: x[0][5], list) 212 if tfa_res: 213 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str) 214 warn( 215 'Unable to finish TFA: %s' % 'Invalid TFA code' 216 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg) 217 return False 218 219 check_cookie_url = try_get( 220 tfa_results, lambda x: x[0][-1][2], compat_str) 221 else: 222 CHALLENGES = { 223 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.", 224 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.', 225 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.", 226 } 227 challenge = CHALLENGES.get( 228 challenge_str, 229 '%s returned error %s.' % (self.IE_NAME, challenge_str)) 230 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' 
% challenge) 231 return False 232 else: 233 check_cookie_url = try_get(res, lambda x: x[2], compat_str) 234 235 if not check_cookie_url: 236 warn('Unable to extract CheckCookie URL') 237 return False 238 239 check_cookie_results = self._download_webpage( 240 check_cookie_url, None, 'Checking cookie', fatal=False) 241 242 if check_cookie_results is False: 243 return False 244 245 if 'https://myaccount.google.com/' not in check_cookie_results: 246 warn('Unable to log in') 247 return False 248 249 return True 250 251 def _initialize_consent(self): 252 cookies = self._get_cookies('https://www.youtube.com/') 253 if cookies.get('__Secure-3PSID'): 254 return 255 consent_id = None 256 consent = cookies.get('CONSENT') 257 if consent: 258 if 'YES' in consent.value: 259 return 260 consent_id = self._search_regex( 261 r'PENDING\+(\d+)', consent.value, 'consent', default=None) 262 if not consent_id: 263 consent_id = random.randint(100, 999) 264 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id) 265 266 def _real_initialize(self): 267 self._initialize_consent() 268 if self._downloader is None: 269 return 270 if not self._login(): 271 return 272 273 _DEFAULT_API_DATA = { 274 'context': { 275 'client': { 276 'clientName': 'WEB', 277 'clientVersion': '2.20201021.03.00', 278 } 279 }, 280 } 281 282 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' 283 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' 284 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)' 285 286 def _call_api(self, ep, query, video_id, fatal=True): 287 data = self._DEFAULT_API_DATA.copy() 288 data.update(query) 289 290 return self._download_json( 291 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id, 292 note='Downloading API JSON', errnote='Unable to download API page', 293 data=json.dumps(data).encode('utf8'), fatal=fatal, 294 headers={'content-type': 'application/json'}, 295 
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'}) 296 297 def _extract_yt_initial_data(self, video_id, webpage): 298 return self._parse_json( 299 self._search_regex( 300 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), 301 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), 302 video_id) 303 304 def _extract_ytcfg(self, video_id, webpage): 305 return self._parse_json( 306 self._search_regex( 307 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', 308 default='{}'), video_id, fatal=False) or {} 309 310 def _extract_video(self, renderer): 311 video_id = renderer['videoId'] 312 title = try_get( 313 renderer, 314 (lambda x: x['title']['runs'][0]['text'], 315 lambda x: x['title']['simpleText']), compat_str) 316 description = try_get( 317 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], 318 compat_str) 319 duration = parse_duration(try_get( 320 renderer, lambda x: x['lengthText']['simpleText'], compat_str)) 321 view_count_text = try_get( 322 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' 323 view_count = str_to_int(self._search_regex( 324 r'^([\d,]+)', re.sub(r'\s', '', view_count_text), 325 'view count', default=None)) 326 uploader = try_get( 327 renderer, 328 (lambda x: x['ownerText']['runs'][0]['text'], 329 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str) 330 return { 331 '_type': 'url', 332 'ie_key': YoutubeIE.ie_key(), 333 'id': video_id, 334 'url': video_id, 335 'title': title, 336 'description': description, 337 'duration': duration, 338 'view_count': view_count, 339 'uploader': uploader, 340 } 341 342 343 class YoutubeIE(YoutubeBaseInfoExtractor): 344 IE_DESC = 'YouTube.com' 345 _INVIDIOUS_SITES = ( 346 # invidious-redirect websites 347 r'(?:www\.)?redirect\.invidious\.io', 348 r'(?:(?:www|dev)\.)?invidio\.us', 349 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md 350 r'(?:(?:www|no)\.)?invidiou\.sh', 351 
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', 352 r'(?:www\.)?invidious\.kabi\.tk', 353 r'(?:www\.)?invidious\.13ad\.de', 354 r'(?:www\.)?invidious\.mastodon\.host', 355 r'(?:www\.)?invidious\.zapashcanon\.fr', 356 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks', 357 r'(?:www\.)?invidious\.tinfoil-hat\.net', 358 r'(?:www\.)?invidious\.himiko\.cloud', 359 r'(?:www\.)?invidious\.reallyancient\.tech', 360 r'(?:www\.)?invidious\.tube', 361 r'(?:www\.)?invidiou\.site', 362 r'(?:www\.)?invidious\.site', 363 r'(?:www\.)?invidious\.xyz', 364 r'(?:www\.)?invidious\.nixnet\.xyz', 365 r'(?:www\.)?invidious\.048596\.xyz', 366 r'(?:www\.)?invidious\.drycat\.fr', 367 r'(?:www\.)?inv\.skyn3t\.in', 368 r'(?:www\.)?tube\.poal\.co', 369 r'(?:www\.)?tube\.connect\.cafe', 370 r'(?:www\.)?vid\.wxzm\.sx', 371 r'(?:www\.)?vid\.mint\.lgbt', 372 r'(?:www\.)?vid\.puffyan\.us', 373 r'(?:www\.)?yewtu\.be', 374 r'(?:www\.)?yt\.elukerio\.org', 375 r'(?:www\.)?yt\.lelux\.fi', 376 r'(?:www\.)?invidious\.ggc-project\.de', 377 r'(?:www\.)?yt\.maisputain\.ovh', 378 r'(?:www\.)?ytprivate\.com', 379 r'(?:www\.)?invidious\.13ad\.de', 380 r'(?:www\.)?invidious\.toot\.koeln', 381 r'(?:www\.)?invidious\.fdn\.fr', 382 r'(?:www\.)?watch\.nettohikari\.com', 383 r'(?:www\.)?invidious\.namazso\.eu', 384 r'(?:www\.)?invidious\.silkky\.cloud', 385 r'(?:www\.)?invidious\.exonip\.de', 386 r'(?:www\.)?invidious\.riverside\.rocks', 387 r'(?:www\.)?invidious\.blamefran\.net', 388 r'(?:www\.)?invidious\.moomoo\.de', 389 r'(?:www\.)?ytb\.trom\.tf', 390 r'(?:www\.)?yt\.cyberhost\.uk', 391 r'(?:www\.)?kgg2m7yk5aybusll\.onion', 392 r'(?:www\.)?qklhadlycap4cnod\.onion', 393 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', 394 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion', 395 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion', 396 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', 397 
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', 398 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', 399 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', 400 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', 401 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', 402 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', 403 ) 404 _VALID_URL = r"""(?x)^ 405 ( 406 (?:https?://|//) # http(s):// or protocol-independent URL 407 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com| 408 (?:www\.)?deturl\.com/www\.youtube\.com| 409 (?:www\.)?pwnyoutube\.com| 410 (?:www\.)?hooktube\.com| 411 (?:www\.)?yourepeat\.com| 412 tube\.majestyc\.net| 413 %(invidious)s| 414 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains 415 (?:.*?\#/)? # handle anchor (#/) redirect urls 416 (?: # the various things that can precede the ID: 417 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/ 418 |(?: # or the v= param in all its forms 419 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) 420 (?:\?|\#!?) # the params delimiter ? or # or #! 421 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY) 422 v= 423 ) 424 )) 425 |(?: 426 youtu\.be| # just youtu.be/xxxx 427 vid\.plus| # or vid.plus/xxxx 428 zwearz\.com/watch| # or zwearz.com/watch/xxxx 429 %(invidious)s 430 )/ 431 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= 432 ) 433 )? # all until now is optional -> you can pass the naked ID 434 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID 435 (?(1).+)? 
# if we found the ID, everything can follow 436 $""" % { 437 'invidious': '|'.join(_INVIDIOUS_SITES), 438 } 439 _PLAYER_INFO_RE = ( 440 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', 441 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', 442 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', 443 ) 444 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') 445 446 _GEO_BYPASS = False 447 448 IE_NAME = 'youtube' 449 _TESTS = [ 450 { 451 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', 452 'info_dict': { 453 'id': 'BaW_jenozKc', 454 'ext': 'mp4', 455 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 456 'uploader': 'Philipp Hagemeister', 457 'uploader_id': 'phihag', 458 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 459 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 460 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 461 'upload_date': '20121002', 462 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 463 'categories': ['Science & Technology'], 464 'tags': ['youtube-dl'], 465 'duration': 10, 466 'view_count': int, 467 'like_count': int, 468 'dislike_count': int, 469 'start_time': 1, 470 'end_time': 9, 471 } 472 }, 473 { 474 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', 475 'note': 'Embed-only video (#1746)', 476 'info_dict': { 477 'id': 'yZIXLfi8CZQ', 478 'ext': 'mp4', 479 'upload_date': '20120608', 480 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', 481 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 482 'uploader': 'SET India', 483 'uploader_id': 'setindia', 484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', 485 'age_limit': 18, 486 }, 487 'skip': 'Private video', 488 }, 489 { 490 'url': 
'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ', 491 'note': 'Use the first video ID in the URL', 492 'info_dict': { 493 'id': 'BaW_jenozKc', 494 'ext': 'mp4', 495 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 496 'uploader': 'Philipp Hagemeister', 497 'uploader_id': 'phihag', 498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 499 'upload_date': '20121002', 500 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 501 'categories': ['Science & Technology'], 502 'tags': ['youtube-dl'], 503 'duration': 10, 504 'view_count': int, 505 'like_count': int, 506 'dislike_count': int, 507 }, 508 'params': { 509 'skip_download': True, 510 }, 511 }, 512 { 513 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', 514 'note': '256k DASH audio (format 141) via DASH manifest', 515 'info_dict': { 516 'id': 'a9LDPn-MO4I', 517 'ext': 'm4a', 518 'upload_date': '20121002', 519 'uploader_id': '8KVIDEO', 520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 521 'description': '', 522 'uploader': '8KVIDEO', 523 'title': 'UHDTV TEST 8K VIDEO.mp4' 524 }, 525 'params': { 526 'youtube_include_dash_manifest': True, 527 'format': '141', 528 }, 529 'skip': 'format 141 not served anymore', 530 }, 531 # DASH manifest with encrypted signature 532 { 533 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', 534 'info_dict': { 535 'id': 'IB3lcPjvWLA', 536 'ext': 'm4a', 537 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. 
Spree Wilson', 538 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', 539 'duration': 244, 540 'uploader': 'AfrojackVEVO', 541 'uploader_id': 'AfrojackVEVO', 542 'upload_date': '20131011', 543 'abr': 129.495, 544 }, 545 'params': { 546 'youtube_include_dash_manifest': True, 547 'format': '141/bestaudio[ext=m4a]', 548 }, 549 }, 550 # Controversy video 551 { 552 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8', 553 'info_dict': { 554 'id': 'T4XJQO3qol8', 555 'ext': 'mp4', 556 'duration': 219, 557 'upload_date': '20100909', 558 'uploader': 'Amazing Atheist', 559 'uploader_id': 'TheAmazingAtheist', 560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', 561 'title': 'Burning Everyone\'s Koran', 562 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', 563 } 564 }, 565 # Normal age-gate video (No vevo, embed allowed), available via embed page 566 { 567 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 568 'info_dict': { 569 'id': 'HtVdAasjOgU', 570 'ext': 'mp4', 571 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', 572 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 573 'duration': 142, 574 'uploader': 'The Witcher', 575 'uploader_id': 'WitcherGame', 576 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 577 'upload_date': '20140605', 578 'age_limit': 18, 579 }, 580 }, 581 { 582 # Age-gated video only available with authentication (unavailable 583 # via embed page workaround) 584 'url': 'XgnwCQzjau8', 585 'only_matching': True, 586 }, 587 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) 588 # YouTube Red ad is not captured for creator 589 { 590 'url': '__2ABJjxzNo', 591 'info_dict': { 592 'id': '__2ABJjxzNo', 593 'ext': 'mp4', 594 'duration': 266, 595 'upload_date': '20100430', 
596 'uploader_id': 'deadmau5', 597 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', 598 'creator': 'deadmau5', 599 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336', 600 'uploader': 'deadmau5', 601 'title': 'Deadmau5 - Some Chords (HD)', 602 'alt_title': 'Some Chords', 603 }, 604 'expected_warnings': [ 605 'DASH manifest missing', 606 ] 607 }, 608 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) 609 { 610 'url': 'lqQg6PlCWgI', 611 'info_dict': { 612 'id': 'lqQg6PlCWgI', 613 'ext': 'mp4', 614 'duration': 6085, 615 'upload_date': '20150827', 616 'uploader_id': 'olympic', 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', 618 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 619 'uploader': 'Olympic', 620 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', 621 }, 622 'params': { 623 'skip_download': 'requires avconv', 624 } 625 }, 626 # Non-square pixels 627 { 628 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0', 629 'info_dict': { 630 'id': '_b-2C3KPAM0', 631 'ext': 'mp4', 632 'stretched_ratio': 16 / 9., 633 'duration': 85, 634 'upload_date': '20110310', 635 'uploader_id': 'AllenMeow', 636 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 637 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 638 'uploader': '孫ᄋᄅ', 639 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', 640 }, 641 }, 642 # url_encoded_fmt_stream_map is empty string 643 { 644 'url': 'qEJwOuvDf7I', 645 'info_dict': { 646 'id': 'qEJwOuvDf7I', 647 'ext': 'webm', 648 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', 649 'description': '', 650 'upload_date': '20150404', 651 'uploader_id': 'spbelect', 652 'uploader': 'Наблюдатели Петербурга', 653 }, 654 'params': { 655 'skip_download': 'requires avconv', 656 }, 657 'skip': 'This live event has ended.', 658 }, 659 # Extraction from multiple 
DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097) 660 { 661 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', 662 'info_dict': { 663 'id': 'FIl7x6_3R5Y', 664 'ext': 'webm', 665 'title': 'md5:7b81415841e02ecd4313668cde88737a', 666 'description': 'md5:116377fd2963b81ec4ce64b542173306', 667 'duration': 220, 668 'upload_date': '20150625', 669 'uploader_id': 'dorappi2000', 670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 671 'uploader': 'dorappi2000', 672 'formats': 'mincount:31', 673 }, 674 'skip': 'not actual anymore', 675 }, 676 # DASH manifest with segment_list 677 { 678 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', 679 'md5': '8ce563a1d667b599d21064e982ab9e31', 680 'info_dict': { 681 'id': 'CsmdDsKjzN8', 682 'ext': 'mp4', 683 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510 684 'uploader': 'Airtek', 685 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', 686 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', 687 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', 688 }, 689 'params': { 690 'youtube_include_dash_manifest': True, 691 'format': '135', # bestvideo 692 }, 693 'skip': 'This live event has ended.', 694 }, 695 { 696 # Multifeed videos (multiple cameras), URL is for Main Camera 697 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg', 698 'info_dict': { 699 'id': 'jvGDaLqkpTg', 700 'title': 'Tom Clancy Free Weekend Rainbow Whatever', 701 'description': 'md5:e03b909557865076822aa169218d6a5d', 702 }, 703 'playlist': [{ 704 'info_dict': { 705 'id': 'jvGDaLqkpTg', 706 'ext': 'mp4', 707 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)', 708 'description': 'md5:e03b909557865076822aa169218d6a5d', 709 'duration': 10643, 710 'upload_date': '20161111', 711 'uploader': 'Team PGP', 712 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 713 'uploader_url': 
r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 714 }, 715 }, { 716 'info_dict': { 717 'id': '3AKt1R1aDnw', 718 'ext': 'mp4', 719 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)', 720 'description': 'md5:e03b909557865076822aa169218d6a5d', 721 'duration': 10991, 722 'upload_date': '20161111', 723 'uploader': 'Team PGP', 724 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 726 }, 727 }, { 728 'info_dict': { 729 'id': 'RtAMM00gpVc', 730 'ext': 'mp4', 731 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)', 732 'description': 'md5:e03b909557865076822aa169218d6a5d', 733 'duration': 10995, 734 'upload_date': '20161111', 735 'uploader': 'Team PGP', 736 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 737 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 738 }, 739 }, { 740 'info_dict': { 741 'id': '6N2fdlP3C5U', 742 'ext': 'mp4', 743 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)', 744 'description': 'md5:e03b909557865076822aa169218d6a5d', 745 'duration': 10990, 746 'upload_date': '20161111', 747 'uploader': 'Team PGP', 748 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', 749 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', 750 }, 751 }], 752 'params': { 753 'skip_download': True, 754 }, 755 }, 756 { 757 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) 758 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', 759 'info_dict': { 760 'id': 'gVfLd0zydlo', 761 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', 762 }, 763 'playlist_count': 2, 764 'skip': 'Not multifeed anymore', 765 }, 766 { 767 'url': 'https://vid.plus/FlRa-iH7PGw', 768 'only_matching': True, 769 }, 770 { 771 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', 772 'only_matching': True, 773 
}, 774 { 775 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) 776 # Also tests cut-off URL expansion in video description (see 777 # https://github.com/ytdl-org/youtube-dl/issues/1892, 778 # https://github.com/ytdl-org/youtube-dl/issues/8164) 779 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 780 'info_dict': { 781 'id': 'lsguqyKfVQg', 782 'ext': 'mp4', 783 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 784 'alt_title': 'Dark Walk - Position Music', 785 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 786 'duration': 133, 787 'upload_date': '20151119', 788 'uploader_id': 'IronSoulElf', 789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 790 'uploader': 'IronSoulElf', 791 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 792 'track': 'Dark Walk - Position Music', 793 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 794 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', 795 }, 796 'params': { 797 'skip_download': True, 798 }, 799 }, 800 { 801 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) 802 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 803 'only_matching': True, 804 }, 805 { 806 # Video with yt:stretch=17:0 807 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM', 808 'info_dict': { 809 'id': 'Q39EVAstoRM', 810 'ext': 'mp4', 811 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4', 812 'description': 'md5:ee18a25c350637c8faff806845bddee9', 813 'upload_date': '20151107', 814 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA', 815 'uploader': 'CH GAMER DROID', 816 }, 817 'params': { 818 'skip_download': True, 819 }, 820 'skip': 'This video does not exist.', 821 }, 822 { 823 # Video with incomplete 'yt:stretch=16:' 824 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI', 825 'only_matching': True, 826 }, 827 { 828 # Video licensed under Creative Commons 829 'url': 
'https://www.youtube.com/watch?v=M4gD1WSo5mA', 830 'info_dict': { 831 'id': 'M4gD1WSo5mA', 832 'ext': 'mp4', 833 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 834 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 835 'duration': 721, 836 'upload_date': '20150127', 837 'uploader_id': 'BerkmanCenter', 838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 839 'uploader': 'The Berkman Klein Center for Internet & Society', 840 'license': 'Creative Commons Attribution license (reuse allowed)', 841 }, 842 'params': { 843 'skip_download': True, 844 }, 845 }, 846 { 847 # Channel-like uploader_url 848 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg', 849 'info_dict': { 850 'id': 'eQcmzGIKrzg', 851 'ext': 'mp4', 852 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 853 'description': 'md5:13a2503d7b5904ef4b223aa101628f39', 854 'duration': 4060, 855 'upload_date': '20151119', 856 'uploader': 'Bernie Sanders', 857 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', 858 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', 859 'license': 'Creative Commons Attribution license (reuse allowed)', 860 }, 861 'params': { 862 'skip_download': True, 863 }, 864 }, 865 { 866 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', 867 'only_matching': True, 868 }, 869 { 870 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) 871 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 872 'only_matching': True, 873 }, 874 { 875 # Rental video preview 876 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', 877 'info_dict': { 878 'id': 'uGpuVWrhIzE', 879 'ext': 'mp4', 880 'title': 'Piku - Trailer', 881 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', 882 'upload_date': '20150811', 883 'uploader': 'FlixMatrix', 884 'uploader_id': 'FlixMatrixKaravan', 885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', 886 'license': 'Standard 
YouTube License', 887 }, 888 'params': { 889 'skip_download': True, 890 }, 891 'skip': 'This video is not available.', 892 }, 893 { 894 # YouTube Red video with episode data 895 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', 896 'info_dict': { 897 'id': 'iqKdEhx-dD4', 898 'ext': 'mp4', 899 'title': 'Isolation - Mind Field (Ep 1)', 900 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd', 901 'duration': 2085, 902 'upload_date': '20170118', 903 'uploader': 'Vsauce', 904 'uploader_id': 'Vsauce', 905 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', 906 'series': 'Mind Field', 907 'season_number': 1, 908 'episode_number': 1, 909 }, 910 'params': { 911 'skip_download': True, 912 }, 913 'expected_warnings': [ 914 'Skipping DASH manifest', 915 ], 916 }, 917 { 918 # The following content has been identified by the YouTube community 919 # as inappropriate or offensive to some audiences. 920 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI', 921 'info_dict': { 922 'id': '6SJNVb0GnPI', 923 'ext': 'mp4', 924 'title': 'Race Differences in Intelligence', 925 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1', 926 'duration': 965, 927 'upload_date': '20140124', 928 'uploader': 'New Century Foundation', 929 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', 930 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', 931 }, 932 'params': { 933 'skip_download': True, 934 }, 935 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.', 936 }, 937 { 938 # itag 212 939 'url': '1t24XAntNCY', 940 'only_matching': True, 941 }, 942 { 943 # geo restricted to JP 944 'url': 'sJL6WA-aGkQ', 945 'only_matching': True, 946 }, 947 { 948 'url': 'https://invidio.us/watch?v=BaW_jenozKc', 949 'only_matching': True, 950 }, 951 { 952 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc', 953 'only_matching': True, 954 }, 955 { 956 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m 957 'url': 
'https://redirect.invidious.io/Yh0AhrY9GjA', 958 'only_matching': True, 959 }, 960 { 961 # DRM protected 962 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', 963 'only_matching': True, 964 }, 965 { 966 # Video with unsupported adaptive stream type formats 967 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U', 968 'info_dict': { 969 'id': 'Z4Vy8R84T1U', 970 'ext': 'mp4', 971 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta', 972 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 973 'duration': 433, 974 'upload_date': '20130923', 975 'uploader': 'Amelia Putri Harwita', 976 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q', 977 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q', 978 'formats': 'maxcount:10', 979 }, 980 'params': { 981 'skip_download': True, 982 'youtube_include_dash_manifest': False, 983 }, 984 'skip': 'not actual anymore', 985 }, 986 { 987 # Youtube Music Auto-generated description 988 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', 989 'info_dict': { 990 'id': 'MgNrAu2pzNs', 991 'ext': 'mp4', 992 'title': 'Voyeur Girl', 993 'description': 'md5:7ae382a65843d6df2685993e90a8628f', 994 'upload_date': '20190312', 995 'uploader': 'Stephen - Topic', 996 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', 997 'artist': 'Stephen', 998 'track': 'Voyeur Girl', 999 'album': 'it\'s too much love to know my dear', 1000 'release_date': '20190313', 1001 'release_year': 2019, 1002 }, 1003 'params': { 1004 'skip_download': True, 1005 }, 1006 }, 1007 { 1008 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', 1009 'only_matching': True, 1010 }, 1011 { 1012 # invalid -> valid video id redirection 1013 'url': 'DJztXj2GPfl', 1014 'info_dict': { 1015 'id': 'DJztXj2GPfk', 1016 'ext': 'mp4', 1017 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', 1018 'description': 'md5:bf577a41da97918e94fa9798d9228825', 1019 'upload_date': '20090125', 1020 'uploader': 'Prochorowka', 1021 
'uploader_id': 'Prochorowka', 1022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', 1023 'artist': 'Panjabi MC', 1024 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', 1025 'album': 'Beware of the Boys (Mundian To Bach Ke)', 1026 }, 1027 'params': { 1028 'skip_download': True, 1029 }, 1030 'skip': 'Video unavailable', 1031 }, 1032 { 1033 # empty description results in an empty string 1034 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', 1035 'info_dict': { 1036 'id': 'x41yOUIvK2k', 1037 'ext': 'mp4', 1038 'title': 'IMG 3456', 1039 'description': '', 1040 'upload_date': '20170613', 1041 'uploader_id': 'ElevageOrVert', 1042 'uploader': 'ElevageOrVert', 1043 }, 1044 'params': { 1045 'skip_download': True, 1046 }, 1047 }, 1048 { 1049 # with '};' inside yt initial data (see [1]) 1050 # see [2] for an example with '};' inside ytInitialPlayerResponse 1051 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093 1052 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216 1053 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no', 1054 'info_dict': { 1055 'id': 'CHqg6qOn4no', 1056 'ext': 'mp4', 1057 'title': 'Part 77 Sort a list of simple types in c#', 1058 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc', 1059 'upload_date': '20130831', 1060 'uploader_id': 'kudvenkat', 1061 'uploader': 'kudvenkat', 1062 }, 1063 'params': { 1064 'skip_download': True, 1065 }, 1066 }, 1067 { 1068 # another example of '};' in ytInitialData 1069 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY', 1070 'only_matching': True, 1071 }, 1072 { 1073 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ', 1074 'only_matching': True, 1075 }, 1076 { 1077 # https://github.com/ytdl-org/youtube-dl/pull/28094 1078 'url': 'OtqTfy26tG0', 1079 'info_dict': { 1080 'id': 'OtqTfy26tG0', 1081 'ext': 'mp4', 1082 'title': 'Burn Out', 1083 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131', 1084 'upload_date': '20141120', 1085 'uploader': 'The 
Cinematic Orchestra - Topic', 1086 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw', 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw', 1088 'artist': 'The Cinematic Orchestra', 1089 'track': 'Burn Out', 1090 'album': 'Every Day', 1091 'release_data': None, 1092 'release_year': None, 1093 }, 1094 'params': { 1095 'skip_download': True, 1096 }, 1097 }, 1098 { 1099 # controversial video, only works with bpctr when authenticated with cookies 1100 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg', 1101 'only_matching': True, 1102 }, 1103 { 1104 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 1105 'url': 'cBvYw8_A0vQ', 1106 'info_dict': { 1107 'id': 'cBvYw8_A0vQ', 1108 'ext': 'mp4', 1109 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き', 1110 'description': 'md5:ea770e474b7cd6722b4c95b833c03630', 1111 'upload_date': '20201120', 1112 'uploader': 'Walk around Japan', 1113 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw', 1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', 1115 }, 1116 'params': { 1117 'skip_download': True, 1118 }, 1119 }, 1120 ] 1121 _formats = { 1122 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, 1123 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, 1124 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, 1125 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, 1126 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, 1127 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, 1128 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1129 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1130 # itag 36 videos are either 320x180 
(BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well 1131 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, 1132 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, 1133 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, 1134 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, 1135 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, 1136 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, 1137 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, 1138 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1139 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, 1140 1141 1142 # 3D videos 1143 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, 1144 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, 1145 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, 1146 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, 1147 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, 1148 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, 1149 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, 1150 1151 # Apple HTTP Live Streaming 1152 '91': {'ext': 'mp4', 'height': 144, 
'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 1153 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 1154 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, 1155 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, 1156 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, 1157 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, 1158 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, 1159 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, 1160 1161 # DASH mp4 video 1162 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1163 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1164 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1165 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1166 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1167 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) 1168 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1169 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1170 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1171 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, 1172 
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, 1173 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, 1174 1175 # Dash mp4 audio 1176 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, 1177 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, 1178 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, 1179 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, 1180 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, 1181 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, 1182 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, 1183 1184 # Dash webm 1185 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1186 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1187 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1188 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1189 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1190 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, 1191 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, 1192 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1193 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 
'vcodec': 'vp9'}, 1194 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1195 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1196 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1197 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1198 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1199 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1200 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) 1201 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1202 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1203 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1204 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1205 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, 1206 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, 1207 1208 # Dash webm audio 1209 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, 1210 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, 1211 1212 # Dash webm audio with opus inside 1213 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, 1214 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, 1215 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, 1216 1217 # RTMP (unnamed) 1218 '_rtmp': {'protocol': 'rtmp'}, 1219 1220 # av01 video only formats sometimes served with "unknown" codecs 1221 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, 1222 '395': {'acodec': 'none', 'vcodec': 
'av01.0.05M.08'}, 1223 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, 1224 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, 1225 } 1226 1227 @classmethod 1228 def suitable(cls, url): 1229 # Hack for lazy extractors until more generic solution is implemented 1230 # (see #28780) 1231 from .youtube import parse_qs 1232 qs = parse_qs(url) 1233 if qs.get('list', [None])[0]: 1234 return False 1235 return super(YoutubeIE, cls).suitable(url) 1236 1237 def __init__(self, *args, **kwargs): 1238 super(YoutubeIE, self).__init__(*args, **kwargs) 1239 self._code_cache = {} 1240 self._player_cache = {} 1241 1242 def _signature_cache_id(self, example_sig): 1243 """ Return a string representation of a signature """ 1244 return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) 1245 1246 @classmethod 1247 def _extract_player_info(cls, player_url): 1248 for player_re in cls._PLAYER_INFO_RE: 1249 id_m = re.search(player_re, player_url) 1250 if id_m: 1251 break 1252 else: 1253 raise ExtractorError('Cannot identify player %r' % player_url) 1254 return id_m.group('id') 1255 1256 def _extract_signature_function(self, video_id, player_url, example_sig): 1257 player_id = self._extract_player_info(player_url) 1258 1259 # Read from filesystem cache 1260 func_id = 'js_%s_%s' % ( 1261 player_id, self._signature_cache_id(example_sig)) 1262 assert os.path.basename(func_id) == func_id 1263 1264 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) 1265 if cache_spec is not None: 1266 return lambda s: ''.join(s[i] for i in cache_spec) 1267 1268 if player_id not in self._code_cache: 1269 self._code_cache[player_id] = self._download_webpage( 1270 player_url, video_id, 1271 note='Downloading player ' + player_id, 1272 errnote='Download of %s failed' % player_url) 1273 code = self._code_cache[player_id] 1274 res = self._parse_sig_js(code) 1275 1276 test_string = ''.join(map(compat_chr, range(len(example_sig)))) 1277 cache_res = res(test_string) 1278 
cache_spec = [ord(c) for c in cache_res] 1279 1280 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) 1281 return res 1282 1283 def _print_sig_code(self, func, example_sig): 1284 def gen_sig_code(idxs): 1285 def _genslice(start, end, step): 1286 starts = '' if start == 0 else str(start) 1287 ends = (':%d' % (end + step)) if end + step >= 0 else ':' 1288 steps = '' if step == 1 else (':%d' % step) 1289 return 's[%s%s%s]' % (starts, ends, steps) 1290 1291 step = None 1292 # Quelch pyflakes warnings - start will be set when step is set 1293 start = '(Never used)' 1294 for i, prev in zip(idxs[1:], idxs[:-1]): 1295 if step is not None: 1296 if i - prev == step: 1297 continue 1298 yield _genslice(start, prev, step) 1299 step = None 1300 continue 1301 if i - prev in [-1, 1]: 1302 step = i - prev 1303 start = prev 1304 continue 1305 else: 1306 yield 's[%d]' % prev 1307 if step is None: 1308 yield 's[%d]' % i 1309 else: 1310 yield _genslice(start, i, step) 1311 1312 test_string = ''.join(map(compat_chr, range(len(example_sig)))) 1313 cache_res = func(test_string) 1314 cache_spec = [ord(c) for c in cache_res] 1315 expr_code = ' + '.join(gen_sig_code(cache_spec)) 1316 signature_id_tuple = '(%s)' % ( 1317 ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) 1318 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' 1319 ' return %s\n') % (signature_id_tuple, expr_code) 1320 self.to_screen('Extracted signature function:\n' + code) 1321 1322 def _parse_sig_js(self, jscode): 1323 funcname = self._search_regex( 1324 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1325 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1326 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', 1327 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', 1328 
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)', 1329 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', 1330 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', 1331 # Obsolete patterns 1332 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1333 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', 1334 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1335 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1336 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1337 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1338 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', 1339 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), 1340 jscode, 'Initial JS player signature function name', group='sig') 1341 1342 jsi = JSInterpreter(jscode) 1343 initial_function = jsi.extract_function(funcname) 1344 return lambda s: initial_function([s]) 1345 1346 def _decrypt_signature(self, s, video_id, player_url): 1347 """Turn the encrypted s field into a working signature""" 1348 1349 if player_url is None: 1350 raise ExtractorError('Cannot decrypt signature without player_url') 1351 1352 if player_url.startswith('//'): 1353 player_url = 'https:' + player_url 1354 elif not re.match(r'https?://', player_url): 1355 player_url = compat_urlparse.urljoin( 1356 'https://www.youtube.com', player_url) 1357 try: 1358 player_id = (player_url, self._signature_cache_id(s)) 1359 if player_id not in self._player_cache: 1360 func = self._extract_signature_function( 1361 video_id, player_url, s 1362 ) 1363 self._player_cache[player_id] = func 1364 func = self._player_cache[player_id] 
1365 if self._downloader.params.get('youtube_print_sig_code'): 1366 self._print_sig_code(func, s) 1367 return func(s) 1368 except Exception as e: 1369 tb = traceback.format_exc() 1370 raise ExtractorError( 1371 'Signature extraction failed: ' + tb, cause=e) 1372 1373 def _mark_watched(self, video_id, player_response): 1374 playback_url = url_or_none(try_get( 1375 player_response, 1376 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])) 1377 if not playback_url: 1378 return 1379 parsed_playback_url = compat_urlparse.urlparse(playback_url) 1380 qs = compat_urlparse.parse_qs(parsed_playback_url.query) 1381 1382 # cpn generation algorithm is reverse engineered from base.js. 1383 # In fact it works even with dummy cpn. 1384 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' 1385 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) 1386 1387 qs.update({ 1388 'ver': ['2'], 1389 'cpn': [cpn], 1390 }) 1391 playback_url = compat_urlparse.urlunparse( 1392 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) 1393 1394 self._download_webpage( 1395 playback_url, video_id, 'Marking watched', 1396 'Unable to mark watched', fatal=False) 1397 1398 @staticmethod 1399 def _extract_urls(webpage): 1400 # Embedded YouTube player 1401 entries = [ 1402 unescapeHTML(mobj.group('url')) 1403 for mobj in re.finditer(r'''(?x) 1404 (?: 1405 <iframe[^>]+?src=| 1406 data-video-url=| 1407 <embed[^>]+?src=| 1408 embedSWF\(?:\s*| 1409 <object[^>]+data=| 1410 new\s+SWFObject\( 1411 ) 1412 (["\']) 1413 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ 1414 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) 
1415 \1''', webpage)] 1416 1417 # lazyYT YouTube embed 1418 entries.extend(list(map( 1419 unescapeHTML, 1420 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) 1421 1422 # Wordpress "YouTube Video Importer" plugin 1423 matches = re.findall(r'''(?x)<div[^>]+ 1424 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ 1425 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) 1426 entries.extend(m[-1] for m in matches) 1427 1428 return entries 1429 1430 @staticmethod 1431 def _extract_url(webpage): 1432 urls = YoutubeIE._extract_urls(webpage) 1433 return urls[0] if urls else None 1434 1435 @classmethod 1436 def extract_id(cls, url): 1437 mobj = re.match(cls._VALID_URL, url, re.VERBOSE) 1438 if mobj is None: 1439 raise ExtractorError('Invalid URL: %s' % url) 1440 video_id = mobj.group(2) 1441 return video_id 1442 1443 def _extract_chapters_from_json(self, data, video_id, duration): 1444 chapters_list = try_get( 1445 data, 1446 lambda x: x['playerOverlays'] 1447 ['playerOverlayRenderer'] 1448 ['decoratedPlayerBarRenderer'] 1449 ['decoratedPlayerBarRenderer'] 1450 ['playerBar'] 1451 ['chapteredPlayerBarRenderer'] 1452 ['chapters'], 1453 list) 1454 if not chapters_list: 1455 return 1456 1457 def chapter_time(chapter): 1458 return float_or_none( 1459 try_get( 1460 chapter, 1461 lambda x: x['chapterRenderer']['timeRangeStartMillis'], 1462 int), 1463 scale=1000) 1464 chapters = [] 1465 for next_num, chapter in enumerate(chapters_list, start=1): 1466 start_time = chapter_time(chapter) 1467 if start_time is None: 1468 continue 1469 end_time = (chapter_time(chapters_list[next_num]) 1470 if next_num < len(chapters_list) else duration) 1471 if end_time is None: 1472 continue 1473 title = try_get( 1474 chapter, lambda x: x['chapterRenderer']['title']['simpleText'], 1475 compat_str) 1476 chapters.append({ 1477 'start_time': start_time, 1478 'end_time': end_time, 1479 'title': title, 1480 }) 1481 return chapters 1482 1483 def 
_extract_yt_initial_variable(self, webpage, regex, video_id, name): 1484 return self._parse_json(self._search_regex( 1485 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), 1486 regex), webpage, name, default='{}'), video_id, fatal=False) 1487 1488 def _real_extract(self, url): 1489 url, smuggled_data = unsmuggle_url(url, {}) 1490 video_id = self._match_id(url) 1491 base_url = self.http_scheme() + '//www.youtube.com/' 1492 webpage_url = base_url + 'watch?v=' + video_id 1493 webpage = self._download_webpage( 1494 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) 1495 1496 player_response = None 1497 if webpage: 1498 player_response = self._extract_yt_initial_variable( 1499 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, 1500 video_id, 'initial player response') 1501 if not player_response: 1502 player_response = self._call_api( 1503 'player', {'videoId': video_id}, video_id) 1504 1505 playability_status = player_response.get('playabilityStatus') or {} 1506 if playability_status.get('reason') == 'Sign in to confirm your age': 1507 video_info = self._download_webpage( 1508 base_url + 'get_video_info', video_id, 1509 'Refetching age-gated info webpage', 1510 'unable to download video info webpage', query={ 1511 'video_id': video_id, 1512 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 1513 'html5': 1, 1514 # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 1515 'c': 'TVHTML5', 1516 'cver': '6.20180913', 1517 }, fatal=False) 1518 if video_info: 1519 pr = self._parse_json( 1520 try_get( 1521 compat_parse_qs(video_info), 1522 lambda x: x['player_response'][0], compat_str) or '{}', 1523 video_id, fatal=False) 1524 if pr and isinstance(pr, dict): 1525 player_response = pr 1526 1527 trailer_video_id = try_get( 1528 playability_status, 1529 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'], 1530 compat_str) 1531 if trailer_video_id: 1532 return self.url_result( 1533 
trailer_video_id, self.ie_key(), trailer_video_id) 1534 1535 def get_text(x): 1536 if not x: 1537 return 1538 text = x.get('simpleText') 1539 if text and isinstance(text, compat_str): 1540 return text 1541 runs = x.get('runs') 1542 if not isinstance(runs, list): 1543 return 1544 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)]) 1545 1546 search_meta = ( 1547 lambda x: self._html_search_meta(x, webpage, default=None)) \ 1548 if webpage else lambda x: None 1549 1550 video_details = player_response.get('videoDetails') or {} 1551 microformat = try_get( 1552 player_response, 1553 lambda x: x['microformat']['playerMicroformatRenderer'], 1554 dict) or {} 1555 video_title = video_details.get('title') \ 1556 or get_text(microformat.get('title')) \ 1557 or search_meta(['og:title', 'twitter:title', 'title']) 1558 video_description = video_details.get('shortDescription') 1559 1560 if not smuggled_data.get('force_singlefeed', False): 1561 if not self._downloader.params.get('noplaylist'): 1562 multifeed_metadata_list = try_get( 1563 player_response, 1564 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'], 1565 compat_str) 1566 if multifeed_metadata_list: 1567 entries = [] 1568 feed_ids = [] 1569 for feed in multifeed_metadata_list.split(','): 1570 # Unquote should take place before split on comma (,) since textual 1571 # fields may contain comma as well (see 1572 # https://github.com/ytdl-org/youtube-dl/issues/8536) 1573 feed_data = compat_parse_qs( 1574 compat_urllib_parse_unquote_plus(feed)) 1575 1576 def feed_entry(name): 1577 return try_get( 1578 feed_data, lambda x: x[name][0], compat_str) 1579 1580 feed_id = feed_entry('id') 1581 if not feed_id: 1582 continue 1583 feed_title = feed_entry('title') 1584 title = video_title 1585 if feed_title: 1586 title += ' (%s)' % feed_title 1587 entries.append({ 1588 '_type': 'url_transparent', 1589 'ie_key': 'Youtube', 1590 'url': smuggle_url( 1591 base_url + 'watch?v=' + 
feed_data['id'][0], 1592 {'force_singlefeed': True}), 1593 'title': title, 1594 }) 1595 feed_ids.append(feed_id) 1596 self.to_screen( 1597 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' 1598 % (', '.join(feed_ids), video_id)) 1599 return self.playlist_result( 1600 entries, video_id, video_title, video_description) 1601 else: 1602 self.to_screen('Downloading just video %s because of --no-playlist' % video_id) 1603 1604 formats = [] 1605 itags = [] 1606 itag_qualities = {} 1607 player_url = None 1608 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres']) 1609 streaming_data = player_response.get('streamingData') or {} 1610 streaming_formats = streaming_data.get('formats') or [] 1611 streaming_formats.extend(streaming_data.get('adaptiveFormats') or []) 1612 for fmt in streaming_formats: 1613 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): 1614 continue 1615 1616 itag = str_or_none(fmt.get('itag')) 1617 quality = fmt.get('quality') 1618 if itag and quality: 1619 itag_qualities[itag] = quality 1620 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment 1621 # (adding `&sq=0` to the URL) and parsing emsg box to determine the 1622 # number of fragment that would subsequently requested with (`&sq=N`) 1623 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF': 1624 continue 1625 1626 fmt_url = fmt.get('url') 1627 if not fmt_url: 1628 sc = compat_parse_qs(fmt.get('signatureCipher')) 1629 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) 1630 encrypted_sig = try_get(sc, lambda x: x['s'][0]) 1631 if not (sc and fmt_url and encrypted_sig): 1632 continue 1633 if not player_url: 1634 if not webpage: 1635 continue 1636 player_url = self._search_regex( 1637 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"', 1638 webpage, 'player URL', fatal=False) 1639 if not player_url: 1640 continue 1641 signature = self._decrypt_signature(sc['s'][0], video_id, player_url) 1642 sp = 
try_get(sc, lambda x: x['sp'][0]) or 'signature' 1643 fmt_url += '&' + sp + '=' + signature 1644 1645 if itag: 1646 itags.append(itag) 1647 tbr = float_or_none( 1648 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) 1649 dct = { 1650 'asr': int_or_none(fmt.get('audioSampleRate')), 1651 'filesize': int_or_none(fmt.get('contentLength')), 1652 'format_id': itag, 1653 'format_note': fmt.get('qualityLabel') or quality, 1654 'fps': int_or_none(fmt.get('fps')), 1655 'height': int_or_none(fmt.get('height')), 1656 'quality': q(quality), 1657 'tbr': tbr, 1658 'url': fmt_url, 1659 'width': fmt.get('width'), 1660 } 1661 mimetype = fmt.get('mimeType') 1662 if mimetype: 1663 mobj = re.match( 1664 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype) 1665 if mobj: 1666 dct['ext'] = mimetype2ext(mobj.group(1)) 1667 dct.update(parse_codecs(mobj.group(2))) 1668 no_audio = dct.get('acodec') == 'none' 1669 no_video = dct.get('vcodec') == 'none' 1670 if no_audio: 1671 dct['vbr'] = tbr 1672 if no_video: 1673 dct['abr'] = tbr 1674 if no_audio or no_video: 1675 dct['downloader_options'] = { 1676 # Youtube throttles chunks >~10M 1677 'http_chunk_size': 10485760, 1678 } 1679 if dct.get('ext'): 1680 dct['container'] = dct['ext'] + '_dash' 1681 formats.append(dct) 1682 1683 hls_manifest_url = streaming_data.get('hlsManifestUrl') 1684 if hls_manifest_url: 1685 for f in self._extract_m3u8_formats( 1686 hls_manifest_url, video_id, 'mp4', fatal=False): 1687 itag = self._search_regex( 1688 r'/itag/(\d+)', f['url'], 'itag', default=None) 1689 if itag: 1690 f['format_id'] = itag 1691 formats.append(f) 1692 1693 if self._downloader.params.get('youtube_include_dash_manifest', True): 1694 dash_manifest_url = streaming_data.get('dashManifestUrl') 1695 if dash_manifest_url: 1696 for f in self._extract_mpd_formats( 1697 dash_manifest_url, video_id, fatal=False): 1698 itag = f['format_id'] 1699 if itag in itags: 1700 continue 1701 if itag in itag_qualities: 1702 f['quality'] = 
q(itag_qualities[itag]) 1703 filesize = int_or_none(self._search_regex( 1704 r'/clen/(\d+)', f.get('fragment_base_url') 1705 or f['url'], 'file size', default=None)) 1706 if filesize: 1707 f['filesize'] = filesize 1708 formats.append(f) 1709 1710 if not formats: 1711 if streaming_data.get('licenseInfos'): 1712 raise ExtractorError( 1713 'This video is DRM protected.', expected=True) 1714 pemr = try_get( 1715 playability_status, 1716 lambda x: x['errorScreen']['playerErrorMessageRenderer'], 1717 dict) or {} 1718 reason = get_text(pemr.get('reason')) or playability_status.get('reason') 1719 subreason = pemr.get('subreason') 1720 if subreason: 1721 subreason = clean_html(get_text(subreason)) 1722 if subreason == 'The uploader has not made this video available in your country.': 1723 countries = microformat.get('availableCountries') 1724 if not countries: 1725 regions_allowed = search_meta('regionsAllowed') 1726 countries = regions_allowed.split(',') if regions_allowed else None 1727 self.raise_geo_restricted( 1728 subreason, countries) 1729 reason += '\n' + subreason 1730 if reason: 1731 raise ExtractorError(reason, expected=True) 1732 1733 self._sort_formats(formats) 1734 1735 keywords = video_details.get('keywords') or [] 1736 if not keywords and webpage: 1737 keywords = [ 1738 unescapeHTML(m.group('content')) 1739 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)] 1740 for keyword in keywords: 1741 if keyword.startswith('yt:stretch='): 1742 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword) 1743 if mobj: 1744 # NB: float is intentional for forcing float division 1745 w, h = (float(v) for v in mobj.groups()) 1746 if w > 0 and h > 0: 1747 ratio = w / h 1748 for f in formats: 1749 if f.get('vcodec') != 'none': 1750 f['stretched_ratio'] = ratio 1751 break 1752 1753 thumbnails = [] 1754 for container in (video_details, microformat): 1755 for thumbnail in (try_get( 1756 container, 1757 lambda x: x['thumbnail']['thumbnails'], list) or []): 1758 thumbnail_url = 
thumbnail.get('url') 1759 if not thumbnail_url: 1760 continue 1761 thumbnails.append({ 1762 'height': int_or_none(thumbnail.get('height')), 1763 'url': thumbnail_url, 1764 'width': int_or_none(thumbnail.get('width')), 1765 }) 1766 if thumbnails: 1767 break 1768 else: 1769 thumbnail = search_meta(['og:image', 'twitter:image']) 1770 if thumbnail: 1771 thumbnails = [{'url': thumbnail}] 1772 1773 category = microformat.get('category') or search_meta('genre') 1774 channel_id = video_details.get('channelId') \ 1775 or microformat.get('externalChannelId') \ 1776 or search_meta('channelId') 1777 duration = int_or_none( 1778 video_details.get('lengthSeconds') 1779 or microformat.get('lengthSeconds')) \ 1780 or parse_duration(search_meta('duration')) 1781 is_live = video_details.get('isLive') 1782 owner_profile_url = microformat.get('ownerProfileUrl') 1783 1784 info = { 1785 'id': video_id, 1786 'title': self._live_title(video_title) if is_live else video_title, 1787 'formats': formats, 1788 'thumbnails': thumbnails, 1789 'description': video_description, 1790 'upload_date': unified_strdate( 1791 microformat.get('uploadDate') 1792 or search_meta('uploadDate')), 1793 'uploader': video_details['author'], 1794 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None, 1795 'uploader_url': owner_profile_url, 1796 'channel_id': channel_id, 1797 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None, 1798 'duration': duration, 1799 'view_count': int_or_none( 1800 video_details.get('viewCount') 1801 or microformat.get('viewCount') 1802 or search_meta('interactionCount')), 1803 'average_rating': float_or_none(video_details.get('averageRating')), 1804 'age_limit': 18 if ( 1805 microformat.get('isFamilySafe') is False 1806 or search_meta('isFamilyFriendly') == 'false' 1807 or search_meta('og:restrictions:age') == '18+') else 0, 1808 'webpage_url': webpage_url, 1809 'categories': 
[category] if category else None, 1810 'tags': keywords, 1811 'is_live': is_live, 1812 } 1813 1814 pctr = try_get( 1815 player_response, 1816 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict) 1817 if pctr: 1818 def process_language(container, base_url, lang_code, query): 1819 lang_subs = [] 1820 for fmt in self._SUBTITLE_FORMATS: 1821 query.update({ 1822 'fmt': fmt, 1823 }) 1824 lang_subs.append({ 1825 'ext': fmt, 1826 'url': update_url_query(base_url, query), 1827 }) 1828 container[lang_code] = lang_subs 1829 1830 subtitles = {} 1831 for caption_track in (pctr.get('captionTracks') or []): 1832 base_url = caption_track.get('baseUrl') 1833 if not base_url: 1834 continue 1835 if caption_track.get('kind') != 'asr': 1836 lang_code = caption_track.get('languageCode') 1837 if not lang_code: 1838 continue 1839 process_language( 1840 subtitles, base_url, lang_code, {}) 1841 continue 1842 automatic_captions = {} 1843 for translation_language in (pctr.get('translationLanguages') or []): 1844 translation_language_code = translation_language.get('languageCode') 1845 if not translation_language_code: 1846 continue 1847 process_language( 1848 automatic_captions, base_url, translation_language_code, 1849 {'tlang': translation_language_code}) 1850 info['automatic_captions'] = automatic_captions 1851 info['subtitles'] = subtitles 1852 1853 parsed_url = compat_urllib_parse_urlparse(url) 1854 for component in [parsed_url.fragment, parsed_url.query]: 1855 query = compat_parse_qs(component) 1856 for k, v in query.items(): 1857 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: 1858 d_k += '_time' 1859 if d_k not in info and k in s_ks: 1860 info[d_k] = parse_duration(query[k][0]) 1861 1862 if video_description: 1863 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated 
by YouTube\.\s*$', video_description) 1864 if mobj: 1865 release_year = mobj.group('release_year') 1866 release_date = mobj.group('release_date') 1867 if release_date: 1868 release_date = release_date.replace('-', '') 1869 if not release_year: 1870 release_year = release_date[:4] 1871 info.update({ 1872 'album': mobj.group('album'.strip()), 1873 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 1874 'track': mobj.group('track').strip(), 1875 'release_date': release_date, 1876 'release_year': int_or_none(release_year), 1877 }) 1878 1879 initial_data = None 1880 if webpage: 1881 initial_data = self._extract_yt_initial_variable( 1882 webpage, self._YT_INITIAL_DATA_RE, video_id, 1883 'yt initial data') 1884 if not initial_data: 1885 initial_data = self._call_api( 1886 'next', {'videoId': video_id}, video_id, fatal=False) 1887 1888 if initial_data: 1889 chapters = self._extract_chapters_from_json( 1890 initial_data, video_id, duration) 1891 if not chapters: 1892 for engagment_pannel in (initial_data.get('engagementPanels') or []): 1893 contents = try_get( 1894 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'], 1895 list) 1896 if not contents: 1897 continue 1898 1899 def chapter_time(mmlir): 1900 return parse_duration( 1901 get_text(mmlir.get('timeDescription'))) 1902 1903 chapters = [] 1904 for next_num, content in enumerate(contents, start=1): 1905 mmlir = content.get('macroMarkersListItemRenderer') or {} 1906 start_time = chapter_time(mmlir) 1907 end_time = chapter_time(try_get( 1908 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \ 1909 if next_num < len(contents) else duration 1910 if start_time is None or end_time is None: 1911 continue 1912 chapters.append({ 1913 'start_time': start_time, 1914 'end_time': end_time, 1915 'title': get_text(mmlir.get('title')), 1916 }) 1917 if chapters: 1918 break 1919 if chapters: 1920 
info['chapters'] = chapters 1921 1922 contents = try_get( 1923 initial_data, 1924 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], 1925 list) or [] 1926 for content in contents: 1927 vpir = content.get('videoPrimaryInfoRenderer') 1928 if vpir: 1929 stl = vpir.get('superTitleLink') 1930 if stl: 1931 stl = get_text(stl) 1932 if try_get( 1933 vpir, 1934 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': 1935 info['location'] = stl 1936 else: 1937 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) 1938 if mobj: 1939 info.update({ 1940 'series': mobj.group(1), 1941 'season_number': int(mobj.group(2)), 1942 'episode_number': int(mobj.group(3)), 1943 }) 1944 for tlb in (try_get( 1945 vpir, 1946 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'], 1947 list) or []): 1948 tbr = tlb.get('toggleButtonRenderer') or {} 1949 for getter, regex in [( 1950 lambda x: x['defaultText']['accessibility']['accessibilityData'], 1951 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([ 1952 lambda x: x['accessibility'], 1953 lambda x: x['accessibilityData']['accessibilityData'], 1954 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]: 1955 label = (try_get(tbr, getter, dict) or {}).get('label') 1956 if label: 1957 mobj = re.match(regex, label) 1958 if mobj: 1959 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count')) 1960 break 1961 sbr_tooltip = try_get( 1962 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) 1963 if sbr_tooltip: 1964 like_count, dislike_count = sbr_tooltip.split(' / ') 1965 info.update({ 1966 'like_count': str_to_int(like_count), 1967 'dislike_count': str_to_int(dislike_count), 1968 }) 1969 vsir = content.get('videoSecondaryInfoRenderer') 1970 if vsir: 1971 info['channel'] = get_text(try_get( 1972 vsir, 1973 lambda x: x['owner']['videoOwnerRenderer']['title'], 1974 dict)) 1975 rows = try_get( 1976 vsir, 1977 lambda x: 
x['metadataRowContainer']['metadataRowContainerRenderer']['rows'], 1978 list) or [] 1979 multiple_songs = False 1980 for row in rows: 1981 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: 1982 multiple_songs = True 1983 break 1984 for row in rows: 1985 mrr = row.get('metadataRowRenderer') or {} 1986 mrr_title = mrr.get('title') 1987 if not mrr_title: 1988 continue 1989 mrr_title = get_text(mrr['title']) 1990 mrr_contents_text = get_text(mrr['contents'][0]) 1991 if mrr_title == 'License': 1992 info['license'] = mrr_contents_text 1993 elif not multiple_songs: 1994 if mrr_title == 'Album': 1995 info['album'] = mrr_contents_text 1996 elif mrr_title == 'Artist': 1997 info['artist'] = mrr_contents_text 1998 elif mrr_title == 'Song': 1999 info['track'] = mrr_contents_text 2000 2001 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: 2002 v = info.get(s_k) 2003 if v: 2004 info[d_k] = v 2005 2006 self.mark_watched(video_id, player_response) 2007 2008 return info 2009 2010 2011 class YoutubeTabIE(YoutubeBaseInfoExtractor): 2012 IE_DESC = 'YouTube.com tab' 2013 _VALID_URL = r'''(?x) 2014 https?:// 2015 (?:\w+\.)? 
2016 (?: 2017 youtube(?:kids)?\.com| 2018 invidio\.us 2019 )/ 2020 (?: 2021 (?:channel|c|user|feed|hashtag)/| 2022 (?:playlist|watch)\?.*?\blist=| 2023 (?!(?:watch|embed|v|e)\b) 2024 ) 2025 (?P<id>[^/?\#&]+) 2026 ''' 2027 IE_NAME = 'youtube:tab' 2028 2029 _TESTS = [{ 2030 # playlists, multipage 2031 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', 2032 'playlist_mincount': 94, 2033 'info_dict': { 2034 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 2035 'title': 'Игорь Клейнер - Playlists', 2036 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 2037 }, 2038 }, { 2039 # playlists, multipage, different order 2040 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', 2041 'playlist_mincount': 94, 2042 'info_dict': { 2043 'id': 'UCqj7Cz7revf5maW9g5pgNcg', 2044 'title': 'Игорь Клейнер - Playlists', 2045 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 2046 }, 2047 }, { 2048 # playlists, series 2049 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 2050 'playlist_mincount': 5, 2051 'info_dict': { 2052 'id': 'UCYO_jab_esuFRV4b17AJtAw', 2053 'title': '3Blue1Brown - Playlists', 2054 'description': 'md5:e1384e8a133307dd10edee76e875d62f', 2055 }, 2056 }, { 2057 # playlists, singlepage 2058 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', 2059 'playlist_mincount': 4, 2060 'info_dict': { 2061 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 2062 'title': 'ThirstForScience - Playlists', 2063 'description': 'md5:609399d937ea957b0f53cbffb747a14c', 2064 } 2065 }, { 2066 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 2067 'only_matching': True, 2068 }, { 2069 # basic, single video playlist 2070 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 2071 'info_dict': { 2072 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 2073 'uploader': 'Sergey M.', 2074 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 2075 'title': 'youtube-dl public playlist', 2076 }, 2077 
'playlist_count': 1, 2078 }, { 2079 # empty playlist 2080 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 2081 'info_dict': { 2082 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 2083 'uploader': 'Sergey M.', 2084 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 2085 'title': 'youtube-dl empty playlist', 2086 }, 2087 'playlist_count': 0, 2088 }, { 2089 # Home tab 2090 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', 2091 'info_dict': { 2092 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2093 'title': 'lex will - Home', 2094 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2095 }, 2096 'playlist_mincount': 2, 2097 }, { 2098 # Videos tab 2099 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', 2100 'info_dict': { 2101 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2102 'title': 'lex will - Videos', 2103 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2104 }, 2105 'playlist_mincount': 975, 2106 }, { 2107 # Videos tab, sorted by popular 2108 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', 2109 'info_dict': { 2110 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2111 'title': 'lex will - Videos', 2112 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2113 }, 2114 'playlist_mincount': 199, 2115 }, { 2116 # Playlists tab 2117 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', 2118 'info_dict': { 2119 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2120 'title': 'lex will - Playlists', 2121 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2122 }, 2123 'playlist_mincount': 17, 2124 }, { 2125 # Community tab 2126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 2127 'info_dict': { 2128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2129 'title': 'lex will - Community', 2130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2131 }, 2132 'playlist_mincount': 18, 2133 }, { 2134 # Channels tab 2135 'url': 
'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', 2136 'info_dict': { 2137 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 2138 'title': 'lex will - Channels', 2139 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 2140 }, 2141 'playlist_mincount': 138, 2142 }, { 2143 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 2144 'only_matching': True, 2145 }, { 2146 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 2147 'only_matching': True, 2148 }, { 2149 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 2150 'only_matching': True, 2151 }, { 2152 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 2153 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 2154 'info_dict': { 2155 'title': '29C3: Not my department', 2156 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 2157 'uploader': 'Christiaan008', 2158 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', 2159 }, 2160 'playlist_count': 96, 2161 }, { 2162 'note': 'Large playlist', 2163 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', 2164 'info_dict': { 2165 'title': 'Uploads from Cauchemar', 2166 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', 2167 'uploader': 'Cauchemar', 2168 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', 2169 }, 2170 'playlist_mincount': 1123, 2171 }, { 2172 # even larger playlist, 8832 videos 2173 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', 2174 'only_matching': True, 2175 }, { 2176 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 2177 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 2178 'info_dict': { 2179 'title': 'Uploads from Interstellar Movie', 2180 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', 2181 'uploader': 'Interstellar Movie', 2182 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', 2183 }, 2184 'playlist_mincount': 21, 2185 }, { 2186 # https://github.com/ytdl-org/youtube-dl/issues/21844 
2187 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 2188 'info_dict': { 2189 'title': 'Data Analysis with Dr Mike Pound', 2190 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 2191 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', 2192 'uploader': 'Computerphile', 2193 }, 2194 'playlist_mincount': 11, 2195 }, { 2196 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 2197 'only_matching': True, 2198 }, { 2199 # Playlist URL that does not actually serve a playlist 2200 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', 2201 'info_dict': { 2202 'id': 'FqZTN594JQw', 2203 'ext': 'webm', 2204 'title': "Smiley's People 01 detective, Adventure Series, Action", 2205 'uploader': 'STREEM', 2206 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', 2207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', 2208 'upload_date': '20150526', 2209 'license': 'Standard YouTube License', 2210 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 2211 'categories': ['People & Blogs'], 2212 'tags': list, 2213 'view_count': int, 2214 'like_count': int, 2215 'dislike_count': int, 2216 }, 2217 'params': { 2218 'skip_download': True, 2219 }, 2220 'skip': 'This video is not available.', 2221 'add_ie': [YoutubeIE.ie_key()], 2222 }, { 2223 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', 2224 'only_matching': True, 2225 }, { 2226 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 2227 'only_matching': True, 2228 }, { 2229 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 2230 'info_dict': { 2231 'id': '9Auq9mYxFEE', 2232 'ext': 'mp4', 2233 'title': 'Watch Sky News live', 2234 'uploader': 'Sky News', 2235 'uploader_id': 'skynews', 2236 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', 2237 'upload_date': '20191102', 2238 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662', 2239 
'categories': ['News & Politics'], 2240 'tags': list, 2241 'like_count': int, 2242 'dislike_count': int, 2243 }, 2244 'params': { 2245 'skip_download': True, 2246 }, 2247 }, { 2248 'url': 'https://www.youtube.com/user/TheYoungTurks/live', 2249 'info_dict': { 2250 'id': 'a48o2S1cPoo', 2251 'ext': 'mp4', 2252 'title': 'The Young Turks - Live Main Show', 2253 'uploader': 'The Young Turks', 2254 'uploader_id': 'TheYoungTurks', 2255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', 2256 'upload_date': '20150715', 2257 'license': 'Standard YouTube License', 2258 'description': 'md5:438179573adcdff3c97ebb1ee632b891', 2259 'categories': ['News & Politics'], 2260 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], 2261 'like_count': int, 2262 'dislike_count': int, 2263 }, 2264 'params': { 2265 'skip_download': True, 2266 }, 2267 'only_matching': True, 2268 }, { 2269 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 2270 'only_matching': True, 2271 }, { 2272 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', 2273 'only_matching': True, 2274 }, { 2275 'url': 'https://www.youtube.com/feed/trending', 2276 'only_matching': True, 2277 }, { 2278 # needs auth 2279 'url': 'https://www.youtube.com/feed/library', 2280 'only_matching': True, 2281 }, { 2282 # needs auth 2283 'url': 'https://www.youtube.com/feed/history', 2284 'only_matching': True, 2285 }, { 2286 # needs auth 2287 'url': 'https://www.youtube.com/feed/subscriptions', 2288 'only_matching': True, 2289 }, { 2290 # needs auth 2291 'url': 'https://www.youtube.com/feed/watch_later', 2292 'only_matching': True, 2293 }, { 2294 # no longer available? 
2295 'url': 'https://www.youtube.com/feed/recommended', 2296 'only_matching': True, 2297 }, { 2298 # inline playlist with not always working continuations 2299 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', 2300 'only_matching': True, 2301 }, { 2302 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8', 2303 'only_matching': True, 2304 }, { 2305 'url': 'https://www.youtube.com/course', 2306 'only_matching': True, 2307 }, { 2308 'url': 'https://www.youtube.com/zsecurity', 2309 'only_matching': True, 2310 }, { 2311 'url': 'http://www.youtube.com/NASAgovVideo/videos', 2312 'only_matching': True, 2313 }, { 2314 'url': 'https://www.youtube.com/TheYoungTurks/live', 2315 'only_matching': True, 2316 }, { 2317 'url': 'https://www.youtube.com/hashtag/cctv9', 2318 'info_dict': { 2319 'id': 'cctv9', 2320 'title': '#cctv9', 2321 }, 2322 'playlist_mincount': 350, 2323 }, { 2324 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', 2325 'only_matching': True, 2326 }] 2327 2328 @classmethod 2329 def suitable(cls, url): 2330 return False if YoutubeIE.suitable(url) else super( 2331 YoutubeTabIE, cls).suitable(url) 2332 2333 def _extract_channel_id(self, webpage): 2334 channel_id = self._html_search_meta( 2335 'channelId', webpage, 'channel id', default=None) 2336 if channel_id: 2337 return channel_id 2338 channel_url = self._html_search_meta( 2339 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', 2340 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', 2341 'twitter:app:url:googleplay'), webpage, 'channel url') 2342 return self._search_regex( 2343 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', 2344 channel_url, 'channel id') 2345 2346 @staticmethod 2347 def _extract_grid_item_renderer(item): 2348 assert isinstance(item, dict) 2349 for key, renderer in item.items(): 2350 if not key.startswith('grid') or not key.endswith('Renderer'): 2351 
continue 2352 if not isinstance(renderer, dict): 2353 continue 2354 return renderer 2355 2356 def _grid_entries(self, grid_renderer): 2357 for item in grid_renderer['items']: 2358 if not isinstance(item, dict): 2359 continue 2360 renderer = self._extract_grid_item_renderer(item) 2361 if not isinstance(renderer, dict): 2362 continue 2363 title = try_get( 2364 renderer, (lambda x: x['title']['runs'][0]['text'], 2365 lambda x: x['title']['simpleText']), compat_str) 2366 # playlist 2367 playlist_id = renderer.get('playlistId') 2368 if playlist_id: 2369 yield self.url_result( 2370 'https://www.youtube.com/playlist?list=%s' % playlist_id, 2371 ie=YoutubeTabIE.ie_key(), video_id=playlist_id, 2372 video_title=title) 2373 continue 2374 # video 2375 video_id = renderer.get('videoId') 2376 if video_id: 2377 yield self._extract_video(renderer) 2378 continue 2379 # channel 2380 channel_id = renderer.get('channelId') 2381 if channel_id: 2382 title = try_get( 2383 renderer, lambda x: x['title']['simpleText'], compat_str) 2384 yield self.url_result( 2385 'https://www.youtube.com/channel/%s' % channel_id, 2386 ie=YoutubeTabIE.ie_key(), video_title=title) 2387 continue 2388 # generic endpoint URL support 2389 ep_url = urljoin('https://www.youtube.com/', try_get( 2390 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], 2391 compat_str)) 2392 if ep_url: 2393 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE): 2394 if ie.suitable(ep_url): 2395 yield self.url_result( 2396 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title) 2397 break 2398 2399 def _shelf_entries_from_content(self, shelf_renderer): 2400 content = shelf_renderer.get('content') 2401 if not isinstance(content, dict): 2402 return 2403 renderer = content.get('gridRenderer') 2404 if renderer: 2405 # TODO: add support for nested playlists so each shelf is processed 2406 # as separate playlist 2407 # TODO: this includes only first N items 2408 for entry in 
self._grid_entries(renderer): 2409 yield entry 2410 renderer = content.get('horizontalListRenderer') 2411 if renderer: 2412 # TODO 2413 pass 2414 2415 def _shelf_entries(self, shelf_renderer, skip_channels=False): 2416 ep = try_get( 2417 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], 2418 compat_str) 2419 shelf_url = urljoin('https://www.youtube.com', ep) 2420 if shelf_url: 2421 # Skipping links to another channels, note that checking for 2422 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL 2423 # will not work 2424 if skip_channels and '/channels?' in shelf_url: 2425 return 2426 title = try_get( 2427 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) 2428 yield self.url_result(shelf_url, video_title=title) 2429 # Shelf may not contain shelf URL, fallback to extraction from content 2430 for entry in self._shelf_entries_from_content(shelf_renderer): 2431 yield entry 2432 2433 def _playlist_entries(self, video_list_renderer): 2434 for content in video_list_renderer['contents']: 2435 if not isinstance(content, dict): 2436 continue 2437 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer') 2438 if not isinstance(renderer, dict): 2439 continue 2440 video_id = renderer.get('videoId') 2441 if not video_id: 2442 continue 2443 yield self._extract_video(renderer) 2444 2445 def _video_entry(self, video_renderer): 2446 video_id = video_renderer.get('videoId') 2447 if video_id: 2448 return self._extract_video(video_renderer) 2449 2450 def _post_thread_entries(self, post_thread_renderer): 2451 post_renderer = try_get( 2452 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict) 2453 if not post_renderer: 2454 return 2455 # video attachment 2456 video_renderer = try_get( 2457 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) 2458 video_id = None 2459 if video_renderer: 2460 entry = 
self._video_entry(video_renderer) 2461 if entry: 2462 yield entry 2463 # inline video links 2464 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] 2465 for run in runs: 2466 if not isinstance(run, dict): 2467 continue 2468 ep_url = try_get( 2469 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str) 2470 if not ep_url: 2471 continue 2472 if not YoutubeIE.suitable(ep_url): 2473 continue 2474 ep_video_id = YoutubeIE._match_id(ep_url) 2475 if video_id == ep_video_id: 2476 continue 2477 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id) 2478 2479 def _post_thread_continuation_entries(self, post_thread_continuation): 2480 contents = post_thread_continuation.get('contents') 2481 if not isinstance(contents, list): 2482 return 2483 for content in contents: 2484 renderer = content.get('backstagePostThreadRenderer') 2485 if not isinstance(renderer, dict): 2486 continue 2487 for entry in self._post_thread_entries(renderer): 2488 yield entry 2489 2490 def _rich_grid_entries(self, contents): 2491 for content in contents: 2492 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict) 2493 if video_renderer: 2494 entry = self._video_entry(video_renderer) 2495 if entry: 2496 yield entry 2497 2498 @staticmethod 2499 def _build_continuation_query(continuation, ctp=None): 2500 query = { 2501 'ctoken': continuation, 2502 'continuation': continuation, 2503 } 2504 if ctp: 2505 query['itct'] = ctp 2506 return query 2507 2508 @staticmethod 2509 def _extract_next_continuation_data(renderer): 2510 next_continuation = try_get( 2511 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) 2512 if not next_continuation: 2513 return 2514 continuation = next_continuation.get('continuation') 2515 if not continuation: 2516 return 2517 ctp = next_continuation.get('clickTrackingParams') 2518 return YoutubeTabIE._build_continuation_query(continuation, ctp) 2519 2520 
@classmethod
    def _extract_continuation(cls, renderer):
        # Prefer the old-style nextContinuationData; otherwise scan the
        # renderer's 'contents'/'items' for a continuationItemRenderer and
        # build the query from its continuationCommand token.
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation
        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])
        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                dict)
            if not continuation_ep:
                continue
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                continue
            ctp = continuation_ep.get('clickTrackingParams')
            return YoutubeTabIE._build_continuation_query(continuation, ctp)

    def _entries(self, tab, item_id, webpage):
        """
        Yield all entries of the selected tab, following continuations.

        First walks the initial tab content (sectionListRenderer or
        richGridRenderer), then pages through the InnerTube /browse API
        using the continuation tokens collected along the way.
        """
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        if slr_renderer:
            is_channels_tab = tab.get('title') == 'Channels'
            continuation = None
            slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
            for slr_content in slr_contents:
                if not isinstance(slr_content, dict):
                    continue
                is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    # Dispatch on whichever renderer type this item carries;
                    # each branch also refreshes the continuation token.
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # On non-Channels tabs, skip shelves linking to other channels
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation:
                    continuation = self._extract_continuation(is_renderer)
            if not continuation:
                continuation = self._extract_continuation(slr_renderer)
        else:
            rich_grid_renderer = tab_content.get('richGridRenderer')
            if not rich_grid_renderer:
                return
            for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
                yield entry
            continuation = self._extract_continuation(rich_grid_renderer)

        ytcfg = self._extract_ytcfg(item_id, webpage)
        client_version = try_get(
            ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': client_version,
            'content-type': 'application/json',
        }

        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
            'client': {
                'clientName': 'WEB',
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        identity_token = self._extract_identity_token(ytcfg, webpage)
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        data = {
            'context': context,
        }

        for page_num in itertools.count(1):
            if not continuation:
                break
            if visitor_data:
                headers['x-goog-visitor-id'] = visitor_data
            data['continuation'] = continuation['continuation']
            data['clickTracking'] = {
                'clickTrackingParams': continuation['itct']
            }
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    response = self._download_json(
                        'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                        None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, data=json.dumps(data).encode('utf8'))
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not response:
                break

            # Carry forward the visitorData returned by the API, if any
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Legacy continuation response shape
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue

            # Newer continuation response shape: appendContinuationItemsAction.
            # The first item's renderer type decides how the whole batch is parsed.
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    # NOTE(review): 'continue' here re-requests the same
                    # continuation token next iteration — could loop if the
                    # server keeps returning the same malformed page; verify.
                    continue
                renderer = self._extract_grid_item_renderer(continuation_item)
                if renderer:
                    grid_renderer = {'items': continuation_items}
                    for entry in self._grid_entries(grid_renderer):
                        yield entry
                    continuation = self._extract_continuation(grid_renderer)
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
                renderer = continuation_item.get('backstagePostThreadRenderer')
                if renderer:
                    continuation_renderer = {'contents': continuation_items}
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                renderer = continuation_item.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_grid_entries(continuation_items):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    continue

            break

    @staticmethod
    def _extract_selected_tab(tabs):
        # Return the tabRenderer marked selected; for/else raises when the
        # loop finds none (no tab has selected == True).
        for tab in tabs:
            if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
                return tab['tabRenderer']
        else:
            raise ExtractorError('Unable to find selected tab')

    @staticmethod
    def _extract_uploader(data):
        # Collect uploader name/id/url from the playlist sidebar, returning
        # a (possibly empty) dict suitable for info-dict update().
        uploader = {}
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if sidebar_renderer:
            for item in sidebar_renderer:
                if not isinstance(item, dict):
                    continue
                renderer = item.get('playlistSidebarSecondaryInfoRenderer')
                if not isinstance(renderer, dict):
                    continue
                owner = try_get(
                    renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
                if owner:
                    uploader['uploader'] = owner.get('text')
                    uploader['uploader_id'] = try_get(
                        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
                    uploader['uploader_url'] = urljoin(
                        'https://www.youtube.com/',
                        try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
        return uploader

    @staticmethod
    def _extract_alert(data):
        # Join the text of all alertRenderers (either simpleText or the
        # first text run) into one newline-separated string.
        alerts = []
        for alert in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert, dict):
                continue
            alert_text = try_get(
                alert, lambda x: x['alertRenderer']['text'], dict)
            if not alert_text:
                continue
            text = try_get(
                alert_text,
                (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
                compat_str)
            if text:
                alerts.append(text)
        return '\n'.join(alerts)

    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """
        Build a playlist result from a tabbed (channel/playlist/hashtag) page.

        Title/description/id come from channelMetadataRenderer when present,
        then playlistMetadataRenderer, then hashtagHeaderRenderer; entries
        come from the selected tab via _entries().
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = item_id
        title = description = None
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            # channel_title already falls back to item_id, so the second
            # 'or item_id' below is redundant but harmless
            title = channel_title or item_id
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
            if renderer:
                title = renderer.get('title')
            else:
                renderer = try_get(
                    data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
                if renderer:
                    title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
        playlist = self.playlist_result(
            self._entries(selected_tab, item_id, webpage),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist

    def _extract_from_playlist(self, item_id, url, data, playlist):
        # Build a result from an inline (watch-page) playlist renderer,
        # delegating to the regular playlist URL when one is available.
        title = playlist.get('title') or try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
        playlist_id = playlist.get('playlistId') or item_id
        # Inline playlist rendition continuation does not always work
        # at Youtube side, so delegating regular tab-based playlist URL
        # processing whenever possible.
        playlist_url = urljoin(url, try_get(
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if playlist_url and playlist_url != url:
            return self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)

    def _extract_identity_token(self, ytcfg, webpage):
        # ID_TOKEN from ytcfg when available, else regex fallback on the page.
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    def _real_extract(self, url):
        item_id = self._match_id(url)
        # Normalize host so subdomain/consent variants hit www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Capture and output alerts
        alert = self._extract_alert(data)
        if alert:
            raise ExtractorError(alert, expected=True)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')


class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Matches bare playlist ids and list= query URLs; delegates actual
    # extraction to YoutubeTabIE via a canonical /playlist URL.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for anything it matches, and refuse URLs
        # that carry a v= parameter (those are watch URLs, not playlists).
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        qs = parse_qs(url)
        if not qs:
            # bare playlist id given — synthesize the list= query
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)


class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a list= parameter.
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Re-issue as a full watch URL so YoutubeTabIE handles the
        # video-vs-playlist decision (including --no-playlist).
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)


class YoutubeYtUserIE(InfoExtractor):
    # 'ytuser:NAME' shorthand — delegates to the /user/ channel page.
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        return self.url_result(
            'https://www.youtube.com/user/%s' % user_id,
            ie=YoutubeTabIE.ie_key(), video_id=user_id)


class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Favourites are exposed as the special 'LL' (liked) playlist.
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())


class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # extra InnerTube search params (e.g. sort order); set by subclasses
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query via the InnerTube search API."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # first page and continuation pages use different response shapes
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            for slr_content in slr_contents:
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue
                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return
            # continuation token lives in the last section-list item
            token = try_get(
                slr_contents,
                lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)


class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # InnerTube params blob selecting upload-date sort order
    _SEARCH_PARAMS = 'CAI%3D'


r"""
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
"""


class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())


class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch Later is the special 'WL' playlist.
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())


class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r':ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }]


class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r':ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]


class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]


class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs whose v= parameter was eaten by the shell and
    # raises a helpful error instead of a confusing extraction failure.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)


class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the required 11 chars.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)