[YouTube] Fix nsig processing for player `b22ef6e7`
authordirkf <fieldhouse@gmx.net>
Wed, 10 Jul 2024 17:20:59 +0000 (18:20 +0100)
committerdirkf <fieldhouse@gmx.net>
Wed, 10 Jul 2024 23:50:46 +0000 (00:50 +0100)
* improve extraction of function name (like yt-dlp/yt-dlp#10390)
* always use JSInterp to extract function code (yt-dlp/yt-dlp#10396, thx seproDev, pukkandan)

test/test_youtube_signature.py
youtube_dl/extractor/youtube.py

index cafba7a5cddab38c48864c1792a8e0cba9c11673..cc18d0f7be30ba0d6ad14ec1fbf82ef550841a31 100644 (file)
@@ -162,6 +162,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
         '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
     ),
+    (
+        'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
+        'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
+    ),
 ]
 
 
index 90c16e172bd7fa3303a6089b74f06789b68dd1da..2e31a89798e96e62dad9b09d7501a8249ecf5091 100644 (file)
@@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         try:
             jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
         except ExtractorError as e:
-            raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e)
+            raise ExtractorError('Unable to extract nsig function code', cause=e)
         if self.get_param('youtube_print_sig_code'):
             self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
                 player_id, func_code[1]))
@@ -1658,8 +1658,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_n_function_name(self, jscode):
         func_name, idx = self._search_regex(
-            r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
-            jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+            # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
+            # old: .get("n"))&&(b=nfunc[idx](b)
+            # older: .get("n"))&&(b=nfunc(b)
+            r'''(?x)
+                (?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+                \.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+                (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
+            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
         if not idx:
             return func_name
 
@@ -1679,17 +1685,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         func_name = self._extract_n_function_name(jscode)
 
-        # For redundancy
-        func_code = self._search_regex(
-            r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
-                     # NB: The end of the regex is intentionally kept strict
-                     {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
-            jscode, 'nsig function', group=('var', 'code'), default=None)
-        if func_code:
-            func_code = ([func_code[0]], func_code[1])
-        else:
-            self.write_debug('Extracting nsig function with jsinterp')
-            func_code = jsi.extract_function_code(func_name)
+        func_code = jsi.extract_function_code(func_name)
 
         self.cache.store('youtube-nsig', player_id, func_code)
         return jsi, player_id, func_code