[Youtube] Fix nsig extraction for player 20dfca59 (#32891)
authorAiur Adept <151766879+aiur-adept@users.noreply.github.com>
Thu, 1 Aug 2024 18:18:34 +0000 (14:18 -0400)
committerGitHub <noreply@github.com>
Thu, 1 Aug 2024 18:18:34 +0000 (19:18 +0100)
* dirkf's patch for nsig extraction
* add generic search per  yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
test/test_youtube_signature.py
youtube_dl/extractor/youtube.py

index 5b4aa3aa05bad3474160cd6aeebb7d0527da9bb8..1c5f667f57ac97a9420fd985434b5b1b7223367a 100644 (file)
@@ -174,6 +174,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
         '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
     ),
+    (
+        'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
+        '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
+    ),
 ]
 
 
index 84371ff063d9f8ce55a51ea7ee2769720d88d105..509e374a4d78c4679fb2f399316ea412d5a24884 100644 (file)
@@ -1659,18 +1659,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _extract_n_function_name(self, jscode):
         func_name, idx = self._search_regex(
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
-            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
+            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
+            # or:  (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
             # old: .get("n"))&&(b=nfunc[idx](b)
             # older: .get("n"))&&(b=nfunc(b)
             r'''(?x)
-                (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
+                (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?:
                     String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
-                    "n+"\[\s*\+?s*[\w$.]+\s*]
-                )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
-                \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+                    "n+"\[\s*\+?s*[\w$.]+\s*]|
+                    (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\))
+                )\s*
+                    (?(b1)
+                          &&\s*\(\s*(?P=b)|
+                          (?:
+                              ,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+                              \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*
+                              &&\s*\(\s*(?(c)(?P=c)|(?P=b))
+                          )
+                    )\s*=\s*
                 (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
-            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
+            default=(None, None))
+        # thx bashonly: yt-dlp/yt-dlp/pull/10611
+        if not func_name:
+            self.report_warning('Falling back to generic n function search')
+            return self._search_regex(
+                r'''(?xs)
+                    (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
+                    (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
+                    \s*\{(?:(?!};).)+?["']enhanced_except_
+                ''', jscode, 'Initial JS player n function name', group='name')
         if not idx:
+            self.report_warning('Falling back to generic n function search')
             return func_name
 
         return self._parse_json(self._search_regex(