[utils] Recognize more patterns in strip_jsonp()
author Yen Chi Hsuan <yan12125@gmail.com>
Fri, 26 May 2017 13:58:18 +0000 (21:58 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Fri, 26 May 2017 13:58:18 +0000 (21:58 +0800)
Used in Youku Show pages

ChangeLog
test/test_utils.py
youtube_dl/utils.py

index 6a05657abba410753a03d2f76381332c688aa976..d6e980c5aa9ac3fab312ecd93bbea4128caff818 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Core
++ [utils] strip_jsonp() can recognize more patterns
 * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
 
 Extractors
index f31559e71f60f9474aa51ae36dbbc3e24affd5ce..d7e05817ccbdf9c51f53d68cf9694ac4591aac00 100644 (file)
@@ -678,6 +678,14 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
index 4293a77f569879964ce9019c44f8f85bfcaad982..6c84bfe0ffc5667d53a88fbae3ce76e76e36d909 100644 (file)
@@ -2211,7 +2211,12 @@ def parse_age_limit(s):
 
 def strip_jsonp(code):
     return re.sub(
-        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+        r'''(?sx)^
+            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+            (?:\s*&&\s*(?P=func_name))?
+            \s*\(\s*(?P<callback_data>.*)\);?
+            \s*?(?://[^\n]*)*$''',
+        r'\g<callback_data>', code)
 
 
 def js_to_json(code):