commit 5552c9eb0fece567f7dda13810939fca32d7d65a
parent 59ed87cbd9ea08c889514a05b646141004f432a1
Author: Yen Chi Hsuan <yan12125@gmail.com>
Date: Fri, 26 May 2017 21:58:18 +0800
[utils] Recognize more patterns in strip_jsonp()
Used in Youku Show pages
Diffstat:
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,6 +1,7 @@
version <unreleased>
Core
++ [utils] strip_jsonp() can recognize more patterns
* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
Extractors
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -678,6 +678,14 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
+ stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape('aä'), 'aä')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -2211,7 +2211,12 @@ def parse_age_limit(s):
def strip_jsonp(code):
return re.sub(
- r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
def js_to_json(code):