youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit bd1e484448c84904ce0d99fe05c3721053aa3c00
parent a834622b89031dad5afbf96e4a5939a66b0d054b
Author: felix <felix.von.s@posteo.de>
Date:   Sun, 13 Mar 2016 12:29:15 +0100

[utils] js_to_json: various improvements

now JS object literals like { /* " */ 0: ",]\xaa<\/p>", } will be correctly converted to JSON.

Diffstat:
Mtest/test_utils.py | 12++++++++++++
Myoutube_dl/utils.py | 30++++++++++++++++--------------
2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json(r'["<p>x<\/p>"]') + self.assertEqual(json.loads(on), ['<p>x</p>']) + + on = js_to_json(r'["\xaa"]') + self.assertEqual(json.loads(on), ['\u00aa']) + + on = js_to_json("['a\\\nb']") + self.assertEqual(json.loads(on), ['ab']) + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -1914,24 +1914,26 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - if v.startswith('"'): - v = re.sub(r"\\'", "'", v[1:-1]) - elif v.startswith("'"): - v = v[1:-1] - v = re.sub(r"\\\\|\\'|\"", lambda m: { - '\\\\': '\\\\', - "\\'": "'", + elif v.startswith('/*') or v == ',': + return "" + + if v[0] in ("'", '"'): + v = re.sub(r'(?s)\\.|"', lambda m: { '"': '\\"', - }[m.group(0)], v) + "\\'": "'", + '\\\n': '', + '\\x': '\\u00', + }.get(m.group(0), m.group(0)), v[1:-1]) + return '"%s"' % v - res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| - [a-zA-Z_][.a-zA-Z_0-9]* + return re.sub(r'''(?sx) + "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + /\*.*?\*/|,(?=\s*[\]}])| + [a-zA-Z_][.a-zA-Z_0-9]*| + [0-9]+(?=\s*:) ''', fix_kv, code) - res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res) - return res def qualities(quality_ids):