commit 91757b0f373ec3201f95066eeb0e09ebdcc1a067
parent fbfcc2972b3b24bda092eaed92b81113154c4327
Author: Naglis Jonaitis <njonaitis@gmail.com>
Date: Thu, 26 Mar 2015 17:15:27 +0200
[utils] Unescape all HTML entities written in hexadecimal form
Diffstat:
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -200,6 +200,8 @@ class TestUtil(unittest.TestCase):
def test_unescape_html(self):
self.assertEqual(unescapeHTML('%20;'), '%20;')
+ self.assertEqual(unescapeHTML('&#x2F;'), '/')
+ self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(
unescapeHTML('&eacute;'), 'é')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -348,7 +348,7 @@ def _htmlentity_transform(entity):
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):