commit 0d75ae2ce313c5738b2bdd9602ab3cc15e78810d
parent 2891932bf0a01acc025246438f890dca57f91c6b
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date: Thu, 29 Aug 2013 11:35:15 +0200
Fix detection of the webpage charset if it's declared using ' instead of "
Like in "<meta charset='utf-8'/>"
Diffstat:
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -150,7 +150,7 @@ class InfoExtractor(object):
if m:
encoding = m.group(1)
else:
- m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
webpage_bytes[:1024])
if m:
encoding = m.group(1).decode('ascii')