[youtube] Correct invalid JSON (Fixes #2353) - youtube-dl - Another place where youtube-dl lives on

commit 81c2f20b5386d89a62dc27293654d75b77f47473
parent 1afe753462f0293122dc7a9b534b4f5cdb1e5c4e
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sun,  9 Feb 2014 17:56:10 +0100

[youtube] Correct invalid JSON (Fixes #2353)

Diffstat:
M youtube_dl/extractor/common.py  | 5 ++++-
M youtube_dl/extractor/youtube.py  | 10 +++++-----
M youtube_dl/utils.py  | 6 ++++++

3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -271,8 +271,11 @@ class InfoExtractor(object):
 
     def _download_json(self, url_or_request, video_id,
                        note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata'):
+                       errnote=u'Unable to download JSON metadata',
+                       transform_source=None):
         json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        if transform_source:
+            json_string = transform_source(json_string)
         try:
             return json.loads(json_string)
         except ValueError as ve:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
     unified_strdate,
     orderedSet,
     write_json_file,
+    uppercase_escape,
 )
 
 class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
             # Download all channel pages using the json-based channel_ajax query
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-    
-                page = json.loads(page)
-    
+                page = self._download_json(
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    transform_source=uppercase_escape)
+
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 video_ids.extend(ids_in_page)
     
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1214,3 +1214,9 @@ class PagedList(object):
             if end == nextfirstid:
                 break
         return res
+
+
+def uppercase_escape(s):
+    return re.sub(
+        r'\\U([0-9a-fA-F]{8})',
+        lambda m: compat_chr(int(m.group(1), base=16)), s)

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log | Files | Refs | README | LICENSE

M	youtube_dl/extractor/common.py	|	5	++++-
M	youtube_dl/extractor/youtube.py	|	10	+++++-----
M	youtube_dl/utils.py	|	6	++++++