[9gag] Add extractor - youtube-dl - Another place where youtube-dl lives on

commit 7fc3fa0545f8a07414e8c97be9862a3c2f79bb98
parent 29030c0a4c2f4dded5a310add940aae0791f9d73
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu,  5 Dec 2013 14:29:08 +0100

[9gag] Add extractor

Diffstat:
M youtube_dl/YoutubeDL.py  | 34 +++++++++++++++++++++++++++++++---
M youtube_dl/extractor/__init__.py  | 1 +
A youtube_dl/extractor/ninegag.py  | 41 +++++++++++++++++++++++++++++++++++++++++

3 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -405,7 +405,8 @@ class YoutubeDL(object):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
-    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+                     process=True):
         '''
         Returns a list with a dictionary for each video we find.
         If 'download', also downloads the videos.
@@ -441,7 +442,10 @@ class YoutubeDL(object):
                         'webpage_url': url,
                         'extractor_key': ie.ie_key(),
                     })
-                return self.process_ie_result(ie_result, download, extra_info)
+                if process:
+                    return self.process_ie_result(ie_result, download, extra_info)
+                else:
+                    return ie_result
             except ExtractorError as de: # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
                 break
@@ -474,8 +478,32 @@ class YoutubeDL(object):
                                      download,
                                      ie_key=ie_result.get('ie_key'),
                                      extra_info=extra_info)
+        elif result_type == 'url_transparent':
+            # Use the information from the embedding page
+            info = self.extract_info(
+                ie_result['url'], ie_key=ie_result.get('ie_key'),
+                extra_info=extra_info, download=False, process=False)
+
+            def make_result(embedded_info):
+                new_result = ie_result.copy()
+                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
+                          'entries', 'urlhandle', 'ie_key', 'duration',
+                          'subtitles', 'annotations', 'format'):
+                    if f in new_result:
+                        del new_result[f]
+                    if f in embedded_info:
+                        new_result[f] = embedded_info[f]
+                return new_result
+            new_result = make_result(info)
+
+            assert new_result.get('_type') != 'url_transparent'
+            if new_result.get('_type') == 'compat_list':
+                new_result['entries'] = [
+                    make_result(e) for e in new_result['entries']]
+
+            return self.process_ie_result(
+                new_result, download=download, extra_info=extra_info)
         elif result_type == 'playlist':
-
             # We process each entry in the playlist
             playlist = ie_result.get('title', None) or ie_result.get('id', None)
             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -102,6 +102,7 @@ from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
+from .ninegag import NineGagIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py
@@ -0,0 +1,41 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class NineGagIE(InfoExtractor):
+    IE_NAME = '9gag'
+    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+
+    _TEST = {
+        u"url": u"http://9gag.tv/v/1912",
+        u"file": u"1912.mp4",
+        u"info_dict": {
+            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+        },
+        u'add_ie': [u'Youtube']
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        data_json = self._html_search_regex(r'''(?x)
+            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
+                data-video-meta="([^"]+)"''', webpage, u'video metadata')
+
+        data = json.loads(data_json)
+
+        return {
+            '_type': 'url_transparent',
+            'url': data['youtubeVideoId'],
+            'ie_key': 'Youtube',
+            'id': video_id,
+            'title': data['title'],
+            'description': data['description'],
+            'view_count': int(data['view_count']),
+            'thumbnail': data['thumbnail_url'],
+        }

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE

M	youtube_dl/YoutubeDL.py	\|	34	+++++++++++++++++++++++++++++++---
M	youtube_dl/extractor/__init__.py	\|	1	+
A	youtube_dl/extractor/ninegag.py	\|	41	+++++++++++++++++++++++++++++++++++++++++