youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 3bf5705316adb4e486ec76a9308198b499787947
parent 1c2528c8a3c599740558d5d752dfb368c8714a89
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu, 19 Feb 2015 01:43:20 +0100

[imgur] Add new extractor

Diffstat:
M youtube_dl/extractor/__init__.py | 1 +
A youtube_dl/extractor/imgur.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -205,6 +205,7 @@ from .imdb import (
     ImdbIE,
     ImdbListIE
 )
+from .imgur import ImgurIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
@@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+)
+
+
+class ImgurIE(InfoExtractor):
+    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
+
+    _TESTS = [{
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 'MRW gifv is up and running without any bugs',
+            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        width = int_or_none(self._search_regex(
+            r'<param name="width" value="([0-9]+)"',
+            webpage, 'width', fatal=False))
+        height = int_or_none(self._search_regex(
+            r'<param name="height" value="([0-9]+)"',
+            webpage, 'height', fatal=False))
+
+        formats = []
+        video_elements = self._search_regex(
+            r'(?s)<div class="video-elements">(.*?)</div>',
+            webpage, 'video elements')
+        formats = []
+        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+            formats.append({
+                'format_id': m.group('type').partition('/')[2],
+                'url': self._proto_relative_url(m.group('src')),
+                'ext': mimetype2ext(m.group('type')),
+                'acodec': 'none',
+                'width': width,
+                'height': height,
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        gif_json = self._search_regex(
+            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+            webpage, 'GIF code', fatal=False)
+        if gif_json:
+            gifd = self._parse_json(
+                gif_json, video_id, transform_source=js_to_json)
+            formats.append({
+                'format_id': 'gif',
+                'preference': -10,
+                'width': width,
+                'height': height,
+                'ext': 'gif',
+                'acodec': 'none',
+                'vcodec': 'gif',
+                'container': 'gif',
+                'url': self._proto_relative_url(gifd['gifUrl']),
+                'filesize': gifd.get('size'),
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'title': self._og_search_title(webpage),
+        }