youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 89fb6a979765689a744e5e5bfbf3b397ed40b33a
parent b734a87112f26f8977b1a1e76edf1bbff9452928
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sat, 13 Sep 2014 06:55:38 +0200

[spiegel] Add support for articles

Diffstat:
M youtube_dl/extractor/__init__.py | 2 +-
M youtube_dl/extractor/spiegel.py | 32 ++++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -325,7 +325,7 @@ from .southpark import (
 )
 from .space import SpaceIE
 from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
+from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
 from .sportdeutschland import SportDeutschlandIE
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import compat_urlparse
 
 
 class SpiegelIE(InfoExtractor):
@@ -82,3 +83,34 @@ class SpiegelIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+
+class SpiegelArticleIE(InfoExtractor):
+    _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
+    IE_NAME = 'Spiegel:Article'
+    IE_DESC = 'Articles on spiegel.de'
+    _TEST = {
+        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
+        'info_dict': {
+            'id': '1516455',
+            'ext': 'mp4',
+            'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
+            'description': 're:^Patrick Kämnitz gehört.{100,}',
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_link = self._search_regex(
+            r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
+            'video page URL')
+        video_url = compat_urlparse.urljoin(
+            self.http_scheme() + '//spiegel.de/', video_link)
+
+        return {
+            '_type': 'url',
+            'url': video_url,
+        }