From: Philipp Hagemeister
Date: Sun, 26 Oct 2014 22:23:10 +0000 (+0100)
Subject: [srmediathek] New extractor
X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=2bcae58d46b88200a2247a7e2bee999f459f75c4;p=youtube-dl

[srmediathek] New extractor
---

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 8e31de93d..17ab49283 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -355,6 +355,7 @@ from .spike import SpikeIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
new file mode 100644
index 000000000..26bf9e34c
--- /dev/null
+++ b/youtube_dl/extractor/srmediathek.py
@@ -0,0 +1,58 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class SRMediathekIE(InfoExtractor):
+    """Extractor for video pages on sr-mediathek.sr-online.de."""
+
+    # NOTE(review): the test description below mentions Saarbrücken and
+    # "TG Saar", so "SR" presumably stands for Saarländischer Rundfunk;
+    # confirm whether this display name is intended.
+    IE_NAME = 'Süddeutscher Rundfunk'
+    # The numeric video id is captured in the named group "id".
+    _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
+        'info_dict': {
+            'id': '28455',
+            'ext': 'mp4',
+            'title': 'sportarena (26.10.2014)',
+            'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``.
+
+        The page defines the JavaScript variables ``mediaURLs`` and
+        ``mediaTitles``; each is converted with js_to_json() and parsed as
+        JSON. Every media URL becomes one format, and the first media title
+        is used as the video title.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        urls = json.loads(js_to_json(self._search_regex(
+            r'var mediaURLs\s*=\s*(.*?);\n', webpage, 'video URLs')))
+        # A distinct loop name keeps the ``url`` argument from being rebound
+        # (list-comprehension variables leak into the scope on Python 2).
+        formats = [{'url': media_url} for media_url in urls]
+        self._sort_formats(formats)
+
+        title = json.loads(js_to_json(self._search_regex(
+            r'var mediaTitles\s*=\s*(.*?);\n', webpage, 'title')))[0]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }