From 3bf8c316a663741ab806f48cf9121209aba88b72 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Sergey=20M=E2=80=A4?= Date: Mon, 20 Jul 2015 00:01:22 +0600 Subject: [PATCH] [sportschau] Reimplement in terms of ard extractor --- youtube_dl/extractor/__init__.py | 7 +++-- youtube_dl/extractor/ard.py | 39 +++++++++++++++++++++++++ youtube_dl/extractor/sportschau.py | 47 ------------------------------ 3 files changed, 44 insertions(+), 49 deletions(-) delete mode 100644 youtube_dl/extractor/sportschau.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5033d67ed..50da08830 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -22,7 +22,11 @@ from .aparat import AparatIE from .appleconnect import AppleConnectIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE -from .ard import ARDIE, ARDMediathekIE +from .ard import ( + ARDIE, + ARDMediathekIE, + SportschauIE, +) from .arte import ( ArteTvIE, ArteTVPlus7IE, @@ -553,7 +557,6 @@ from .sportbox import ( SportBoxEmbedIE, ) from .sportdeutschland import SportDeutschlandIE -from .sportschau import SportschauIE from .srf import SrfIE from .srmediathek import SRMediathekIE from .ssa import SSAIE diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 55f940d57..2c368d833 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,6 +8,7 @@ from .generic import GenericIE from ..utils import ( determine_ext, ExtractorError, + get_element_by_attribute, qualities, int_or_none, parse_duration, @@ -246,3 +247,41 @@ class ARDIE(InfoExtractor): 'upload_date': upload_date, 'thumbnail': thumbnail, } + + +class SportschauIE(ARDMediathekIE): + IE_NAME = 'Sportschau' + _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html' + _TEST = { + 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', + 'info_dict': { + 'id': 
'seppeltkokainhatnichtsmitklassischemdopingzutun100', + 'ext': 'mp4', + 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + base_url = mobj.group('baseurl') + + webpage = self._download_webpage(url, video_id) + title = get_element_by_attribute('class', 'headline', webpage) + description = self._html_search_meta('description', webpage, 'description') + + info = self._extract_media_info( + base_url + '-mc_defaultQuality-h.json', webpage, video_id) + + info.update({ + 'title': title, + 'description': description, + }) + + return info diff --git a/youtube_dl/extractor/sportschau.py b/youtube_dl/extractor/sportschau.py deleted file mode 100644 index bf9b075db..000000000 --- a/youtube_dl/extractor/sportschau.py +++ /dev/null @@ -1,47 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import get_element_by_attribute - - -class SportschauIE(InfoExtractor): - IE_NAME = 'Sportschau' - _VALID_URL = r'https?://(?:www\.)?sportschau\.de/\w+(?:/\w+)?/video(?P<id>\w+)\.html' - _TEST = { - 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', - 'info_dict': { - 'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100', - 'ext': 'mp4', - 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', - }, - 'params': { - # m3u8 download - 'skip_download': 
True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - ext = '-mc_defaultQuality-h.json' - json_url = url[:-5] + ext - - json = self._download_json(json_url, video_id) - thumb_url = json['_previewImage'] - - m3u8_url = json['_mediaArray'][1]['_mediaStreamArray'][0]['_stream'][0] - m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, ext="mp4") - - webpage = self._download_webpage(url, video_id) - title = get_element_by_attribute('class', 'headline', webpage) - desc = self._html_search_meta('description', webpage) - - return { - 'id': video_id, - 'title': title, - 'formats': m3u8_formats, - 'description': desc, - 'thumbnail': thumb_url, - } -- 2.22.2