youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 893f8832b52926847353f2b678e313687806a775
parent 878d11ec29d63b2c8ca32163f38762d1b12a52c6
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon, 24 Mar 2014 22:01:47 +0100

[arte] Add support for embedded videos (Fixes #2620)

Diffstat:
M youtube_dl/extractor/__init__.py | 1 +
M youtube_dl/extractor/arte.py | 25 ++++++++++++++++++++++---
M youtube_dl/extractor/generic.py | 22 ++++++++++++++++++++++
3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -14,6 +14,7 @@ from .arte import (
     ArteTVConcertIE,
     ArteTVFutureIE,
     ArteTVDDCIE,
+    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
@@ -75,9 +75,7 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_json_url(json_url, video_id, lang)
 
     def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
+        info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
         info_dict = {
@@ -99,6 +97,8 @@ class ArteTVPlus7IE(InfoExtractor):
                 l = 'F'
             elif lang == 'de':
                 l = 'A'
+            else:
+                l = lang
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
@@ -228,3 +228,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
             'description': 'md5:486eb08f991552ade77439fe6d82c305',
         },
     }
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:embed'
+    _VALID_URL = r'''(?x)
+        http://www\.arte\.tv
+        /playerv2/embed\.php\?json_url=
+        (?P<json_url>
+            http://arte\.tv/papi/tvguide/videos/stream/player/
+            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+        )
+    '''
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        lang = mobj.group('lang')
+        json_url = mobj.group('json_url')
+        return self._extract_from_json_url(json_url, video_id, lang)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -197,6 +197,21 @@ class GenericIE(InfoExtractor):
                 'description': 'No description',
             },
         },
+        # arte embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+            'md5': '7653032cbb25bf6c80d80f217055fa43',
+            'info_dict': {
+                'id': '048195-004_PLUS7-F',
+                'ext': 'flv',
+                'title': 'X:enius',
+                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+                'upload_date': '20140320',
+            },
+            'params': {
+                'skip_download': 'Requires rtmpdump'
+            }
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -525,6 +540,13 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'TED')
 
+        # Look for embedded arte.tv player
+        mobj = re.search(
+            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None: