From: ngld
Date: Wed, 12 Aug 2015 14:59:04 +0000 (+0200)
Subject: [europa] Add new extractor
X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=3bb3f0410822d3d21c6199bb8915b598990628e6;p=youtube-dl

[europa] Add new extractor
---

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a73a1317e..495a18c17 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -158,6 +158,7 @@ from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
 from .espn import ESPNIE
 from .esri import EsriVideoIE
+from .europa import EuropaIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py
new file mode 100644
index 000000000..c437c4886
--- /dev/null
+++ b/youtube_dl/extractor/europa.py
@@ -0,0 +1,82 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    xpath_text
+)
+
+
+class EuropaIE(InfoExtractor):
+    """Extract videos from the ec.europa.eu avservices video player."""
+
+    # _match_id() needs the video reference in a named group called 'id'.
+    _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P<id>[A-Za-z0-9]+)'
+    _TEST = {
+        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
+        'md5': '728cca2fd41d5aa7350cec1141fbe620',
+        'info_dict': {
+            'id': 'I107758',
+            'ext': 'mp4',
+            'title': 'TRADE - Wikileaks on TTIP',
+            'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
+            # Raw string: the backslashes belong to the regex, not to Python.
+            'thumbnail': r're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for a single player URL.
+
+        The playlist XML carries titles, descriptions and files per language.
+        Each file becomes one format; the language named by the ``sitelang``
+        query parameter (default ``en``) gets a higher language preference
+        and supplies the top-level title and description, falling back to
+        the ``int`` entry.
+        """
+        video_id = self._match_id(url)
+        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        lang = query.get('sitelang', ['en'])[0]
+
+        playlist = self._download_xml(
+            'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id,
+            video_id)
+
+        # Metadata per language code. setdefault() lets a description exist
+        # without a title, and a missing label is skipped instead of crashing
+        # on None.strip().
+        videos = {}
+        for key in ('title', 'description'):
+            for item in playlist.findall('info/%s/item' % key):
+                label = xpath_text(item, 'label')
+                if label is not None:
+                    videos.setdefault(xpath_text(item, 'lg'), {})[key] = label.strip()
+
+        formats = []
+        for item in playlist.findall('files/file'):
+            video_url = xpath_text(item, 'url')
+            if not video_url:
+                continue
+            lg = xpath_text(item, 'lg')
+            # Copy the language metadata so every file gets its own dict; the
+            # format keeps that language's title/description keys.
+            fmt = dict(videos.get(lg, {}))
+            fmt['format_note'] = xpath_text(item, 'lglabel')
+            fmt['url'] = video_url
+            if lg == lang:
+                fmt['language_preference'] = 10
+            formats.append(fmt)
+
+        formats.reverse()
+        # Look up the fallback only when needed, so a playlist without an
+        # 'int' entry still works when the requested language is present.
+        def_video = videos[lang] if lang in videos else videos['int']
+
+        return {
+            'id': video_id,
+            'title': def_video['title'],
+            'description': def_video.get('description'),
+            'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'),
+            'formats': formats
+        }