europa.py (3415B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    int_or_none,
    orderedSet,
    parse_duration,
    qualities,
    unified_strdate,
    xpath_text
)


# Extractor for ec.europa.eu/avservices video player and audio detail pages.
# The item reference is taken from the ``ref`` query parameter of the URL.
class EuropaIE(InfoExtractor):
    _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
        'md5': '574f080699ddd1e19a675b0ddf010371',
        'info_dict': {
            'id': 'I107758',
            'ext': 'mp4',
            'title': 'TRADE - Wikileaks on TTIP',
            'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150811',
            'duration': 34,
            'view_count': int,
            'formats': 'mincount:3',
        }
    }, {
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
        'only_matching': True,
    }, {
        'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the playlist XML for the referenced item and build its info dict.

        Titles and descriptions are picked by language: the ``sitelang`` query
        parameter of ``url`` first (``en`` when absent), then ``en``, then
        ``int``. The same ordering is used to rank the available media files.
        """
        video_id = self._match_id(url)

        playlist_url = 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id
        playlist = self._download_xml(playlist_url, video_id)

        def pick_label(section, wanted_langs):
            """Return the first non-empty label of ``section`` in ``wanted_langs`` order, or None."""
            # Language code -> stripped label, for items that carry both fields.
            labels = {}
            for node in playlist.findall('./info/%s/item' % section):
                code = xpath_text(node, 'lg', default=None)
                text = xpath_text(node, 'label', default=None)
                if not (code and text):
                    continue
                labels[code] = text.strip()
            # A label that is empty after stripping is skipped, not returned.
            return next(
                (labels[code] for code in wanted_langs if labels.get(code)),
                None)

        url_query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        site_lang = url_query['sitelang'][0] if 'sitelang' in url_query else 'en'

        # NOTE(review): presumably orderedSet drops duplicates while keeping
        # order, so an explicit sitelang of 'en' is not listed twice - confirm.
        lang_order = orderedSet((site_lang, 'en', 'int'))

        info = {
            'id': video_id,
            # Fall back to the bare id when no title label is available.
            'title': pick_label('title', lang_order) or video_id,
            'description': pick_label('description', lang_order),
            'thumbnail': xpath_text(playlist, './info/thumburl', 'thumbnail'),
            'upload_date': unified_strdate(
                xpath_text(playlist, './info/date', 'upload date')),
            'duration': parse_duration(
                xpath_text(playlist, './info/duration', 'duration')),
            'view_count': int_or_none(
                xpath_text(playlist, './info/views', 'views')),
        }

        # NOTE(review): the list is reversed, presumably because `qualities`
        # scores later entries higher - confirm against utils.
        rank_language = qualities(lang_order[::-1])

        formats = []
        for entry in playlist.findall('./files/file'):
            media_url = xpath_text(entry, './url')
            if media_url:
                lang_code = xpath_text(entry, './lg')
                formats.append({
                    'url': media_url,
                    'format_id': lang_code,
                    'format_note': xpath_text(entry, './lglabel'),
                    'language_preference': rank_language(lang_code)
                })
        self._sort_formats(formats)

        info['formats'] = formats
        return info