ina.py (2948B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    strip_or_none,
    xpath_attr,
    xpath_text,
)


class InaIE(InfoExtractor):
    # Extractor for ina.fr video and audio pages. The identifier captured
    # from the URL is used to fetch an MRSS notice from player.ina.fr, from
    # which the formats, title, description and thumbnails are read.
    _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
    _TESTS = [{
        'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
        'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
        'info_dict': {
            'id': 'I12055569',
            'ext': 'mp4',
            'title': 'François Hollande "Je crois que c\'est clair"',
            'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
        }
    }, {
        'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
        'only_matching': True,
    }, {
        'url': 'https://www.ina.fr/audio/P16173408',
        'only_matching': True,
    }, {
        'url': 'https://www.ina.fr/video/P16173408-video.html',
        'only_matching': True,
    }, {
        'url': 'http://m.ina.fr/video/I12055569',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the item referenced by ``url``.

        Downloads ``http://player.ina.fr/notices/<id>.mrss`` and reads the
        ``channel/item`` element: its ``title`` (mandatory), its
        ``description`` (optional) and its Media RSS ``content`` child,
        which carries the format URLs and the thumbnails.

        Raises ExtractorError when the notice has no ``channel/item``, no
        Media RSS ``content`` element, no title, or no usable media URL.
        """
        video_id = self._match_id(url)
        info_doc = self._download_xml(
            'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)

        # find() returns None when nothing matches; report that as an
        # extractor error instead of failing later with AttributeError.
        item = info_doc.find('channel/item')
        if item is None:
            raise ExtractorError(
                'Unable to find channel/item in the MRSS notice',
                video_id=video_id)
        title = xpath_text(item, 'title', fatal=True)

        def media_ns_xpath(tag):
            # Qualify ``tag`` with the Media RSS namespace.
            return self._xpath_ns(tag, 'http://search.yahoo.com/mrss/')

        content = item.find(media_ns_xpath('content'))
        if content is None:
            raise ExtractorError(
                'Unable to find media content in the MRSS notice',
                video_id=video_id)

        def get_furl(tag):
            # ``url`` attribute of the namespaced child ``tag`` of
            # ``content``; None when the child or the attribute is absent.
            return xpath_attr(content, media_ns_xpath(tag), 'url')

        # Known quality variants, each with a fixed frame size.
        formats = []
        for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
            q_url = get_furl(q)
            if not q_url:
                continue
            formats.append({
                'format_id': q,
                'url': q_url,
                'width': w,
                'height': h,
            })

        if not formats:
            # No quality variant listed: fall back to the ``player`` child
            # and then to the ``url`` attribute of ``content`` itself.
            furl = get_furl('player') or content.get('url')
            if not furl:
                raise ExtractorError(
                    'Unable to find a media URL in the MRSS notice',
                    video_id=video_id)
            ext = determine_ext(furl)
            formats = [{
                'url': furl,
                # An mp3 URL is flagged as having no video stream.
                'vcodec': 'none' if ext == 'mp3' else None,
                'ext': ext,
            }]

        thumbnails = []
        for thumbnail in content.findall(media_ns_xpath('thumbnail')):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(xpath_text(item, 'description')),
            'thumbnails': thumbnails,
        }