xstream.py (3981B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 int_or_none, 9 parse_iso8601, 10 xpath_with_ns, 11 xpath_text, 12 find_xpath_attr, 13 ) 14 15 16 class XstreamIE(InfoExtractor): 17 _VALID_URL = r'''(?x) 18 (?: 19 xstream:| 20 https?://frontend\.xstream\.(?:dk|net)/ 21 ) 22 (?P<partner_id>[^/]+) 23 (?: 24 :| 25 /feed/video/\?.*?\bid= 26 ) 27 (?P<id>\d+) 28 ''' 29 _TESTS = [{ 30 'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588', 31 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', 32 'info_dict': { 33 'id': '86588', 34 'ext': 'mov', 35 'title': 'Otto Wollertsen', 36 'description': 'Vestlendingen Otto Fredrik Wollertsen', 37 'timestamp': 1430473209, 38 'upload_date': '20150501', 39 }, 40 }, { 41 'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039', 42 'only_matching': True, 43 }] 44 45 def _extract_video_info(self, partner_id, video_id): 46 data = self._download_xml( 47 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s' 48 % (partner_id, video_id), 49 video_id) 50 51 NS_MAP = { 52 'atom': 'http://www.w3.org/2005/Atom', 53 'xt': 'http://xstream.dk/', 54 'media': 'http://search.yahoo.com/mrss/', 55 } 56 57 entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) 58 59 title = xpath_text( 60 entry, xpath_with_ns('./atom:title', NS_MAP), 'title') 61 description = xpath_text( 62 entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') 63 timestamp = parse_iso8601(xpath_text( 64 entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) 65 66 formats = [] 67 media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) 68 for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): 69 media_url = media_content.get('url') 70 if not media_url: 71 continue 72 tbr = int_or_none(media_content.get('bitrate')) 73 mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url) 74 if mobj: 75 formats.append({ 76 'url': mobj.group('url'), 77 'play_path': 'mp4:%s' % mobj.group('playpath'), 78 'app': mobj.group('app'), 79 'ext': 'flv', 80 'tbr': tbr, 81 'format_id': 'rtmp-%d' % tbr, 82 }) 83 else: 84 formats.append({ 85 'url': media_url, 86 'tbr': tbr, 87 }) 88 self._sort_formats(formats) 89 90 link = find_xpath_attr( 91 entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') 92 if link is not None: 93 formats.append({ 94 'url': link.get('href'), 95 'format_id': link.get('rel'), 96 'preference': 1, 97 }) 98 99 thumbnails = [{ 100 'url': splash.get('url'), 101 'width': int_or_none(splash.get('width')), 102 'height': int_or_none(splash.get('height')), 103 } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] 104 105 return { 106 'id': video_id, 107 'title': title, 108 'description': description, 109 'timestamp': timestamp, 110 'formats': formats, 111 'thumbnails': thumbnails, 112 } 113 114 def _real_extract(self, url): 115 mobj = re.match(self._VALID_URL, url) 116 partner_id = mobj.group('partner_id') 117 video_id = mobj.group('id') 118 119 return self._extract_video_info(partner_id, video_id)