golem.py (2209B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urlparse,
)
from ..utils import (
    determine_ext,
)


class GolemIE(InfoExtractor):
    """Information extractor for videos hosted on video.golem.de.

    The video id is the second path segment of the page URL.  Metadata is
    read from a per-video XML document fetched from
    ``https://video.golem.de/xml/<id>.xml``.
    """

    _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
    _TEST = {
        'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
        'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
        'info_dict': {
            'id': '14095',
            'format_id': 'high',
            'ext': 'mp4',
            'title': 'iPhone 6 und 6 Plus - Test',
            'duration': 300.44,
            'filesize': 65309548,
        }
    }

    # Base against which the (possibly relative) URLs found in the XML
    # document are resolved.
    _PREFIX = 'http://video.golem.de'

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``duration``,
        ``formats`` and ``thumbnails``.  Numeric fields are passed through
        the inherited ``self._int`` / ``self._float`` helpers (defined
        outside this file -- presumably tolerant converters; confirm
        against the base class).
        """
        video_id = self._match_id(url)

        xml_url = 'https://video.golem.de/xml/{0}.xml'.format(video_id)
        config = self._download_xml(xml_url, video_id)

        # Title falls back to the literal 'golem' when the element is absent.
        title = config.findtext('./title', 'golem')
        duration = self._float(config.findtext('./playtime'), 'duration')

        # Every direct child of the root that carries a non-empty <url>
        # child is treated as one downloadable format; the element's tag
        # name becomes the format id.
        formats = []
        for node in config:
            media_path = node.findtext('./url')
            if media_path:
                formats.append({
                    'format_id': compat_str(node.tag),
                    'url': compat_urlparse.urljoin(self._PREFIX, media_path),
                    'height': self._int(node.get('height'), 'height'),
                    'width': self._int(node.get('width'), 'width'),
                    'filesize': self._int(
                        node.findtext('filesize'), 'filesize'),
                    'ext': determine_ext(node.findtext('./filename')),
                })
        self._sort_formats(formats)

        # <teaser> elements anywhere in the document supply the thumbnails;
        # those without a non-empty <url> child are ignored.
        thumbnails = []
        for teaser in config.findall('.//teaser'):
            image_path = teaser.findtext('./url')
            if image_path:
                thumbnails.append({
                    'url': compat_urlparse.urljoin(self._PREFIX, image_path),
                    'width': self._int(
                        teaser.get('width'), 'thumbnail width'),
                    'height': self._int(
                        teaser.get('height'), 'thumbnail height'),
                })

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
        }