clipsyndicate.py (1812B)
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    find_xpath_attr,
    fix_xml_ampersands
)


class ClipsyndicateIE(InfoExtractor):
    """Extractor for single videos on clipsyndicate.com.

    Matches ``/video/play/<id>`` and ``/video/playlist/<n>/<id>`` URLs on the
    ``www`` and ``chic`` subdomains; the trailing numeric path component is
    captured as the video id.
    """

    _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        'md5': '4d7d549451bad625e0ff3d7bd56d776c',
        'info_dict': {
            'id': '4629301',
            'ext': 'mp4',
            'title': 'Brick Briscoe',
            'duration': 612,
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }, {
        'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Two requests are made: the embed player script (to obtain the
        ``flvars`` query string) and the OSMF playlist XML that the query
        string points at. The first ``trackList/track`` element of the
        playlist supplies the media location and the named ``param``
        values (title, thumbnail, duration).

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``thumbnail`` and ``duration``. ``title`` and ``thumbnail`` are
        ``None`` when the corresponding ``param`` is absent; ``duration``
        is ``None`` when it is absent or not an integer.
        """
        video_id = self._match_id(url)
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, 'Downloading player')
        # The player script carries the playlist query string, which
        # includes a required token.
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')

        # The playlist may contain bare ampersands, hence the source fix-up
        # before XML parsing.
        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, 'Downloading video info',
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the ``value`` of the track's ``param`` called *name*, or None."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None

        # A missing or malformed duration must not abort the whole
        # extraction: int(None) raises TypeError and a non-numeric string
        # raises ValueError, so both are mapped to "unknown".
        try:
            duration = int(find_param('duration'))
        except (TypeError, ValueError):
            duration = None

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': duration,
        }