zapiks.py (3832B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 parse_duration, 9 parse_iso8601, 10 xpath_with_ns, 11 xpath_text, 12 int_or_none, 13 ) 14 15 16 class ZapiksIE(InfoExtractor): 17 _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' 18 _TESTS = [ 19 { 20 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', 21 'md5': 'aeb3c473b2d564b2d46d664d28d5f050', 22 'info_dict': { 23 'id': '80798', 24 'ext': 'mp4', 25 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', 26 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', 27 'thumbnail': r're:^https?://.*\.jpg$', 28 'duration': 528, 29 'timestamp': 1359044972, 30 'upload_date': '20130124', 31 'view_count': int, 32 }, 33 }, 34 { 35 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', 36 'only_matching': True, 37 }, 38 { 39 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', 40 'only_matching': True, 41 }, 42 { 43 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', 44 'only_matching': True, 45 }, 46 ] 47 48 def _real_extract(self, url): 49 mobj = re.match(self._VALID_URL, url) 50 video_id = mobj.group('id') 51 display_id = mobj.group('display_id') or video_id 52 53 webpage = self._download_webpage(url, display_id) 54 55 if not video_id: 56 video_id = self._search_regex( 57 r'data-media-id="(\d+)"', webpage, 'video id') 58 59 playlist = self._download_xml( 60 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id, 61 display_id) 62 63 NS_MAP = { 64 'jwplayer': 'http://rss.jwpcdn.com/' 65 } 66 67 def ns(path): 68 return xpath_with_ns(path, NS_MAP) 69 70 item = playlist.find('./channel/item') 71 72 title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage) 73 description = self._og_search_description(webpage, default=None) 74 thumbnail = xpath_text( 75 item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None) 76 duration = parse_duration(self._html_search_meta( 77 'duration', webpage, 'duration', default=None)) 78 timestamp = parse_iso8601(self._html_search_meta( 79 'uploadDate', webpage, 'upload date', default=None), ' ') 80 81 view_count = int_or_none(self._search_regex( 82 r'UserPlays:(\d+)', webpage, 'view count', default=None)) 83 comment_count = int_or_none(self._search_regex( 84 r'UserComments:(\d+)', webpage, 'comment count', default=None)) 85 86 formats = [] 87 for source in item.findall(ns('./jwplayer:source')): 88 format_id = source.attrib['label'] 89 f = { 90 'url': source.attrib['file'], 91 'format_id': format_id, 92 } 93 m = re.search(r'^(?P<height>\d+)[pP]', format_id) 94 if m: 95 f['height'] = int(m.group('height')) 96 formats.append(f) 97 self._sort_formats(formats) 98 99 return { 100 'id': video_id, 101 'title': title, 102 'description': description, 103 'thumbnail': thumbnail, 104 'duration': duration, 105 'timestamp': timestamp, 106 'view_count': view_count, 107 'comment_count': comment_count, 108 'formats': formats, 109 }