walla.py (2817B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 xpath_text, 9 int_or_none, 10 ) 11 12 13 class WallaIE(InfoExtractor): 14 _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' 15 _TEST = { 16 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', 17 'info_dict': { 18 'id': '2642630', 19 'display_id': 'one-direction-all-for-one', 20 'ext': 'flv', 21 'title': 'וואן דיירקשן: ההיסטריה', 22 'description': 'md5:de9e2512a92442574cdb0913c49bc4d8', 23 'thumbnail': r're:^https?://.*\.jpg', 24 'duration': 3600, 25 }, 26 'params': { 27 # rtmp download 28 'skip_download': True, 29 } 30 } 31 32 _SUBTITLE_LANGS = { 33 'עברית': 'heb', 34 } 35 36 def _real_extract(self, url): 37 mobj = re.match(self._VALID_URL, url) 38 video_id = mobj.group('id') 39 display_id = mobj.group('display_id') 40 41 video = self._download_xml( 42 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id, 43 display_id) 44 45 item = video.find('./items/item') 46 47 title = xpath_text(item, './title', 'title') 48 description = xpath_text(item, './synopsis', 'description') 49 thumbnail = xpath_text(item, './preview_pic', 'thumbnail') 50 duration = int_or_none(xpath_text(item, './duration', 'duration')) 51 52 subtitles = {} 53 for subtitle in item.findall('./subtitles/subtitle'): 54 lang = xpath_text(subtitle, './title') 55 subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ 56 'ext': 'srt', 57 'url': xpath_text(subtitle, './src'), 58 }] 59 60 formats = [] 61 for quality in item.findall('./qualities/quality'): 62 format_id = xpath_text(quality, './title') 63 fmt = { 64 'url': 'rtmp://wafla.walla.co.il/vod', 65 'play_path': xpath_text(quality, './src'), 66 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', 67 'page_url': url, 68 'ext': 'flv', 69 'format_id': xpath_text(quality, './title'), 70 } 71 m = re.search(r'^(?P<height>\d+)[Pp]', format_id) 72 if m: 73 fmt['height'] = int(m.group('height')) 74 formats.append(fmt) 75 self._sort_formats(formats) 76 77 return { 78 'id': video_id, 79 'display_id': display_id, 80 'title': title, 81 'description': description, 82 'thumbnail': thumbnail, 83 'duration': duration, 84 'formats': formats, 85 'subtitles': subtitles, 86 }