webcaster.py (3833B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    xpath_text,
)


class WebcasterIE(InfoExtractor):
    """Extractor for bl.webcaster.pro ``quote``/``media`` start URLs."""

    _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)'
    _TESTS = [{
        # http://video.khl.ru/quotes/393859
        'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18',
        'md5': '0c162f67443f30916ff1c89425dcd4cd',
        'info_dict': {
            'id': 'c8cefd240aa593681c8d068cff59f407_hd',
            'ext': 'mp4',
            'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a webcaster video.

        Downloads the XML document served at ``url``, reads the mandatory
        ``event_name`` element as the title, and gathers HLS formats from
        the ``iphone/track`` and ``iphone/track_noise`` elements.  Tracks
        with no URL, or whose URL is not an m3u8 playlist, are skipped.
        """
        video_id = self._match_id(url)
        video = self._download_xml(url, video_id)

        title = xpath_text(video, './/event_name', 'event name', fatal=True)

        formats = []
        for variant in (None, 'noise'):
            # The plain variant uses the bare names; any other variant is
            # appended as a suffix to both the XML tag and the format id.
            if variant:
                track_tag = 'track_' + variant
                m3u8_id = 'hls-' + variant
            else:
                track_tag, m3u8_id = 'track', 'hls'
            # 'noise' tracks get the lower source preference.
            source_preference = 0 if variant == 'noise' else 1

            for track in video.findall('.//iphone/%s' % track_tag):
                track_url = track.text
                if not track_url or determine_ext(track_url) != 'm3u8':
                    continue
                # Non-fatal: a broken playlist must not abort extraction.
                track_formats = self._extract_m3u8_formats(
                    track_url, video_id, 'mp4',
                    entry_protocol='m3u8_native',
                    m3u8_id=m3u8_id, fatal=False)
                note = track.get('title')
                for fmt in track_formats:
                    fmt['source_preference'] = source_preference
                    fmt['format_note'] = note
                formats.extend(track_formats)
        self._sort_formats(formats)

        thumbnail = xpath_text(video, './/image', 'thumbnail')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }


class WebcasterFeedIE(InfoExtractor):
    """Extractor for bl.webcaster.pro ``feed`` start URLs.

    The feed XML only points at the actual video URL, which is then
    delegated to ``WebcasterIE``.
    """

    _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
        'only_matching': True,
    }

    @staticmethod
    def _extract_url(ie, webpage):
        """Find an embedded webcaster feed URL in ``webpage``.

        First looks for a ``config=`` parameter inside the ``data`` /
        ``data-config`` attribute of an ``<object>`` tag or of an ``<a>``
        tag with class ``webcaster-player``.  Failing that, inspects the
        Open Graph video URL reported by ``ie`` (secure lookup first, then
        non-secure) for the same ``config=`` parameter.

        Returns the feed URL, or None when nothing matches.
        """
        embed = re.search(
            r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
            webpage)
        if embed:
            return embed.group('url')

        for secure in (True, False):
            og_video_url = ie._og_search_video_url(
                webpage, secure=secure, default=None)
            if not og_video_url:
                continue
            config = re.search(
                r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
                og_video_url)
            if config:
                return config.group('url')

        return None

    def _real_extract(self, url):
        """Resolve a feed URL to its video URL and hand off to WebcasterIE.

        The video URL is taken from the feed's ``video_hd`` element, or
        from ``video`` as the alternative; extraction fails if neither
        yields a value.
        """
        video_id = self._match_id(url)
        feed = self._download_xml(url, video_id)

        video_url = xpath_text(
            feed, ('video_hd', 'video'), 'video url', fatal=True)

        return self.url_result(video_url, WebcasterIE.ie_key())