atvat.py (2512B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    unescapeHTML,
)


# Extractor for atv.at pages whose URL ends in a "d<digits>" or "v<digits>"
# identifier. The page embeds the player configuration as HTML-escaped JSON;
# the video it describes is returned as a 'multi_video' result with one entry
# per listed part.
class ATVAtIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
    _TESTS = [{
        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
        'md5': 'c3b6b975fb3150fc628572939df205f2',
        'info_dict': {
            'id': '1698447',
            'ext': 'mp4',
            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
        }
    }, {
        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
        'only_matching': True,
    }]

    def _atv_part_formats(self, part, part_id):
        """Build the sorted list of formats for one part of a video.

        Sources without a 'src' value are skipped. Sources whose URL has the
        'm3u8' extension are expanded through the HLS manifest helper
        (non-fatally); every other source becomes a single direct format
        labelled with the source's 'delivery' value.
        """
        collected = []
        for src_entry in part.get('sources', []):
            src_url = src_entry.get('src')
            if not src_url:
                continue
            if determine_ext(src_url) != 'm3u8':
                collected.append({
                    'format_id': src_entry.get('delivery'),
                    'url': src_url,
                })
                continue
            hls_formats = self._extract_m3u8_formats(
                src_url, part_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            collected.extend(hls_formats)
        self._sort_formats(collected)
        return collected

    def _real_extract(self, url):
        """Return a 'multi_video' info dict for the atv.at page at *url*."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The player options appear either as a quoted JS assignment or in
        # the data-jsb attribute of the FlashPlayer element; both patterns
        # expose the payload through the named group 'json'.
        player_patterns = [
            r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"',
        ]
        escaped_json = self._search_regex(
            player_patterns, webpage, 'player data', group='json')
        player_options = self._parse_json(
            unescapeHTML(escaped_json), display_id)
        video_data = player_options['config']['initial_video']

        # Read the mandatory top-level fields before touching any part so a
        # missing key fails before per-part manifests are requested.
        video_id = video_data['id']
        video_title = video_data['title']

        entries = []
        for part in video_data.get('parts', []):
            part_id = part['id']
            part_title = part['title']
            part_formats = self._atv_part_formats(part, part_id)

            entries.append({
                'id': part_id,
                'title': part_title,
                'thumbnail': part.get('preview_image_url'),
                'duration': int_or_none(part.get('duration')),
                'is_live': part.get('is_livestream'),
                'formats': part_formats,
            })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }