massengeschmacktv.py (2688B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    determine_ext,
    int_or_none,
    js_to_json,
    mimetype2ext,
    parse_filesize,
)


class MassengeschmackTVIE(InfoExtractor):
    """Extractor for episode pages matching ``massengeschmack.tv/play/<id>``."""

    IE_NAME = 'massengeschmack.tv'
    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'

    _TEST = {
        'url': 'https://massengeschmack.tv/play/fktv202',
        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
        'info_dict': {
            'id': 'fktv202',
            'ext': 'mp4',
            'title': 'Fernsehkritik-TV - Folge 202',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for one episode page.

        Formats are collected from two places in the downloaded page:
        the JavaScript ``MEDIA = [...]`` array, and the HTML download
        links (``<a href> ... <strong> ... <small>``) that carry an
        optional resolution and a file size.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(url, episode)
        title = clean_html(self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title'))
        # The poster is optional: fatal=False keeps a missing POSTER
        # assignment from aborting the extraction.
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
        # MEDIA is a JavaScript literal, so it is passed through
        # js_to_json before being parsed.
        sources = self._parse_json(
            self._search_regex(
                r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'),
            episode, js_to_json)

        formats = []
        for source in sources:
            furl = source.get('src')
            if not furl:
                continue
            furl = self._proto_relative_url(furl)
            # Pass an explicit None default so that a failed guess is falsy
            # and the MIME-type fallback can actually run (without it,
            # determine_ext is expected to return a truthy placeholder).
            ext = determine_ext(furl, None) or mimetype2ext(source.get('type'))
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    furl, episode, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': furl,
                    'format_id': ext,
                    'ext': ext,
                })

        # Download links: URL, label, optional WIDTHxHEIGHT, and a size
        # given in MiB or GiB.
        for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
                <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
                <strong>(?P<format_id>.+?)</strong>.*?
                <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
            ''', webpage):
            formats.append({
                # The pattern also accepts scheme-less "//host/..." links,
                # so normalise them the same way as the player sources.
                'url': self._proto_relative_url(durl),
                'format_id': format_id,
                'width': int_or_none(width),
                'height': int_or_none(height),
                'filesize': parse_filesize(filesize),
                # Labels starting with "Audio" are marked as having no video.
                'vcodec': 'none' if format_id.startswith('Audio') else None,
            })

        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }