youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

massengeschmacktv.py (2688B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     clean_html,
      8     determine_ext,
      9     int_or_none,
     10     js_to_json,
     11     mimetype2ext,
     12     parse_filesize,
     13 )
     14 
     15 
     16 class MassengeschmackTVIE(InfoExtractor):
     17     IE_NAME = 'massengeschmack.tv'
     18     _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
     19 
     20     _TEST = {
     21         'url': 'https://massengeschmack.tv/play/fktv202',
     22         'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
     23         'info_dict': {
     24             'id': 'fktv202',
     25             'ext': 'mp4',
     26             'title': 'Fernsehkritik-TV - Folge 202',
     27         },
     28     }
     29 
     30     def _real_extract(self, url):
     31         episode = self._match_id(url)
     32 
     33         webpage = self._download_webpage(url, episode)
     34         title = clean_html(self._html_search_regex(
     35             '<h3>([^<]+)</h3>', webpage, 'title'))
     36         thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
     37         sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
     38 
     39         formats = []
     40         for source in sources:
     41             furl = source.get('src')
     42             if not furl:
     43                 continue
     44             furl = self._proto_relative_url(furl)
     45             ext = determine_ext(furl) or mimetype2ext(source.get('type'))
     46             if ext == 'm3u8':
     47                 formats.extend(self._extract_m3u8_formats(
     48                     furl, episode, 'mp4', 'm3u8_native',
     49                     m3u8_id='hls', fatal=False))
     50             else:
     51                 formats.append({
     52                     'url': furl,
     53                     'format_id': determine_ext(furl),
     54                 })
     55 
     56         for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
     57                                    <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
     58                                    <strong>(?P<format_id>.+?)</strong>.*?
     59                                    <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
     60                                 ''', webpage):
     61             formats.append({
     62                 'url': durl,
     63                 'format_id': format_id,
     64                 'width': int_or_none(width),
     65                 'height': int_or_none(height),
     66                 'filesize': parse_filesize(filesize),
     67                 'vcodec': 'none' if format_id.startswith('Audio') else None,
     68             })
     69 
     70         self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
     71 
     72         return {
     73             'id': episode,
     74             'title': title,
     75             'formats': formats,
     76             'thumbnail': thumbnail,
     77         }