youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

servus.py (5663B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     determine_ext,
      7     float_or_none,
      8     int_or_none,
      9     unified_timestamp,
     10     urlencode_postdata,
     11     url_or_none,
     12 )
     13 
     14 
     15 class ServusIE(InfoExtractor):
     16     _VALID_URL = r'''(?x)
     17                     https?://
     18                         (?:www\.)?
     19                         (?:
     20                             servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
     21                             (?:servustv|pm-wissen)\.com/videos
     22                         )
     23                         /(?P<id>[aA]{2}-\w+|\d+-\d+)
     24                     '''
     25     _TESTS = [{
     26         # new URL schema
     27         'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
     28         'md5': '60474d4c21f3eb148838f215c37f02b9',
     29         'info_dict': {
     30             'id': 'AA-1T6VBU5PW1W12',
     31             'ext': 'mp4',
     32             'title': 'Die GrĂ¼nen aus Sicht des Volkes',
     33             'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
     34             'description': 'md5:1247204d85783afe3682644398ff2ec4',
     35             'thumbnail': r're:^https?://.*\.jpg',
     36             'duration': 62.442,
     37             'timestamp': 1605193976,
     38             'upload_date': '20201112',
     39             'series': 'Talk im Hangar-7',
     40             'season': 'Season 9',
     41             'season_number': 9,
     42             'episode': 'Episode 31 - September 14',
     43             'episode_number': 31,
     44         }
     45     }, {
     46         # old URL schema
     47         'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
     48         'only_matching': True,
     49     }, {
     50         'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
     51         'only_matching': True,
     52     }, {
     53         'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
     54         'only_matching': True,
     55     }, {
     56         'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
     57         'only_matching': True,
     58     }, {
     59         'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
     60         'only_matching': True,
     61     }]
     62 
     63     def _real_extract(self, url):
     64         video_id = self._match_id(url).upper()
     65 
     66         token = self._download_json(
     67             'https://auth.redbullmediahouse.com/token', video_id,
     68             'Downloading token', data=urlencode_postdata({
     69                 'grant_type': 'client_credentials',
     70             }), headers={
     71                 'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
     72             })
     73         access_token = token['access_token']
     74         token_type = token.get('token_type', 'Bearer')
     75 
     76         video = self._download_json(
     77             'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
     78             video_id, 'Downloading video JSON', headers={
     79                 'Authorization': '%s %s' % (token_type, access_token),
     80             })
     81 
     82         formats = []
     83         thumbnail = None
     84         for resource in video['resources']:
     85             if not isinstance(resource, dict):
     86                 continue
     87             format_url = url_or_none(resource.get('url'))
     88             if not format_url:
     89                 continue
     90             extension = resource.get('extension')
     91             type_ = resource.get('type')
     92             if extension == 'jpg' or type_ == 'reference_keyframe':
     93                 thumbnail = format_url
     94                 continue
     95             ext = determine_ext(format_url)
     96             if type_ == 'dash' or ext == 'mpd':
     97                 formats.extend(self._extract_mpd_formats(
     98                     format_url, video_id, mpd_id='dash', fatal=False))
     99             elif type_ == 'hls' or ext == 'm3u8':
    100                 formats.extend(self._extract_m3u8_formats(
    101                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
    102                     m3u8_id='hls', fatal=False))
    103             elif extension == 'mp4' or ext == 'mp4':
    104                 formats.append({
    105                     'url': format_url,
    106                     'format_id': type_,
    107                     'width': int_or_none(resource.get('width')),
    108                     'height': int_or_none(resource.get('height')),
    109                 })
    110         self._sort_formats(formats)
    111 
    112         attrs = {}
    113         for attribute in video['attributes']:
    114             if not isinstance(attribute, dict):
    115                 continue
    116             key = attribute.get('fieldKey')
    117             value = attribute.get('fieldValue')
    118             if not key or not value:
    119                 continue
    120             attrs[key] = value
    121 
    122         title = attrs.get('title_stv') or video_id
    123         alt_title = attrs.get('title')
    124         description = attrs.get('long_description') or attrs.get('short_description')
    125         series = attrs.get('label')
    126         season = attrs.get('season')
    127         episode = attrs.get('chapter')
    128         duration = float_or_none(attrs.get('duration'), scale=1000)
    129         season_number = int_or_none(self._search_regex(
    130             r'Season (\d+)', season or '', 'season number', default=None))
    131         episode_number = int_or_none(self._search_regex(
    132             r'Episode (\d+)', episode or '', 'episode number', default=None))
    133 
    134         return {
    135             'id': video_id,
    136             'title': title,
    137             'alt_title': alt_title,
    138             'description': description,
    139             'thumbnail': thumbnail,
    140             'duration': duration,
    141             'timestamp': unified_timestamp(video.get('lastPublished')),
    142             'series': series,
    143             'season': season,
    144             'season_number': season_number,
    145             'episode': episode,
    146             'episode_number': episode_number,
    147             'formats': formats,
    148         }