youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

dplay.py (14744B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import json
      5 import re
      6 
      7 from .common import InfoExtractor
      8 from ..compat import compat_HTTPError
      9 from ..utils import (
     10     determine_ext,
     11     ExtractorError,
     12     float_or_none,
     13     int_or_none,
     14     strip_or_none,
     15     unified_timestamp,
     16 )
     17 
     18 
     19 class DPlayIE(InfoExtractor):
     20     _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
     21     _VALID_URL = r'''(?x)https?://
     22         (?P<domain>
     23             (?:www\.)?(?P<host>d
     24                 (?:
     25                     play\.(?P<country>dk|fi|jp|se|no)|
     26                     iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
     27                 )
     28             )|
     29             (?P<subdomain_country>es|it)\.dplay\.com
     30         )/[^/]+''' + _PATH_REGEX
     31 
     32     _TESTS = [{
     33         # non geo restricted, via secure api, unsigned download hls URL
     34         'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
     35         'info_dict': {
     36             'id': '13628',
     37             'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
     38             'ext': 'mp4',
     39             'title': 'Svensken lär sig njuta av livet',
     40             'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
     41             'duration': 2649.856,
     42             'timestamp': 1365453720,
     43             'upload_date': '20130408',
     44             'creator': 'Kanal 5',
     45             'series': 'Nugammalt - 77 händelser som format Sverige',
     46             'season_number': 1,
     47             'episode_number': 1,
     48         },
     49         'params': {
     50             'format': 'bestvideo',
     51             'skip_download': True,
     52         },
     53     }, {
     54         # geo restricted, via secure api, unsigned download hls URL
     55         'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
     56         'info_dict': {
     57             'id': '104465',
     58             'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
     59             'ext': 'mp4',
     60             'title': 'Ted Bundy: Mind Of A Monster',
     61             'description': 'md5:8b780f6f18de4dae631668b8a9637995',
     62             'duration': 5290.027,
     63             'timestamp': 1570694400,
     64             'upload_date': '20191010',
     65             'creator': 'ID - Investigation Discovery',
     66             'series': 'Ted Bundy: Mind Of A Monster',
     67             'season_number': 1,
     68             'episode_number': 1,
     69         },
     70         'params': {
     71             'format': 'bestvideo',
     72             'skip_download': True,
     73         },
     74     }, {
     75         # disco-api
     76         'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
     77         'info_dict': {
     78             'id': '40206',
     79             'display_id': 'i-kongens-klr/sesong-1-episode-7',
     80             'ext': 'mp4',
     81             'title': 'Episode 7',
     82             'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
     83             'duration': 2611.16,
     84             'timestamp': 1516726800,
     85             'upload_date': '20180123',
     86             'series': 'I kongens klær',
     87             'season_number': 1,
     88             'episode_number': 7,
     89         },
     90         'params': {
     91             'format': 'bestvideo',
     92             'skip_download': True,
     93         },
     94         'skip': 'Available for Premium users',
     95     }, {
     96         'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
     97         'md5': '2b808ffb00fc47b884a172ca5d13053c',
     98         'info_dict': {
     99             'id': '6918',
    100             'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
    101             'ext': 'mp4',
    102             'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
    103             'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
    104             'thumbnail': r're:^https?://.*\.jpe?g',
    105             'upload_date': '20160524',
    106             'timestamp': 1464076800,
    107             'series': 'Biografie imbarazzanti',
    108             'season_number': 1,
    109             'episode': 'Episode 1',
    110             'episode_number': 1,
    111         },
    112     }, {
    113         'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
    114         'info_dict': {
    115             'id': '21652',
    116             'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
    117             'ext': 'mp4',
    118             'title': 'Episodio 1',
    119             'description': 'md5:b9dcff2071086e003737485210675f69',
    120             'thumbnail': r're:^https?://.*\.png',
    121             'upload_date': '20180709',
    122             'timestamp': 1531173540,
    123             'series': 'La fiebre del oro',
    124             'season_number': 8,
    125             'episode': 'Episode 1',
    126             'episode_number': 1,
    127         },
    128         'params': {
    129             'skip_download': True,
    130         },
    131     }, {
    132         'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
    133         'only_matching': True,
    134     }, {
    135         'url': 'https://www.dplay.jp/video/gold-rush/24086',
    136         'only_matching': True,
    137     }, {
    138         'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
    139         'only_matching': True,
    140     }, {
    141         'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
    142         'only_matching': True,
    143     }, {
    144         'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
    145         'only_matching': True,
    146     }, {
    147         'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
    148         'only_matching': True,
    149     }, {
    150         'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
    151         'only_matching': True,
    152     }, {
    153         'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
    154         'only_matching': True,
    155     }]
    156 
    157     def _process_errors(self, e, geo_countries):
    158         info = self._parse_json(e.cause.read().decode('utf-8'), None)
    159         error = info['errors'][0]
    160         error_code = error.get('code')
    161         if error_code == 'access.denied.geoblocked':
    162             self.raise_geo_restricted(countries=geo_countries)
    163         elif error_code in ('access.denied.missingpackage', 'invalid.token'):
    164             raise ExtractorError(
    165                 'This video is only available for registered users. You may want to use --cookies.', expected=True)
    166         raise ExtractorError(info['errors'][0]['detail'], expected=True)
    167 
    168     def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
    169         headers['Authorization'] = 'Bearer ' + self._download_json(
    170             disco_base + 'token', display_id, 'Downloading token',
    171             query={
    172                 'realm': realm,
    173             })['data']['attributes']['token']
    174 
    175     def _download_video_playback_info(self, disco_base, video_id, headers):
    176         streaming = self._download_json(
    177             disco_base + 'playback/videoPlaybackInfo/' + video_id,
    178             video_id, headers=headers)['data']['attributes']['streaming']
    179         streaming_list = []
    180         for format_id, format_dict in streaming.items():
    181             streaming_list.append({
    182                 'type': format_id,
    183                 'url': format_dict.get('url'),
    184             })
    185         return streaming_list
    186 
    187     def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
    188         geo_countries = [country.upper()]
    189         self._initialize_geo_bypass({
    190             'countries': geo_countries,
    191         })
    192         disco_base = 'https://%s/' % disco_host
    193         headers = {
    194             'Referer': url,
    195         }
    196         self._update_disco_api_headers(headers, disco_base, display_id, realm)
    197         try:
    198             video = self._download_json(
    199                 disco_base + 'content/videos/' + display_id, display_id,
    200                 headers=headers, query={
    201                     'fields[channel]': 'name',
    202                     'fields[image]': 'height,src,width',
    203                     'fields[show]': 'name',
    204                     'fields[tag]': 'name',
    205                     'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
    206                     'include': 'images,primaryChannel,show,tags'
    207                 })
    208         except ExtractorError as e:
    209             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
    210                 self._process_errors(e, geo_countries)
    211             raise
    212         video_id = video['data']['id']
    213         info = video['data']['attributes']
    214         title = info['name'].strip()
    215         formats = []
    216         try:
    217             streaming = self._download_video_playback_info(
    218                 disco_base, video_id, headers)
    219         except ExtractorError as e:
    220             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
    221                 self._process_errors(e, geo_countries)
    222             raise
    223         for format_dict in streaming:
    224             if not isinstance(format_dict, dict):
    225                 continue
    226             format_url = format_dict.get('url')
    227             if not format_url:
    228                 continue
    229             format_id = format_dict.get('type')
    230             ext = determine_ext(format_url)
    231             if format_id == 'dash' or ext == 'mpd':
    232                 formats.extend(self._extract_mpd_formats(
    233                     format_url, display_id, mpd_id='dash', fatal=False))
    234             elif format_id == 'hls' or ext == 'm3u8':
    235                 formats.extend(self._extract_m3u8_formats(
    236                     format_url, display_id, 'mp4',
    237                     entry_protocol='m3u8_native', m3u8_id='hls',
    238                     fatal=False))
    239             else:
    240                 formats.append({
    241                     'url': format_url,
    242                     'format_id': format_id,
    243                 })
    244         self._sort_formats(formats)
    245 
    246         creator = series = None
    247         tags = []
    248         thumbnails = []
    249         included = video.get('included') or []
    250         if isinstance(included, list):
    251             for e in included:
    252                 attributes = e.get('attributes')
    253                 if not attributes:
    254                     continue
    255                 e_type = e.get('type')
    256                 if e_type == 'channel':
    257                     creator = attributes.get('name')
    258                 elif e_type == 'image':
    259                     src = attributes.get('src')
    260                     if src:
    261                         thumbnails.append({
    262                             'url': src,
    263                             'width': int_or_none(attributes.get('width')),
    264                             'height': int_or_none(attributes.get('height')),
    265                         })
    266                 if e_type == 'show':
    267                     series = attributes.get('name')
    268                 elif e_type == 'tag':
    269                     name = attributes.get('name')
    270                     if name:
    271                         tags.append(name)
    272 
    273         return {
    274             'id': video_id,
    275             'display_id': display_id,
    276             'title': title,
    277             'description': strip_or_none(info.get('description')),
    278             'duration': float_or_none(info.get('videoDuration'), 1000),
    279             'timestamp': unified_timestamp(info.get('publishStart')),
    280             'series': series,
    281             'season_number': int_or_none(info.get('seasonNumber')),
    282             'episode_number': int_or_none(info.get('episodeNumber')),
    283             'creator': creator,
    284             'tags': tags,
    285             'thumbnails': thumbnails,
    286             'formats': formats,
    287         }
    288 
    289     def _real_extract(self, url):
    290         mobj = re.match(self._VALID_URL, url)
    291         display_id = mobj.group('id')
    292         domain = mobj.group('domain').lstrip('www.')
    293         country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
    294         host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
    295         return self._get_disco_api_info(
    296             url, display_id, host, 'dplay' + country, country)
    297 
    298 
    299 class DiscoveryPlusIE(DPlayIE):
    300     _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
    301     _TESTS = [{
    302         'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
    303         'info_dict': {
    304             'id': '1140794',
    305             'display_id': 'property-brothers-forever-home/food-and-family',
    306             'ext': 'mp4',
    307             'title': 'Food and Family',
    308             'description': 'The brothers help a Richmond family expand their single-level home.',
    309             'duration': 2583.113,
    310             'timestamp': 1609304400,
    311             'upload_date': '20201230',
    312             'creator': 'HGTV',
    313             'series': 'Property Brothers: Forever Home',
    314             'season_number': 1,
    315             'episode_number': 1,
    316         },
    317         'skip': 'Available for Premium users',
    318     }]
    319 
    320     def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
    321         headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
    322 
    323     def _download_video_playback_info(self, disco_base, video_id, headers):
    324         return self._download_json(
    325             disco_base + 'playback/v3/videoPlaybackInfo',
    326             video_id, headers=headers, data=json.dumps({
    327                 'deviceInfo': {
    328                     'adBlocker': False,
    329                 },
    330                 'videoId': video_id,
    331                 'wisteriaProperties': {
    332                     'platform': 'desktop',
    333                     'product': 'dplus_us',
    334                 },
    335             }).encode('utf-8'))['data']['attributes']['streaming']
    336 
    337     def _real_extract(self, url):
    338         display_id = self._match_id(url)
    339         return self._get_disco_api_info(
    340             url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
    341 
    342 
    343 class HGTVDeIE(DPlayIE):
    344     _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
    345     _TESTS = [{
    346         'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
    347         'info_dict': {
    348             'id': '151205',
    349             'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
    350             'ext': 'mp4',
    351             'title': 'Wer braucht schon eine Toilette',
    352             'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
    353             'duration': 1177.024,
    354             'timestamp': 1595705400,
    355             'upload_date': '20200725',
    356             'creator': 'HGTV',
    357             'series': 'Tiny House - klein, aber oho',
    358             'season_number': 3,
    359             'episode_number': 3,
    360         },
    361         'params': {
    362             'format': 'bestvideo',
    363         },
    364     }]
    365 
    366     def _real_extract(self, url):
    367         display_id = self._match_id(url)
    368         return self._get_disco_api_info(
    369             url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')