youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

tvnow.py (18470B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_str
      8 from ..utils import (
      9     ExtractorError,
     10     int_or_none,
     11     parse_iso8601,
     12     parse_duration,
     13     str_or_none,
     14     update_url_query,
     15     urljoin,
     16 )
     17 
     18 
     19 class TVNowBaseIE(InfoExtractor):
     20     _VIDEO_FIELDS = (
     21         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
     22         'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
     23         'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
     24         'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
     25 
     26     def _call_api(self, path, video_id, query):
     27         return self._download_json(
     28             'https://api.tvnow.de/v3/' + path, video_id, query=query)
     29 
     30     def _extract_video(self, info, display_id):
     31         video_id = compat_str(info['id'])
     32         title = info['title']
     33 
     34         paths = []
     35         for manifest_url in (info.get('manifest') or {}).values():
     36             if not manifest_url:
     37                 continue
     38             manifest_url = update_url_query(manifest_url, {'filter': ''})
     39             path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
     40             if path in paths:
     41                 continue
     42             paths.append(path)
     43 
     44             def url_repl(proto, suffix):
     45                 return re.sub(
     46                     r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
     47                         r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
     48                         '.ism/' + suffix, manifest_url))
     49 
     50             def make_urls(proto, suffix):
     51                 urls = [url_repl(proto, suffix)]
     52                 hd_url = urls[0].replace('/manifest/', '/ngvod/')
     53                 if hd_url != urls[0]:
     54                     urls.append(hd_url)
     55                 return urls
     56 
     57             for man_url in make_urls('dash', '.mpd'):
     58                 formats = self._extract_mpd_formats(
     59                     man_url, video_id, mpd_id='dash', fatal=False)
     60             for man_url in make_urls('hss', 'Manifest'):
     61                 formats.extend(self._extract_ism_formats(
     62                     man_url, video_id, ism_id='mss', fatal=False))
     63             for man_url in make_urls('hls', '.m3u8'):
     64                 formats.extend(self._extract_m3u8_formats(
     65                     man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
     66                     fatal=False))
     67             if formats:
     68                 break
     69         else:
     70             if info.get('isDrm'):
     71                 raise ExtractorError(
     72                     'Video %s is DRM protected' % video_id, expected=True)
     73             if info.get('geoblocked'):
     74                 raise self.raise_geo_restricted()
     75             if not info.get('free', True):
     76                 raise ExtractorError(
     77                     'Video %s is not available for free' % video_id, expected=True)
     78         self._sort_formats(formats)
     79 
     80         description = info.get('articleLong') or info.get('articleShort')
     81         timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
     82         duration = parse_duration(info.get('duration'))
     83 
     84         f = info.get('format', {})
     85 
     86         thumbnails = [{
     87             'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
     88         }]
     89         thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
     90         if thumbnail:
     91             thumbnails.append({
     92                 'url': thumbnail,
     93             })
     94 
     95         return {
     96             'id': video_id,
     97             'display_id': display_id,
     98             'title': title,
     99             'description': description,
    100             'thumbnails': thumbnails,
    101             'timestamp': timestamp,
    102             'duration': duration,
    103             'series': f.get('title'),
    104             'season_number': int_or_none(info.get('season')),
    105             'episode_number': int_or_none(info.get('episode')),
    106             'episode': title,
    107             'formats': formats,
    108         }
    109 
    110 
    111 class TVNowIE(TVNowBaseIE):
    112     _VALID_URL = r'''(?x)
    113                     https?://
    114                         (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
    115                         (?P<show_id>[^/]+)/
    116                         (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
    117                     '''
    118 
    119     @classmethod
    120     def suitable(cls, url):
    121         return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
    122                 else super(TVNowIE, cls).suitable(url))
    123 
    124     _TESTS = [{
    125         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
    126         'info_dict': {
    127             'id': '331082',
    128             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
    129             'ext': 'mp4',
    130             'title': 'Der neue Porsche 911 GT 3',
    131             'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
    132             'timestamp': 1495994400,
    133             'upload_date': '20170528',
    134             'duration': 5283,
    135             'series': 'GRIP - Das Motormagazin',
    136             'season_number': 14,
    137             'episode_number': 405,
    138             'episode': 'Der neue Porsche 911 GT 3',
    139         },
    140     }, {
    141         # rtl2
    142         'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
    143         'only_matching': True,
    144     }, {
    145         # rtlnitro
    146         'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
    147         'only_matching': True,
    148     }, {
    149         # superrtl
    150         'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
    151         'only_matching': True,
    152     }, {
    153         # ntv
    154         'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
    155         'only_matching': True,
    156     }, {
    157         # vox
    158         'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
    159         'only_matching': True,
    160     }, {
    161         # rtlplus
    162         'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
    163         'only_matching': True,
    164     }, {
    165         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
    166         'only_matching': True,
    167     }]
    168 
    169     def _real_extract(self, url):
    170         mobj = re.match(self._VALID_URL, url)
    171         display_id = '%s/%s' % mobj.group(2, 3)
    172 
    173         info = self._call_api(
    174             'movies/' + display_id, display_id, query={
    175                 'fields': ','.join(self._VIDEO_FIELDS),
    176             })
    177 
    178         return self._extract_video(info, display_id)
    179 
    180 
    181 class TVNowNewIE(InfoExtractor):
    182     _VALID_URL = r'''(?x)
    183                     (?P<base_url>https?://
    184                         (?:www\.)?tvnow\.(?:de|at|ch)/
    185                         (?:shows|serien))/
    186                         (?P<show>[^/]+)-\d+/
    187                         [^/]+/
    188                         episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
    189                     '''
    190 
    191     _TESTS = [{
    192         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
    193         'only_matching': True,
    194     }]
    195 
    196     def _real_extract(self, url):
    197         mobj = re.match(self._VALID_URL, url)
    198         base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
    199         show, episode = mobj.group('show', 'episode')
    200         return self.url_result(
    201             # Rewrite new URLs to the old format and use extraction via old API
    202             # at api.tvnow.de as a loophole for bypassing premium content checks
    203             '%s/%s/%s' % (base_url, show, episode),
    204             ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
    205 
    206 
    207 class TVNowNewBaseIE(InfoExtractor):
    208     def _call_api(self, path, video_id, query={}):
    209         result = self._download_json(
    210             'https://apigw.tvnow.de/module/' + path, video_id, query=query)
    211         error = result.get('error')
    212         if error:
    213             raise ExtractorError(
    214                 '%s said: %s' % (self.IE_NAME, error), expected=True)
    215         return result
    216 
    217 
    218 r"""
    219 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
    220 when api.tvnow.de is shut down. This version can't bypass premium checks though.
    221 class TVNowIE(TVNowNewBaseIE):
    222     _VALID_URL = r'''(?x)
    223                     https?://
    224                         (?:www\.)?tvnow\.(?:de|at|ch)/
    225                         (?:shows|serien)/[^/]+/
    226                         (?:[^/]+/)+
    227                         (?P<display_id>[^/?$&]+)-(?P<id>\d+)
    228                     '''
    229 
    230     _TESTS = [{
    231         # episode with annual navigation
    232         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
    233         'info_dict': {
    234             'id': '331082',
    235             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
    236             'ext': 'mp4',
    237             'title': 'Der neue Porsche 911 GT 3',
    238             'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
    239             'thumbnail': r're:^https?://.*\.jpg$',
    240             'timestamp': 1495994400,
    241             'upload_date': '20170528',
    242             'duration': 5283,
    243             'series': 'GRIP - Das Motormagazin',
    244             'season_number': 14,
    245             'episode_number': 405,
    246             'episode': 'Der neue Porsche 911 GT 3',
    247         },
    248     }, {
    249         # rtl2, episode with season navigation
    250         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
    251         'only_matching': True,
    252     }, {
    253         # rtlnitro
    254         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
    255         'only_matching': True,
    256     }, {
    257         # superrtl
    258         'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
    259         'only_matching': True,
    260     }, {
    261         # ntv
    262         'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
    263         'only_matching': True,
    264     }, {
    265         # vox
    266         'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
    267         'only_matching': True,
    268     }, {
    269         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
    270         'only_matching': True,
    271     }]
    272 
    273     def _extract_video(self, info, url, display_id):
    274         config = info['config']
    275         source = config['source']
    276 
    277         video_id = compat_str(info.get('id') or source['videoId'])
    278         title = source['title'].strip()
    279 
    280         paths = []
    281         for manifest_url in (info.get('manifest') or {}).values():
    282             if not manifest_url:
    283                 continue
    284             manifest_url = update_url_query(manifest_url, {'filter': ''})
    285             path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
    286             if path in paths:
    287                 continue
    288             paths.append(path)
    289 
    290             def url_repl(proto, suffix):
    291                 return re.sub(
    292                     r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
    293                         r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
    294                         '.ism/' + suffix, manifest_url))
    295 
    296             formats = self._extract_mpd_formats(
    297                 url_repl('dash', '.mpd'), video_id,
    298                 mpd_id='dash', fatal=False)
    299             formats.extend(self._extract_ism_formats(
    300                 url_repl('hss', 'Manifest'),
    301                 video_id, ism_id='mss', fatal=False))
    302             formats.extend(self._extract_m3u8_formats(
    303                 url_repl('hls', '.m3u8'), video_id, 'mp4',
    304                 'm3u8_native', m3u8_id='hls', fatal=False))
    305             if formats:
    306                 break
    307         else:
    308             if try_get(info, lambda x: x['rights']['isDrm']):
    309                 raise ExtractorError(
    310                     'Video %s is DRM protected' % video_id, expected=True)
    311             if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
    312                 raise self.raise_geo_restricted()
    313             if not info.get('free', True):
    314                 raise ExtractorError(
    315                     'Video %s is not available for free' % video_id, expected=True)
    316         self._sort_formats(formats)
    317 
    318         description = source.get('description')
    319         thumbnail = url_or_none(source.get('poster'))
    320         timestamp = unified_timestamp(source.get('previewStart'))
    321         duration = parse_duration(source.get('length'))
    322 
    323         series = source.get('format')
    324         season_number = int_or_none(self._search_regex(
    325             r'staffel-(\d+)', url, 'season number', default=None))
    326         episode_number = int_or_none(self._search_regex(
    327             r'episode-(\d+)', url, 'episode number', default=None))
    328 
    329         return {
    330             'id': video_id,
    331             'display_id': display_id,
    332             'title': title,
    333             'description': description,
    334             'thumbnail': thumbnail,
    335             'timestamp': timestamp,
    336             'duration': duration,
    337             'series': series,
    338             'season_number': season_number,
    339             'episode_number': episode_number,
    340             'episode': title,
    341             'formats': formats,
    342         }
    343 
    344     def _real_extract(self, url):
    345         display_id, video_id = re.match(self._VALID_URL, url).groups()
    346         info = self._call_api('player/' + video_id, video_id)
    347         return self._extract_video(info, video_id, display_id)
    348 """
    349 
    350 
    351 class TVNowListBaseIE(TVNowNewBaseIE):
    352     _SHOW_VALID_URL = r'''(?x)
    353                     (?P<base_url>
    354                         https?://
    355                             (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
    356                             [^/?#&]+-(?P<show_id>\d+)
    357                     )
    358                     '''
    359 
    360     @classmethod
    361     def suitable(cls, url):
    362         return (False if TVNowNewIE.suitable(url)
    363                 else super(TVNowListBaseIE, cls).suitable(url))
    364 
    365     def _extract_items(self, url, show_id, list_id, query):
    366         items = self._call_api(
    367             'teaserrow/format/episode/' + show_id, list_id,
    368             query=query)['items']
    369 
    370         entries = []
    371         for item in items:
    372             if not isinstance(item, dict):
    373                 continue
    374             item_url = urljoin(url, item.get('url'))
    375             if not item_url:
    376                 continue
    377             video_id = str_or_none(item.get('id') or item.get('videoId'))
    378             item_title = item.get('subheadline') or item.get('text')
    379             entries.append(self.url_result(
    380                 item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
    381                 video_title=item_title))
    382 
    383         return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
    384 
    385 
    386 class TVNowSeasonIE(TVNowListBaseIE):
    387     _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
    388     _TESTS = [{
    389         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
    390         'info_dict': {
    391             'id': '1815/13',
    392         },
    393         'playlist_mincount': 22,
    394     }]
    395 
    396     def _real_extract(self, url):
    397         _, show_id, season_id = re.match(self._VALID_URL, url).groups()
    398         return self._extract_items(
    399             url, show_id, season_id, {'season': season_id})
    400 
    401 
    402 class TVNowAnnualIE(TVNowListBaseIE):
    403     _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
    404     _TESTS = [{
    405         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
    406         'info_dict': {
    407             'id': '1669/2017-05',
    408         },
    409         'playlist_mincount': 2,
    410     }]
    411 
    412     def _real_extract(self, url):
    413         _, show_id, year, month = re.match(self._VALID_URL, url).groups()
    414         return self._extract_items(
    415             url, show_id, '%s-%s' % (year, month), {
    416                 'year': int(year),
    417                 'month': int(month),
    418             })
    419 
    420 
    421 class TVNowShowIE(TVNowListBaseIE):
    422     _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
    423     _TESTS = [{
    424         # annual navigationType
    425         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
    426         'info_dict': {
    427             'id': '1669',
    428         },
    429         'playlist_mincount': 73,
    430     }, {
    431         # season navigationType
    432         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
    433         'info_dict': {
    434             'id': '11471',
    435         },
    436         'playlist_mincount': 3,
    437     }]
    438 
    439     @classmethod
    440     def suitable(cls, url):
    441         return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
    442                 else super(TVNowShowIE, cls).suitable(url))
    443 
    444     def _real_extract(self, url):
    445         base_url, show_id = re.match(self._VALID_URL, url).groups()
    446 
    447         result = self._call_api(
    448             'teaserrow/format/navigation/' + show_id, show_id)
    449 
    450         items = result['items']
    451 
    452         entries = []
    453         navigation = result.get('navigationType')
    454         if navigation == 'annual':
    455             for item in items:
    456                 if not isinstance(item, dict):
    457                     continue
    458                 year = int_or_none(item.get('year'))
    459                 if year is None:
    460                     continue
    461                 months = item.get('months')
    462                 if not isinstance(months, list):
    463                     continue
    464                 for month_dict in months:
    465                     if not isinstance(month_dict, dict) or not month_dict:
    466                         continue
    467                     month_number = int_or_none(list(month_dict.keys())[0])
    468                     if month_number is None:
    469                         continue
    470                     entries.append(self.url_result(
    471                         '%s/%04d-%02d' % (base_url, year, month_number),
    472                         ie=TVNowAnnualIE.ie_key()))
    473         elif navigation == 'season':
    474             for item in items:
    475                 if not isinstance(item, dict):
    476                     continue
    477                 season_number = int_or_none(item.get('season'))
    478                 if season_number is None:
    479                     continue
    480                 entries.append(self.url_result(
    481                     '%s/staffel-%d' % (base_url, season_number),
    482                     ie=TVNowSeasonIE.ie_key()))
    483         else:
    484             raise ExtractorError('Unknown navigationType')
    485 
    486         return self.playlist_result(entries, show_id)