youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

ndr.py (16258B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     determine_ext,
      9     int_or_none,
     10     merge_dicts,
     11     parse_iso8601,
     12     qualities,
     13     try_get,
     14     urljoin,
     15 )
     16 
     17 
     18 class NDRBaseIE(InfoExtractor):
     19     def _real_extract(self, url):
     20         mobj = re.match(self._VALID_URL, url)
     21         display_id = next(group for group in mobj.groups() if group)
     22         webpage = self._download_webpage(url, display_id)
     23         return self._extract_embed(webpage, display_id)
     24 
     25 
     26 class NDRIE(NDRBaseIE):
     27     IE_NAME = 'ndr'
     28     IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
     29     _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
     30     _TESTS = [{
     31         # httpVideo, same content id
     32         'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
     33         'md5': '6515bc255dc5c5f8c85bbc38e035a659',
     34         'info_dict': {
     35             'id': 'hafengeburtstag988',
     36             'display_id': 'Party-Poette-und-Parade',
     37             'ext': 'mp4',
     38             'title': 'Party, Pötte und Parade',
     39             'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
     40             'uploader': 'ndrtv',
     41             'timestamp': 1431108900,
     42             'upload_date': '20150510',
     43             'duration': 3498,
     44         },
     45         'params': {
     46             'skip_download': True,
     47         },
     48     }, {
     49         # httpVideo, different content id
     50         'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
     51         'md5': '1043ff203eab307f0c51702ec49e9a71',
     52         'info_dict': {
     53             'id': 'osna272',
     54             'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
     55             'ext': 'mp4',
     56             'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
     57             'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
     58             'uploader': 'ndrtv',
     59             'timestamp': 1442059200,
     60             'upload_date': '20150912',
     61             'duration': 510,
     62         },
     63         'params': {
     64             'skip_download': True,
     65         },
     66     }, {
     67         # httpAudio, same content id
     68         'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
     69         'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
     70         'info_dict': {
     71             'id': 'audio51535',
     72             'display_id': 'La-Valette-entgeht-der-Hinrichtung',
     73             'ext': 'mp3',
     74             'title': 'La Valette entgeht der Hinrichtung',
     75             'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
     76             'uploader': 'ndrinfo',
     77             'timestamp': 1290626100,
     78             'upload_date': '20140729',
     79             'duration': 884,
     80         },
     81         'params': {
     82             'skip_download': True,
     83         },
     84     }, {
     85         # with subtitles
     86         'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
     87         'info_dict': {
     88             'id': 'extra18674',
     89             'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
     90             'ext': 'mp4',
     91             'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
     92             'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
     93             'uploader': 'ndrtv',
     94             'upload_date': '20201113',
     95             'duration': 1749,
     96             'subtitles': {
     97                 'de': [{
     98                     'ext': 'ttml',
     99                     'url': r're:^https://www\.ndr\.de.+',
    100                 }],
    101             },
    102         },
    103         'params': {
    104             'skip_download': True,
    105         },
    106         'expected_warnings': ['Unable to download f4m manifest'],
    107     }, {
    108         'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
    109         'only_matching': True,
    110     }]
    111 
    112     def _extract_embed(self, webpage, display_id):
    113         embed_url = self._html_search_meta(
    114             'embedURL', webpage, 'embed URL',
    115             default=None) or self._search_regex(
    116             r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
    117             'embed URL', group='url')
    118         description = self._search_regex(
    119             r'<p[^>]+itemprop="description">([^<]+)</p>',
    120             webpage, 'description', default=None) or self._og_search_description(webpage)
    121         timestamp = parse_iso8601(
    122             self._search_regex(
    123                 r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
    124                 webpage, 'upload date', default=None))
    125         info = self._search_json_ld(webpage, display_id, default={})
    126         return merge_dicts({
    127             '_type': 'url_transparent',
    128             'url': embed_url,
    129             'display_id': display_id,
    130             'description': description,
    131             'timestamp': timestamp,
    132         }, info)
    133 
    134 
    135 class NJoyIE(NDRBaseIE):
    136     IE_NAME = 'njoy'
    137     IE_DESC = 'N-JOY'
    138     _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
    139     _TESTS = [{
    140         # httpVideo, same content id
    141         'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
    142         'md5': 'cb63be60cd6f9dd75218803146d8dc67',
    143         'info_dict': {
    144             'id': 'comedycontest2480',
    145             'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
    146             'ext': 'mp4',
    147             'title': 'Benaissa beim NDR Comedy Contest',
    148             'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
    149             'uploader': 'ndrtv',
    150             'upload_date': '20141129',
    151             'duration': 654,
    152         },
    153         'params': {
    154             'skip_download': True,
    155         },
    156     }, {
    157         # httpVideo, different content id
    158         'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
    159         'md5': '417660fffa90e6df2fda19f1b40a64d8',
    160         'info_dict': {
    161             'id': 'dockville882',
    162             'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
    163             'ext': 'mp4',
    164             'title': '"Ich hab noch nie" mit Felix Jaehn',
    165             'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
    166             'uploader': 'njoy',
    167             'upload_date': '20150822',
    168             'duration': 211,
    169         },
    170         'params': {
    171             'skip_download': True,
    172         },
    173     }, {
    174         'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
    175         'only_matching': True,
    176     }]
    177 
    178     def _extract_embed(self, webpage, display_id):
    179         video_id = self._search_regex(
    180             r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
    181         description = self._search_regex(
    182             r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
    183             webpage, 'description', fatal=False)
    184         return {
    185             '_type': 'url_transparent',
    186             'ie_key': 'NDREmbedBase',
    187             'url': 'ndr:%s' % video_id,
    188             'display_id': display_id,
    189             'description': description,
    190         }
    191 
    192 
    193 class NDREmbedBaseIE(InfoExtractor):
    194     IE_NAME = 'ndr:embed:base'
    195     _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
    196     _TESTS = [{
    197         'url': 'ndr:soundcheck3366',
    198         'only_matching': True,
    199     }, {
    200         'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
    201         'only_matching': True,
    202     }]
    203 
    204     def _real_extract(self, url):
    205         mobj = re.match(self._VALID_URL, url)
    206         video_id = mobj.group('id') or mobj.group('id_s')
    207 
    208         ppjson = self._download_json(
    209             'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
    210 
    211         playlist = ppjson['playlist']
    212 
    213         formats = []
    214         quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
    215 
    216         for format_id, f in playlist.items():
    217             src = f.get('src')
    218             if not src:
    219                 continue
    220             ext = determine_ext(src, None)
    221             if ext == 'f4m':
    222                 formats.extend(self._extract_f4m_formats(
    223                     src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
    224                     f4m_id='hds', fatal=False))
    225             elif ext == 'm3u8':
    226                 formats.extend(self._extract_m3u8_formats(
    227                     src, video_id, 'mp4', m3u8_id='hls',
    228                     entry_protocol='m3u8_native', fatal=False))
    229             else:
    230                 quality = f.get('quality')
    231                 ff = {
    232                     'url': src,
    233                     'format_id': quality or format_id,
    234                     'quality': quality_key(quality),
    235                 }
    236                 type_ = f.get('type')
    237                 if type_ and type_.split('/')[0] == 'audio':
    238                     ff['vcodec'] = 'none'
    239                     ff['ext'] = ext or 'mp3'
    240                 formats.append(ff)
    241         self._sort_formats(formats)
    242 
    243         config = playlist['config']
    244 
    245         live = playlist.get('config', {}).get('streamType') in ['httpVideoLive', 'httpAudioLive']
    246         title = config['title']
    247         if live:
    248             title = self._live_title(title)
    249         uploader = ppjson.get('config', {}).get('branding')
    250         upload_date = ppjson.get('config', {}).get('publicationDate')
    251         duration = int_or_none(config.get('duration'))
    252 
    253         thumbnails = []
    254         poster = try_get(config, lambda x: x['poster'], dict) or {}
    255         for thumbnail_id, thumbnail in poster.items():
    256             thumbnail_url = urljoin(url, thumbnail.get('src'))
    257             if not thumbnail_url:
    258                 continue
    259             thumbnails.append({
    260                 'id': thumbnail.get('quality') or thumbnail_id,
    261                 'url': thumbnail_url,
    262                 'preference': quality_key(thumbnail.get('quality')),
    263             })
    264 
    265         subtitles = {}
    266         tracks = config.get('tracks')
    267         if tracks and isinstance(tracks, list):
    268             for track in tracks:
    269                 if not isinstance(track, dict):
    270                     continue
    271                 track_url = urljoin(url, track.get('src'))
    272                 if not track_url:
    273                     continue
    274                 subtitles.setdefault(track.get('srclang') or 'de', []).append({
    275                     'url': track_url,
    276                     'ext': 'ttml',
    277                 })
    278 
    279         return {
    280             'id': video_id,
    281             'title': title,
    282             'is_live': live,
    283             'uploader': uploader if uploader != '-' else None,
    284             'upload_date': upload_date[0:8] if upload_date else None,
    285             'duration': duration,
    286             'thumbnails': thumbnails,
    287             'formats': formats,
    288             'subtitles': subtitles,
    289         }
    290 
    291 
    292 class NDREmbedIE(NDREmbedBaseIE):
    293     IE_NAME = 'ndr:embed'
    294     _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
    295     _TESTS = [{
    296         'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
    297         'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
    298         'info_dict': {
    299             'id': 'ndraktuell28488',
    300             'ext': 'mp4',
    301             'title': 'Norddeutschland begrüßt Flüchtlinge',
    302             'is_live': False,
    303             'uploader': 'ndrtv',
    304             'upload_date': '20150907',
    305             'duration': 132,
    306         },
    307     }, {
    308         'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
    309         'md5': '002085c44bae38802d94ae5802a36e78',
    310         'info_dict': {
    311             'id': 'soundcheck3366',
    312             'ext': 'mp4',
    313             'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
    314             'is_live': False,
    315             'uploader': 'ndr2',
    316             'upload_date': '20150912',
    317             'duration': 3554,
    318         },
    319         'params': {
    320             'skip_download': True,
    321         },
    322     }, {
    323         'url': 'http://www.ndr.de/info/audio51535-player.html',
    324         'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
    325         'info_dict': {
    326             'id': 'audio51535',
    327             'ext': 'mp3',
    328             'title': 'La Valette entgeht der Hinrichtung',
    329             'is_live': False,
    330             'uploader': 'ndrinfo',
    331             'upload_date': '20140729',
    332             'duration': 884,
    333         },
    334         'params': {
    335             'skip_download': True,
    336         },
    337     }, {
    338         'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
    339         'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
    340         'info_dict': {
    341             'id': 'visite11010',
    342             'ext': 'mp4',
    343             'title': 'Visite - die ganze Sendung',
    344             'is_live': False,
    345             'uploader': 'ndrtv',
    346             'upload_date': '20150902',
    347             'duration': 3525,
    348         },
    349         'params': {
    350             'skip_download': True,
    351         },
    352     }, {
    353         # httpVideoLive
    354         'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
    355         'info_dict': {
    356             'id': 'livestream217',
    357             'ext': 'flv',
    358             'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
    359             'is_live': True,
    360             'upload_date': '20150910',
    361         },
    362         'params': {
    363             'skip_download': True,
    364         },
    365     }, {
    366         'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
    367         'only_matching': True,
    368     }, {
    369         'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
    370         'only_matching': True,
    371     }, {
    372         'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
    373         'only_matching': True,
    374     }, {
    375         'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
    376         'only_matching': True,
    377     }, {
    378         'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
    379         'only_matching': True,
    380     }, {
    381         'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
    382         'only_matching': True,
    383     }]
    384 
    385 
    386 class NJoyEmbedIE(NDREmbedBaseIE):
    387     IE_NAME = 'njoy:embed'
    388     _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
    389     _TESTS = [{
    390         # httpVideo
    391         'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
    392         'md5': '8483cbfe2320bd4d28a349d62d88bd74',
    393         'info_dict': {
    394             'id': 'doku948',
    395             'ext': 'mp4',
    396             'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
    397             'is_live': False,
    398             'upload_date': '20150807',
    399             'duration': 1011,
    400         },
    401     }, {
    402         # httpAudio
    403         'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
    404         'md5': 'd989f80f28ac954430f7b8a48197188a',
    405         'info_dict': {
    406             'id': 'stefanrichter100',
    407             'ext': 'mp3',
    408             'title': 'Interview mit einem Augenzeugen',
    409             'is_live': False,
    410             'uploader': 'njoy',
    411             'upload_date': '20150909',
    412             'duration': 140,
    413         },
    414         'params': {
    415             'skip_download': True,
    416         },
    417     }, {
    418         # httpAudioLive, no explicit ext
    419         'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
    420         'info_dict': {
    421             'id': 'webradioweltweit100',
    422             'ext': 'mp3',
    423             'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
    424             'is_live': True,
    425             'uploader': 'njoy',
    426             'upload_date': '20150810',
    427         },
    428         'params': {
    429             'skip_download': True,
    430         },
    431     }, {
    432         'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
    433         'only_matching': True,
    434     }, {
    435         'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
    436         'only_matching': True,
    437     }, {
    438         'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
    439         'only_matching': True,
    440     }]