youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

rtlnl.py (5896B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     int_or_none,
      7     parse_duration,
      8 )
      9 
     10 
     11 class RtlNlIE(InfoExtractor):
     12     IE_NAME = 'rtl.nl'
     13     IE_DESC = 'rtl.nl and rtlxl.nl'
     14     _VALID_URL = r'''(?x)
     15         https?://(?:(?:www|static)\.)?
     16         (?:
     17             rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
     18             rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
     19             embed\.rtl\.nl/\#uuid=
     20         )
     21         (?P<id>[0-9a-f-]+)'''
     22 
     23     _TESTS = [{
     24         # new URL schema
     25         'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
     26         'md5': '490428f1187b60d714f34e1f2e3af0b6',
     27         'info_dict': {
     28             'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
     29             'ext': 'mp4',
     30             'title': 'RTL Nieuws',
     31             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
     32             'timestamp': 1593293400,
     33             'upload_date': '20200627',
     34             'duration': 661.08,
     35         },
     36     }, {
     37         # old URL schema
     38         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
     39         'md5': '473d1946c1fdd050b2c0161a4b13c373',
     40         'info_dict': {
     41             'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
     42             'ext': 'mp4',
     43             'title': 'RTL Nieuws',
     44             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
     45             'timestamp': 1461951000,
     46             'upload_date': '20160429',
     47             'duration': 1167.96,
     48         },
     49         'skip': '404',
     50     }, {
     51         # best format available a3t
     52         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
     53         'md5': 'dea7474214af1271d91ef332fb8be7ea',
     54         'info_dict': {
     55             'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
     56             'ext': 'mp4',
     57             'timestamp': 1424039400,
     58             'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
     59             'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
     60             'upload_date': '20150215',
     61             'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
     62         }
     63     }, {
     64         # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
     65         # best format available nettv
     66         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
     67         'info_dict': {
     68             'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
     69             'ext': 'mp4',
     70             'title': 'RTL Nieuws - Meer beelden van overval juwelier',
     71             'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
     72             'timestamp': 1437233400,
     73             'upload_date': '20150718',
     74             'duration': 30.474,
     75         },
     76         'params': {
     77             'skip_download': True,
     78         },
     79     }, {
     80         # encrypted m3u8 streams, georestricted
     81         'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
     82         'only_matching': True,
     83     }, {
     84         'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
     85         'only_matching': True,
     86     }, {
     87         'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
     88         'only_matching': True,
     89     }, {
     90         'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
     91         'only_matching': True,
     92     }, {
     93         'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
     94         'only_matching': True,
     95     }, {
     96         # new embed URL schema
     97         'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
     98         'only_matching': True,
     99     }]
    100 
    101     def _real_extract(self, url):
    102         uuid = self._match_id(url)
    103         info = self._download_json(
    104             'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
    105             uuid)
    106 
    107         material = info['material'][0]
    108         title = info['abstracts'][0]['name']
    109         subtitle = material.get('title')
    110         if subtitle:
    111             title += ' - %s' % subtitle
    112         description = material.get('synopsis')
    113 
    114         meta = info.get('meta', {})
    115 
    116         videopath = material['videopath']
    117         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
    118 
    119         formats = self._extract_m3u8_formats(
    120             m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
    121         self._sort_formats(formats)
    122 
    123         thumbnails = []
    124 
    125         for p in ('poster_base_url', '"thumb_base_url"'):
    126             if not meta.get(p):
    127                 continue
    128 
    129             thumbnails.append({
    130                 'url': self._proto_relative_url(meta[p] + uuid),
    131                 'width': int_or_none(self._search_regex(
    132                     r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
    133                 'height': int_or_none(self._search_regex(
    134                     r'/sz=[0-9]+x([0-9]+)',
    135                     meta[p], 'thumbnail height', fatal=False))
    136             })
    137 
    138         return {
    139             'id': uuid,
    140             'title': title,
    141             'formats': formats,
    142             'timestamp': material['original_date'],
    143             'description': description,
    144             'duration': parse_duration(material.get('duration')),
    145             'thumbnails': thumbnails,
    146         }