rtlnl.py (5896B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
)


class RtlNlIE(InfoExtractor):
    """Extractor for videos hosted on rtl.nl / rtlxl.nl, addressed by UUID.

    The UUID captured by ``_VALID_URL`` is used to query the ``s4m/vfd`` JSON
    endpoint, from which the title, HLS manifest location, thumbnails and
    other metadata are read.
    """

    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _extract_thumbnails(self, meta, uuid):
        """Return the thumbnail dicts built from the base URLs found in *meta*.

        Each base URL gets *uuid* appended; width and height are parsed from
        its ``/sz=<width>x<height>`` component when present.
        """
        thumbnails = []
        seen_base_urls = set()

        # The last entry is the key exactly as the original code spelled it
        # (with literal double quotes); it is kept for backward compatibility
        # alongside the plain spelling.
        # NOTE(review): confirm which spelling the API actually emits.
        for key in ('poster_base_url', 'thumb_base_url', '"thumb_base_url"'):
            base_url = meta.get(key)
            if not base_url or base_url in seen_base_urls:
                continue
            seen_base_urls.add(base_url)

            thumbnails.append({
                'url': self._proto_relative_url(base_url + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', base_url, 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    base_url, 'thumbnail height', fatal=False))
            })

        return thumbnails

    def _real_extract(self, url):
        """Download the metadata JSON for the UUID in *url* and build the info dict.

        The first ``material`` entry, the first ``abstracts`` entry's ``name``
        and the material's ``videopath`` are required (a missing one raises);
        all other fields are optional and yield ``None`` / empty values when
        absent.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        # ``or {}`` also covers an explicit JSON null, which ``.get(key, {})``
        # would pass through as None.
        meta = info.get('meta') or {}

        videopath = material['videopath']
        # Fall back to the default host when 'videohost' is missing, null or empty.
        m3u8_url = (meta.get('videohost') or 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional metadata: do not abort extraction when it is absent.
            'timestamp': int_or_none(material.get('original_date')),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': self._extract_thumbnails(meta, uuid),
        }