# ndr.py (16258B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 determine_ext, 9 int_or_none, 10 merge_dicts, 11 parse_iso8601, 12 qualities, 13 try_get, 14 urljoin, 15 ) 16 17 18 class NDRBaseIE(InfoExtractor): 19 def _real_extract(self, url): 20 mobj = re.match(self._VALID_URL, url) 21 display_id = next(group for group in mobj.groups() if group) 22 webpage = self._download_webpage(url, display_id) 23 return self._extract_embed(webpage, display_id) 24 25 26 class NDRIE(NDRBaseIE): 27 IE_NAME = 'ndr' 28 IE_DESC = 'NDR.de - Norddeutscher Rundfunk' 29 _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' 30 _TESTS = [{ 31 # httpVideo, same content id 32 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', 33 'md5': '6515bc255dc5c5f8c85bbc38e035a659', 34 'info_dict': { 35 'id': 'hafengeburtstag988', 36 'display_id': 'Party-Poette-und-Parade', 37 'ext': 'mp4', 38 'title': 'Party, Pötte und Parade', 39 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c', 40 'uploader': 'ndrtv', 41 'timestamp': 1431108900, 42 'upload_date': '20150510', 43 'duration': 3498, 44 }, 45 'params': { 46 'skip_download': True, 47 }, 48 }, { 49 # httpVideo, different content id 50 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html', 51 'md5': '1043ff203eab307f0c51702ec49e9a71', 52 'info_dict': { 53 'id': 'osna272', 54 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch', 55 'ext': 'mp4', 56 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights', 57 'description': 'md5:32e9b800b3d2d4008103752682d5dc01', 58 'uploader': 'ndrtv', 59 'timestamp': 1442059200, 60 'upload_date': '20150912', 61 'duration': 510, 62 }, 63 'params': { 64 'skip_download': True, 65 }, 66 }, { 67 # httpAudio, same content id 68 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html', 69 'md5': 
'bb3cd38e24fbcc866d13b50ca59307b8', 70 'info_dict': { 71 'id': 'audio51535', 72 'display_id': 'La-Valette-entgeht-der-Hinrichtung', 73 'ext': 'mp3', 74 'title': 'La Valette entgeht der Hinrichtung', 75 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', 76 'uploader': 'ndrinfo', 77 'timestamp': 1290626100, 78 'upload_date': '20140729', 79 'duration': 884, 80 }, 81 'params': { 82 'skip_download': True, 83 }, 84 }, { 85 # with subtitles 86 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html', 87 'info_dict': { 88 'id': 'extra18674', 89 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring', 90 'ext': 'mp4', 91 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring', 92 'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6', 93 'uploader': 'ndrtv', 94 'upload_date': '20201113', 95 'duration': 1749, 96 'subtitles': { 97 'de': [{ 98 'ext': 'ttml', 99 'url': r're:^https://www\.ndr\.de.+', 100 }], 101 }, 102 }, 103 'params': { 104 'skip_download': True, 105 }, 106 'expected_warnings': ['Unable to download f4m manifest'], 107 }, { 108 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html', 109 'only_matching': True, 110 }] 111 112 def _extract_embed(self, webpage, display_id): 113 embed_url = self._html_search_meta( 114 'embedURL', webpage, 'embed URL', 115 default=None) or self._search_regex( 116 r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 117 'embed URL', group='url') 118 description = self._search_regex( 119 r'<p[^>]+itemprop="description">([^<]+)</p>', 120 webpage, 'description', default=None) or self._og_search_description(webpage) 121 timestamp = parse_iso8601( 122 self._search_regex( 123 r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"', 124 webpage, 'upload date', default=None)) 125 info = self._search_json_ld(webpage, display_id, default={}) 126 return merge_dicts({ 127 '_type': 'url_transparent', 
128 'url': embed_url, 129 'display_id': display_id, 130 'description': description, 131 'timestamp': timestamp, 132 }, info) 133 134 135 class NJoyIE(NDRBaseIE): 136 IE_NAME = 'njoy' 137 IE_DESC = 'N-JOY' 138 _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' 139 _TESTS = [{ 140 # httpVideo, same content id 141 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', 142 'md5': 'cb63be60cd6f9dd75218803146d8dc67', 143 'info_dict': { 144 'id': 'comedycontest2480', 145 'display_id': 'Benaissa-beim-NDR-Comedy-Contest', 146 'ext': 'mp4', 147 'title': 'Benaissa beim NDR Comedy Contest', 148 'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39', 149 'uploader': 'ndrtv', 150 'upload_date': '20141129', 151 'duration': 654, 152 }, 153 'params': { 154 'skip_download': True, 155 }, 156 }, { 157 # httpVideo, different content id 158 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html', 159 'md5': '417660fffa90e6df2fda19f1b40a64d8', 160 'info_dict': { 161 'id': 'dockville882', 162 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-', 163 'ext': 'mp4', 164 'title': '"Ich hab noch nie" mit Felix Jaehn', 165 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3', 166 'uploader': 'njoy', 167 'upload_date': '20150822', 168 'duration': 211, 169 }, 170 'params': { 171 'skip_download': True, 172 }, 173 }, { 174 'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html', 175 'only_matching': True, 176 }] 177 178 def _extract_embed(self, webpage, display_id): 179 video_id = self._search_regex( 180 r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') 181 description = self._search_regex( 182 r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', 183 webpage, 'description', fatal=False) 184 return { 185 '_type': 'url_transparent', 186 'ie_key': 'NDREmbedBase', 187 'url': 'ndr:%s' % 
video_id, 188 'display_id': display_id, 189 'description': description, 190 } 191 192 193 class NDREmbedBaseIE(InfoExtractor): 194 IE_NAME = 'ndr:embed:base' 195 _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)' 196 _TESTS = [{ 197 'url': 'ndr:soundcheck3366', 198 'only_matching': True, 199 }, { 200 'url': 'http://www.ndr.de/soundcheck3366-ppjson.json', 201 'only_matching': True, 202 }] 203 204 def _real_extract(self, url): 205 mobj = re.match(self._VALID_URL, url) 206 video_id = mobj.group('id') or mobj.group('id_s') 207 208 ppjson = self._download_json( 209 'http://www.ndr.de/%s-ppjson.json' % video_id, video_id) 210 211 playlist = ppjson['playlist'] 212 213 formats = [] 214 quality_key = qualities(('xs', 's', 'm', 'l', 'xl')) 215 216 for format_id, f in playlist.items(): 217 src = f.get('src') 218 if not src: 219 continue 220 ext = determine_ext(src, None) 221 if ext == 'f4m': 222 formats.extend(self._extract_f4m_formats( 223 src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 224 f4m_id='hds', fatal=False)) 225 elif ext == 'm3u8': 226 formats.extend(self._extract_m3u8_formats( 227 src, video_id, 'mp4', m3u8_id='hls', 228 entry_protocol='m3u8_native', fatal=False)) 229 else: 230 quality = f.get('quality') 231 ff = { 232 'url': src, 233 'format_id': quality or format_id, 234 'quality': quality_key(quality), 235 } 236 type_ = f.get('type') 237 if type_ and type_.split('/')[0] == 'audio': 238 ff['vcodec'] = 'none' 239 ff['ext'] = ext or 'mp3' 240 formats.append(ff) 241 self._sort_formats(formats) 242 243 config = playlist['config'] 244 245 live = playlist.get('config', {}).get('streamType') in ['httpVideoLive', 'httpAudioLive'] 246 title = config['title'] 247 if live: 248 title = self._live_title(title) 249 uploader = ppjson.get('config', {}).get('branding') 250 upload_date = ppjson.get('config', {}).get('publicationDate') 251 duration = int_or_none(config.get('duration')) 252 253 thumbnails = [] 254 poster = 
try_get(config, lambda x: x['poster'], dict) or {} 255 for thumbnail_id, thumbnail in poster.items(): 256 thumbnail_url = urljoin(url, thumbnail.get('src')) 257 if not thumbnail_url: 258 continue 259 thumbnails.append({ 260 'id': thumbnail.get('quality') or thumbnail_id, 261 'url': thumbnail_url, 262 'preference': quality_key(thumbnail.get('quality')), 263 }) 264 265 subtitles = {} 266 tracks = config.get('tracks') 267 if tracks and isinstance(tracks, list): 268 for track in tracks: 269 if not isinstance(track, dict): 270 continue 271 track_url = urljoin(url, track.get('src')) 272 if not track_url: 273 continue 274 subtitles.setdefault(track.get('srclang') or 'de', []).append({ 275 'url': track_url, 276 'ext': 'ttml', 277 }) 278 279 return { 280 'id': video_id, 281 'title': title, 282 'is_live': live, 283 'uploader': uploader if uploader != '-' else None, 284 'upload_date': upload_date[0:8] if upload_date else None, 285 'duration': duration, 286 'thumbnails': thumbnails, 287 'formats': formats, 288 'subtitles': subtitles, 289 } 290 291 292 class NDREmbedIE(NDREmbedBaseIE): 293 IE_NAME = 'ndr:embed' 294 _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' 295 _TESTS = [{ 296 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 297 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', 298 'info_dict': { 299 'id': 'ndraktuell28488', 300 'ext': 'mp4', 301 'title': 'Norddeutschland begrüßt Flüchtlinge', 302 'is_live': False, 303 'uploader': 'ndrtv', 304 'upload_date': '20150907', 305 'duration': 132, 306 }, 307 }, { 308 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html', 309 'md5': '002085c44bae38802d94ae5802a36e78', 310 'info_dict': { 311 'id': 'soundcheck3366', 312 'ext': 'mp4', 313 'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen', 314 'is_live': False, 315 'uploader': 'ndr2', 316 'upload_date': '20150912', 317 'duration': 3554, 318 }, 319 'params': { 320 
'skip_download': True, 321 }, 322 }, { 323 'url': 'http://www.ndr.de/info/audio51535-player.html', 324 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', 325 'info_dict': { 326 'id': 'audio51535', 327 'ext': 'mp3', 328 'title': 'La Valette entgeht der Hinrichtung', 329 'is_live': False, 330 'uploader': 'ndrinfo', 331 'upload_date': '20140729', 332 'duration': 884, 333 }, 334 'params': { 335 'skip_download': True, 336 }, 337 }, { 338 'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html', 339 'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c', 340 'info_dict': { 341 'id': 'visite11010', 342 'ext': 'mp4', 343 'title': 'Visite - die ganze Sendung', 344 'is_live': False, 345 'uploader': 'ndrtv', 346 'upload_date': '20150902', 347 'duration': 3525, 348 }, 349 'params': { 350 'skip_download': True, 351 }, 352 }, { 353 # httpVideoLive 354 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html', 355 'info_dict': { 356 'id': 'livestream217', 357 'ext': 'flv', 358 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 359 'is_live': True, 360 'upload_date': '20150910', 361 }, 362 'params': { 363 'skip_download': True, 364 }, 365 }, { 366 'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html', 367 'only_matching': True, 368 }, { 369 'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html', 370 'only_matching': True, 371 }, { 372 'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html', 373 'only_matching': True, 374 }, { 375 'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html', 376 'only_matching': True, 377 }, { 378 'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html', 379 'only_matching': True, 380 }, { 381 'url': 'http://www.ndr.de/fernsehen/doku952-player.html', 382 'only_matching': True, 383 }] 384 385 386 class NJoyEmbedIE(NDREmbedBaseIE): 387 IE_NAME = 'njoy:embed' 388 _VALID_URL = 
r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' 389 _TESTS = [{ 390 # httpVideo 391 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', 392 'md5': '8483cbfe2320bd4d28a349d62d88bd74', 393 'info_dict': { 394 'id': 'doku948', 395 'ext': 'mp4', 396 'title': 'Zehn Jahre Reeperbahn Festival - die Doku', 397 'is_live': False, 398 'upload_date': '20150807', 399 'duration': 1011, 400 }, 401 }, { 402 # httpAudio 403 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html', 404 'md5': 'd989f80f28ac954430f7b8a48197188a', 405 'info_dict': { 406 'id': 'stefanrichter100', 407 'ext': 'mp3', 408 'title': 'Interview mit einem Augenzeugen', 409 'is_live': False, 410 'uploader': 'njoy', 411 'upload_date': '20150909', 412 'duration': 140, 413 }, 414 'params': { 415 'skip_download': True, 416 }, 417 }, { 418 # httpAudioLive, no explicit ext 419 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html', 420 'info_dict': { 421 'id': 'webradioweltweit100', 422 'ext': 'mp3', 423 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 424 'is_live': True, 425 'uploader': 'njoy', 426 'upload_date': '20150810', 427 }, 428 'params': { 429 'skip_download': True, 430 }, 431 }, { 432 'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html', 433 'only_matching': True, 434 }, { 435 'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html', 436 'only_matching': True, 437 }, { 438 'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html', 439 'only_matching': True, 440 }]