tvnow.py (18470B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
    parse_duration,
    str_or_none,
    update_url_query,
    urljoin,
)


class TVNowBaseIE(InfoExtractor):
    """Shared helpers for extractors that talk to https://api.tvnow.de/v3/."""

    # Field names sent (comma-joined) as the `fields` query parameter when
    # requesting a single movie; see TVNowIE._real_extract.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON document at api.tvnow.de/v3/<path>."""
        return self._download_json(
            'https://api.tvnow.de/v3/' + path, video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the result dict for one movie from its API metadata.

        `info` is the decoded JSON for the movie; it must contain 'id' and
        'title'.  `display_id` is copied into the result unchanged.

        Manifests listed under info['manifest'] are tried one after another
        until one of them yields formats.  If none does, an ExtractorError is
        raised when the movie is flagged as DRM protected or as not free, and
        raise_geo_restricted() is invoked when it is flagged as geoblocked.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Bound up front so the code after the loop is well-defined even when
        # there is no usable manifest at all (previously `formats` could be
        # unbound when reaching _sort_formats).
        formats = []
        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            # Several manifest entries may point at the same .ism path;
            # process each distinct path only once.
            if path in paths:
                continue
            paths.append(path)

            def url_repl(proto, suffix):
                # Rewrite the manifest URL for another streaming protocol:
                # swap the file name after ".ism/" and the protocol token
                # (hls/dash/hss) that is followed by "." or "-".
                return re.sub(
                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                        '.ism/' + suffix, manifest_url))

            def make_urls(proto, suffix):
                # The rewritten URL plus, when it differs, a variant with
                # "/manifest/" replaced by "/ngvod/".
                urls = [url_repl(proto, suffix)]
                hd_url = urls[0].replace('/manifest/', '/ngvod/')
                if hd_url != urls[0]:
                    urls.append(hd_url)
                return urls

            # Accumulate rather than assign: assigning inside the loop used
            # to discard the formats of the first DASH URL whenever a second
            # URL was present.
            for man_url in make_urls('dash', '.mpd'):
                formats.extend(self._extract_mpd_formats(
                    man_url, video_id, mpd_id='dash', fatal=False))
            for man_url in make_urls('hss', 'Manifest'):
                formats.extend(self._extract_ism_formats(
                    man_url, video_id, ism_id='mss', fatal=False))
            for man_url in make_urls('hls', '.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                    fatal=False))
            if formats:
                break
        else:
            # No manifest produced any format: report the most specific
            # reason the metadata gives us.
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # `or {}` also covers a 'format' key that is present but null.
        f = info.get('format') or {}

        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }


class TVNowIE(TVNowBaseIE):
    """Extractor for old-style tvnow.(de|at|ch)/<station>/<show>/<episode> URLs."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    @classmethod
    def suitable(cls, url):
        # URLs claimed by the new-style episode, season, annual and show
        # extractors are never handled here.
        return (False if TVNowNewIE.suitable(url)
                or TVNowSeasonIE.suitable(url)
                or TVNowAnnualIE.suitable(url)
                or TVNowShowIE.suitable(url)
                else super(TVNowIE, cls).suitable(url))

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the movie metadata for `url` and turn it into a result dict."""
        mobj = re.match(self._VALID_URL, url)
        # The API addresses a movie as "<show_id>/<id>"; the station part of
        # the URL is not used.
        display_id = '%s/%s' % mobj.group('show_id', 'id')

        info = self._call_api(
            'movies/' + display_id, display_id, query={
                'fields': ','.join(self._VIDEO_FIELDS),
            })

        return self._extract_video(info, display_id)


class TVNowNewIE(InfoExtractor):
    """Extractor for new-style episode URLs; delegates to TVNowIE."""

    _VALID_URL = r'''(?x)
                    (?P<base_url>https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:shows|serien))/
                        (?P<show>[^/]+)-\d+/
                        [^/]+/
                        episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite `url` into the old URL layout and hand it to TVNowIE."""
        mobj = re.match(self._VALID_URL, url)
        # The old layout has a station segment where the new one has
        # "shows"/"serien"; a "_" placeholder is substituted for it.
        base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
        show, episode = mobj.group('show', 'episode')
        return self.url_result(
            # Rewrite new URLs to the old format and use extraction via old API
            # at api.tvnow.de as a loophole for bypassing premium content checks
            '%s/%s/%s' % (base_url, show, episode),
            ie=TVNowIE.ie_key(), video_id=mobj.group('id'))


class TVNowNewBaseIE(InfoExtractor):
    """Shared helper for extractors that talk to https://apigw.tvnow.de/module/."""

    def _call_api(self, path, video_id, query=None):
        """Download the JSON document at apigw.tvnow.de/module/<path>.

        `query` is an optional dict of query parameters (defaults to none).
        Raises ExtractorError when the response carries a truthy 'error'
        entry; otherwise returns the decoded response.
        """
        # A None sentinel replaces the former mutable `{}` default so no
        # dict object is shared between calls.
        result = self._download_json(
            'https://apigw.tvnow.de/module/' + path, video_id,
            query={} if query is None else query)
        error = result.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        return result


r"""
TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
when api.tvnow.de is shut down. This version can't bypass premium checks though.
class TVNowIE(TVNowNewBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:shows|serien)/[^/]+/
                        (?:[^/]+/)+
                        (?P<display_id>[^/?$&]+)-(?P<id>\d+)
                    '''

    _TESTS = [{
        # episode with annual navigation
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2, episode with season navigation
        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'only_matching': True,
    }]

    def _extract_video(self, info, url, display_id):
        config = info['config']
        source = config['source']

        video_id = compat_str(info.get('id') or source['videoId'])
        title = source['title'].strip()

        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            if path in paths:
                continue
            paths.append(path)

            def url_repl(proto, suffix):
                return re.sub(
                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                        '.ism/' + suffix, manifest_url))

            formats = self._extract_mpd_formats(
                url_repl('dash', '.mpd'), video_id,
                mpd_id='dash', fatal=False)
            formats.extend(self._extract_ism_formats(
                url_repl('hss', 'Manifest'),
                video_id, ism_id='mss', fatal=False))
            formats.extend(self._extract_m3u8_formats(
                url_repl('hls', '.m3u8'), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
            if formats:
                break
        else:
            if try_get(info, lambda x: x['rights']['isDrm']):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
                raise self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = source.get('description')
        thumbnail = url_or_none(source.get('poster'))
        timestamp = unified_timestamp(source.get('previewStart'))
        duration = parse_duration(source.get('length'))

        series = source.get('format')
        season_number = int_or_none(self._search_regex(
            r'staffel-(\d+)', url, 'season number', default=None))
        episode_number = int_or_none(self._search_regex(
            r'episode-(\d+)', url, 'episode number', default=None))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'episode': title,
            'formats': formats,
        }

    def _real_extract(self, url):
        display_id, video_id = re.match(self._VALID_URL, url).groups()
        info = self._call_api('player/' + video_id, video_id)
        return self._extract_video(info, video_id, display_id)
"""


class TVNowListBaseIE(TVNowNewBaseIE):
    """Base class for the playlist extractors (season, month and whole show)."""

    # Matches the show page itself; subclasses append a suffix for seasons
    # or months.  Verbose mode stays in effect for the appended part.
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
                            [^/?#&]+-(?P<show_id>\d+)
                    )
                    '''

    @classmethod
    def suitable(cls, url):
        # Single-episode URLs belong to TVNowNewIE, not to a list extractor.
        return (False if TVNowNewIE.suitable(url)
                else super(TVNowListBaseIE, cls).suitable(url))

    def _extract_items(self, url, show_id, list_id, query):
        """Return a playlist of the episodes listed for `show_id`.

        `query` selects the subset of episodes (season or year/month) and is
        passed to the API as query parameters.  `url` is the base against
        which the item URLs are resolved.  The playlist id is
        "<show_id>/<list_id>".
        """
        items = self._call_api(
            'teaserrow/format/episode/' + show_id, list_id,
            query=query)['items']

        entries = []
        for item in items:
            if not isinstance(item, dict):
                continue
            item_url = urljoin(url, item.get('url'))
            if not item_url:
                continue
            video_id = str_or_none(item.get('id') or item.get('videoId'))
            item_title = item.get('subheadline') or item.get('text')
            entries.append(self.url_result(
                item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
                video_title=item_title))

        return self.playlist_result(entries, '%s/%s' % (show_id, list_id))


class TVNowSeasonIE(TVNowListBaseIE):
    """Playlist extractor for <show>/staffel-<n> (season) URLs."""

    _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
        'info_dict': {
            'id': '1815/13',
        },
        'playlist_mincount': 22,
    }]

    def _real_extract(self, url):
        """Return the playlist of episodes of the season named in `url`."""
        show_id, season_id = re.match(
            self._VALID_URL, url).group('show_id', 'id')
        return self._extract_items(
            url, show_id, season_id, {'season': season_id})


class TVNowAnnualIE(TVNowListBaseIE):
    """Playlist extractor for <show>/<YYYY>-<MM> (month) URLs."""

    _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
        'info_dict': {
            'id': '1669/2017-05',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return the playlist of episodes of the month named in `url`."""
        show_id, year, month = re.match(
            self._VALID_URL, url).group('show_id', 'year', 'month')
        return self._extract_items(
            url, show_id, '%s-%s' % (year, month), {
                'year': int(year),
                'month': int(month),
            })


class TVNowShowIE(TVNowListBaseIE):
    """Playlist extractor for a whole show; yields season or month playlists."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        # annual navigationType
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
        'info_dict': {
            'id': '1669',
        },
        'playlist_mincount': 73,
    }, {
        # season navigationType
        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
        'info_dict': {
            'id': '11471',
        },
        'playlist_mincount': 3,
    }]

    @classmethod
    def suitable(cls, url):
        # The show pattern is a prefix of the episode, season and month
        # patterns, so defer to those extractors first.
        return (False if TVNowNewIE.suitable(url)
                or TVNowSeasonIE.suitable(url)
                or TVNowAnnualIE.suitable(url)
                else super(TVNowShowIE, cls).suitable(url))

    def _real_extract(self, url):
        """Return a playlist with one entry per season or per month.

        Raises ExtractorError when the API reports a navigationType other
        than 'annual' or 'season'.
        """
        base_url, show_id = re.match(
            self._VALID_URL, url).group('base_url', 'show_id')

        result = self._call_api(
            'teaserrow/format/navigation/' + show_id, show_id)

        items = result['items']

        entries = []
        navigation = result.get('navigationType')
        if navigation == 'annual':
            for item in items:
                if not isinstance(item, dict):
                    continue
                year = int_or_none(item.get('year'))
                if year is None:
                    continue
                months = item.get('months')
                if not isinstance(months, list):
                    continue
                for month_dict in months:
                    if not isinstance(month_dict, dict) or not month_dict:
                        continue
                    # The month number is taken from the first key of each
                    # (non-empty) month dict.
                    month_number = int_or_none(next(iter(month_dict)))
                    if month_number is None:
                        continue
                    entries.append(self.url_result(
                        '%s/%04d-%02d' % (base_url, year, month_number),
                        ie=TVNowAnnualIE.ie_key()))
        elif navigation == 'season':
            for item in items:
                if not isinstance(item, dict):
                    continue
                season_number = int_or_none(item.get('season'))
                if season_number is None:
                    continue
                entries.append(self.url_result(
                    '%s/staffel-%d' % (base_url, season_number),
                    ie=TVNowSeasonIE.ie_key()))
        else:
            raise ExtractorError('Unknown navigationType')

        return self.playlist_result(entries, show_id)