From: Remita Amine Date: Sun, 3 Jan 2021 09:04:32 +0000 (+0100) Subject: [stv] improve episode id extraction(closes #23083) X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=ac71fd5919302f0d42c0cd79e04522cab8ab0318;p=youtube-dl [stv] improve episode id extraction(closes #23083) --- diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py index bae8b71f4..539220a94 100644 --- a/youtube_dl/extractor/stv.py +++ b/youtube_dl/extractor/stv.py @@ -8,13 +8,17 @@ from ..utils import ( compat_str, float_or_none, int_or_none, + smuggle_url, + str_or_none, + try_get, ) class STVPlayerIE(InfoExtractor): IE_NAME = 'stv:player' _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})' - _TEST = { + _TESTS = [{ + # shortform 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/', 'md5': '5adf9439c31d554f8be0707c7abe7e0a', 'info_dict': { @@ -27,7 +31,11 @@ class STVPlayerIE(InfoExtractor): 'uploader_id': '1486976045', }, 'skip': 'this resource is unavailable outside of the UK', - } + }, { + # episodes + 'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane', + 'only_matching': True, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s' _PTYPE_MAP = { 'episode': 'episodes', @@ -36,11 +44,31 @@ class STVPlayerIE(InfoExtractor): def _real_extract(self, url): ptype, video_id = re.match(self._VALID_URL, url).groups() - resp = self._download_json( - 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id), - video_id) - result = resp['results'] + webpage = self._download_webpage(url, video_id, fatal=False) or '' + props = (self._parse_json(self._search_regex( + r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>', + webpage, 'next data', default='{}'), video_id, + fatal=False) or {}).get('props') or {} + player_api_cache = try_get( + props, lambda x: x['initialReduxState']['playerApiCache']) or {} + + api_path, resp = None, {} + for k, v 
in player_api_cache.items(): + if k.startswith('/episodes/') or k.startswith('/shortform/'): + api_path, resp = k, v + break + else: + episode_id = str_or_none(try_get( + props, lambda x: x['pageProps']['episodeId'])) + api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id) + + result = resp.get('results') + if not result: + resp = self._download_json( + 'https://player.api.stv.tv/v1' + api_path, video_id) + result = resp['results'] + video = result['video'] video_id = compat_str(video['id']) @@ -57,7 +85,7 @@ class STVPlayerIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id, + 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}), 'description': result.get('summary'), 'duration': float_or_none(video.get('length'), 1000), 'subtitles': subtitles,