youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit ca227c8698340ad9170698cef81ab4bf4d832a55
parent 32f9036447d1211f9ce0750203d71671f0ee99dc
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Fri,  1 Jan 2016 14:32:00 +0100

[yahoo] Support pages that use an alias (fixes #8084)

Diffstat:
M youtube_dl/extractor/yahoo.py | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
@@ -155,7 +155,16 @@ class YahooIE(InfoExtractor):
                 'description': 'md5:8fc39608213295748e1e289807838c97',
                 'duration': 1646,
             },
-        }
+        }, {
+            # it uses an alias to get the video_id
+            'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html',
+            'info_dict': {
+                'id': '40eda9c8-8e5f-3552-8745-830f67d0c737',
+                'ext': 'mp4',
+                'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking',
+                'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -199,13 +208,22 @@ class YahooIE(InfoExtractor):
             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
             default=None)
         if items_json is None:
-            CONTENT_ID_REGEXES = [
-                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
-                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
-                r'"first_videoid"\s*:\s*"([^"]+)"',
-                r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
-            ]
-            video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
+            alias = self._search_regex(
+                r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
+            if alias is not None:
+                alias_info = self._download_json(
+                    'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
+                    display_id, 'Downloading alias info')
+                video_id = alias_info[0]['id']
+            else:
+                CONTENT_ID_REGEXES = [
+                    r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+                    r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+                    r'"first_videoid"\s*:\s*"([^"]+)"',
+                    r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
+                ]
+                video_id = self._search_regex(
+                    CONTENT_ID_REGEXES, webpage, 'content ID')
         else:
             items = json.loads(items_json)
             info = items['mediaItems']['query']['results']['mediaObj'][0]