youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit a22b2fd19bd8c08d50f884d1903486d4f00f76ec
parent c54c01f82dba6d3e982c73c81ad71c49f31d8af1
Author: Yen Chi Hsuan <yan12125@gmail.com>
Date:   Fri,  3 Feb 2017 01:28:24 +0800

[youtube] Fix ytsearch* when cookies are provided

Closes #11924

The API with `page` is no longer used in browsers, and YouTube always
returns {'reload': 'now'} when cookies are provided.

See http://youtube.github.io/spfjs/documentation/start/ for how SPF
works. Basically appending static link with a `spf` parameter yields the
corresponding dynamic link.

Diffstat:
MChangeLog | 1+
Myoutube_dl/extractor/youtube.py | 22++++++++++++++--------
2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/ChangeLog b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) version 2017.02.01 diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py @@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): videos = [] limit = n + url_query = { + 'search_query': query.encode('utf-8'), + } + url_query.update(self._EXTRA_QUERY_ARGS) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) + for pagenum in itertools.count(1): - url_query = { - 'search_query': query.encode('utf-8'), - 'page': pagenum, - 'spf': 'navigate', - } - url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum, - errnote='Unable to download API page') + errnote='Unable to download API page', + query={'spf': 'navigate'}) html_content = data[1]['body']['content'] if 'class="search-message' in html_content: @@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): videos += new_videos if not new_videos or len(videos) > limit: break + next_link = self._html_search_regex( + r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', + html_content, 'next link', default=None) + if next_link is None: + break + result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) if len(videos) > n: videos = videos[:n]