[youtube] Fix ytsearch* when cookies are provided - youtube-dl - Another place where youtube-dl lives on

commit a22b2fd19bd8c08d50f884d1903486d4f00f76ec
parent c54c01f82dba6d3e982c73c81ad71c49f31d8af1
Author: Yen Chi Hsuan <yan12125@gmail.com>
Date:   Fri,  3 Feb 2017 01:28:24 +0800

[youtube] Fix ytsearch* when cookies are provided

Closes #11924

The API with `page` is no longer used in browsers, and YouTube always
returns {'reload': 'now'} when cookies are provided.

See http://youtube.github.io/spfjs/documentation/start/ for how SPF
works. Basically appending static link with a `spf` parameter yields the
corresponding dynamic link.

Diffstat:
M ChangeLog  | 1 +
M youtube_dl/extractor/youtube.py  | 22 ++++++++++++++--------

2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Extractors
+* [youtube] Fix ytsearch when cookies are provided (#11924)
 + [bilibili] Support new Bangumi URLs (#11845)
 
 version 2017.02.01
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
         videos = []
         limit = n
 
+        url_query = {
+            'search_query': query.encode('utf-8'),
+        }
+        url_query.update(self._EXTRA_QUERY_ARGS)
+        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
         for pagenum in itertools.count(1):
-            url_query = {
-                'search_query': query.encode('utf-8'),
-                'page': pagenum,
-                'spf': 'navigate',
-            }
-            url_query.update(self._EXTRA_QUERY_ARGS)
-            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
             data = self._download_json(
                 result_url, video_id='query "%s"' % query,
                 note='Downloading page %s' % pagenum,
-                errnote='Unable to download API page')
+                errnote='Unable to download API page',
+                query={'spf': 'navigate'})
             html_content = data[1]['body']['content']
 
             if 'class="search-message' in html_content:
@@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
             videos += new_videos
             if not new_videos or len(videos) > limit:
                 break
+            next_link = self._html_search_regex(
+                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+                html_content, 'next link', default=None)
+            if next_link is None:
+                break
+            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
 
         if len(videos) > n:
             videos = videos[:n]

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE

M	ChangeLog	\|	1	+
M	youtube_dl/extractor/youtube.py	\|	22	++++++++++++++--------