youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 23d3608c6b5b50cbbc81314d18824c4951f8af27
parent baa7081d68996377e44225c74a1ec05e801617a2
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Fri, 12 Dec 2014 22:23:54 +0100

[youtube:channel] Fix extraction (fixes #4435)

It now uses the same pagination system as playlists

Diffstat:
M youtube_dl/extractor/youtube.py | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -1269,8 +1269,6 @@ class YoutubeTopListIE(YoutubePlaylistIE):
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
-    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
-    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = 'youtube:channel'
     _TESTS = [{
         'note': 'paginated channel',
@@ -1307,20 +1305,27 @@ class YoutubeChannelIE(InfoExtractor):
             return self.playlist_result(entries, channel_id)
 
         def _entries():
+            more_widget_html = content_html = channel_page
             for pagenum in itertools.count(1):
-                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_json(
-                    url, channel_id, note='Downloading page #%s' % pagenum,
-                    transform_source=uppercase_escape)
 
-                ids_in_page = self.extract_videos_from_page(page['content_html'])
+                ids_in_page = self.extract_videos_from_page(content_html)
                 for video_id in ids_in_page:
                     yield self.url_result(
                         video_id, 'Youtube', video_id=video_id)
 
-                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+                mobj = re.search(
+                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
+                    more_widget_html)
+                if not mobj:
                     break
 
+                more = self._download_json(
+                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
+                    'Downloading page #%s' % (pagenum + 1),
+                    transform_source=uppercase_escape)
+                content_html = more['content_html']
+                more_widget_html = more['load_more_widget_html']
+
         return self.playlist_result(_entries(), channel_id)
 
 