youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 2bc43303031215436b201e656094b60ab3ec7e9e
parent 12675275a1d2158fbe409361888569e4cb52ef07
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Thu, 14 May 2015 23:41:27 +0200

[youtube:history] Fix extraction (fixes #5702)

It uses the same method as YoutubeSubscriptionsIE; if other feeds start using it, we should consider moving it into a base class.

Diffstat:
M youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++----
1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -1667,13 +1667,42 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
         return self._extract_playlist('WL')
 
 
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+class YoutubeHistoryIE(YoutubePlaylistIE):
     IE_NAME = 'youtube:history'
     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
-    _FEED_NAME = 'history'
-    _PERSONAL_FEED = True
-    _PLAYLIST_TITLE = 'Youtube Watch History'
+    _TESTS = []
+
+    def _real_extract(self, url):
+        title = 'Youtube History'
+        page = self._download_webpage('https://www.youtube.com/feed/history', title)
+
+        # The extraction process is the same as for playlists, but the regex
+        # for the video ids doesn't contain an index
+        ids = []
+        more_widget_html = content_html = page
+
+        for page_num in itertools.count(1):
+            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+            new_ids = orderedSet(matches)
+            ids.extend(new_ids)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), title,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': self._ids_to_results(ids),
+        }
 
 
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):