youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 62c95fd5fcb8dbea2faeb4edac4c5177cbac5912
parent 25f14e9f93295a787e0cb436a5f6179d6174733d
Author: Sergey M․ <dstftw@gmail.com>
Date:   Fri, 15 May 2015 21:42:34 +0600

[youtube:feed] Check each 'load more' portion for unique video ids

Diffstat:
M youtube_dl/extractor/youtube.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -1621,10 +1621,16 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         # for the video ids doesn't contain an index
         ids = []
         more_widget_html = content_html = page
-
         for page_num in itertools.count(1):
             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
-            new_ids = orderedSet(matches)
+
+            # 'recommended' feed has infinite 'load more' and each new portion spins
+            # the same videos in (sometimes) slightly different order, so we'll check
+            # for unicity and break when portion has no new videos
+            new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
+            if not new_ids:
+                break
+
             ids.extend(new_ids)
 
             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)