youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 6e47b51eef26dbaa3634b73914e4ee7213ad38f7
parent 4a98cdbf3b19b07c7a885d348e79ddf79318f133
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Tue, 26 Nov 2013 19:09:14 +0100

[youtube:playlist] Remove the link with index 0

It's not the first video of the playlist; it appears in the 'Play all' button (see the test course for an example).

Diffstat:
M youtube_dl/extractor/youtube.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py @@ -1528,7 +1528,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' _MORE_PAGES_INDICATOR = r'data-link-type="next"' - _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;' + _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)' IE_NAME = u'youtube:playlist' @classmethod @@ -1562,8 +1562,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): for page_num in itertools.count(1): url = self._TEMPLATE_URL % (playlist_id, page_num) page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) - # The ids are duplicated - new_ids = orderedSet(re.findall(self._VIDEO_RE, page)) + matches = re.finditer(self._VIDEO_RE, page) + # We remove the duplicates and the link with index 0 + # (it's not the first video of the playlist) + new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0') ids.extend(new_ids) if re.search(self._MORE_PAGES_INDICATOR, page) is None: