youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git

commit 12434026574bcaaaa705c31ef14428cc91a5efad
parent 2b2ee140c3c6f08b4078cc6a5a289e5e74bec2b4
Author: Yen Chi Hsuan <yan12125@gmail.com>
Date:   Fri, 24 Jul 2015 21:29:44 +0800

[dailymotion:playlist] Detect problematic redirection (fixes #6347)

Diffstat:
M youtube_dl/extractor/dailymotion.py | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
@@ -30,6 +30,10 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
         request.add_header('Cookie', 'family_filter=off; ff=off')
         return request
 
+    def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
+        request = self._build_request(url)
+        return self._download_webpage_handle(request, *args, **kwargs)
+
     def _download_webpage_no_ff(self, url, *args, **kwargs):
         request = self._build_request(url)
         return self._download_webpage(request, *args, **kwargs)
@@ -275,10 +279,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
     def _extract_entries(self, id):
         video_ids = []
+        processed_urls = set()
         for pagenum in itertools.count(1):
-            webpage = self._download_webpage_no_ff(
-                self._PAGE_TEMPLATE % (id, pagenum),
-                id, 'Downloading page %s' % pagenum)
+            page_url = self._PAGE_TEMPLATE % (id, pagenum)
+            webpage, urlh = self._download_webpage_handle_no_ff(
+                page_url, id, 'Downloading page %s' % pagenum)
+            if urlh.geturl() in processed_urls:
+                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
+                    page_url, urlh.geturl()), id)
+                break
+
+            processed_urls.add(urlh.geturl())
 
             video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
 
@@ -311,6 +322,17 @@ class DailymotionUserIE(DailymotionPlaylistIE):
             'title': 'Rémi Gaillard',
         },
         'playlist_mincount': 100,
+    }, {
+        'url': 'http://www.dailymotion.com/user/UnderProject',
+        'info_dict': {
+            'id': 'UnderProject',
+            'title': 'UnderProject',
+        },
+        'playlist_mincount': 1800,
+        'expected_warnings': [
+            'Stopped at duplicated page',
+        ],
+        'skip': 'Takes too long time',
     }]
 
     def _real_extract(self, url):
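The fix relies on urlh.geturl() returning the effective URL after redirects: when Dailymotion redirects a past-the-end page number back to a page already fetched, the final URL repeats and the pagination loop can stop instead of running forever. A minimal standalone sketch of the same idea, outside youtube-dl; fetch(), PAGE_TEMPLATE and iter_pages() are hypothetical names used only for this illustration:

    # Sketch of the duplicate-redirect stop condition (assumed names, not youtube-dl API).
    import itertools
    import urllib.request

    PAGE_TEMPLATE = 'https://example.com/playlist/%s/%d'  # hypothetical page URL pattern

    def fetch(url):
        # urlopen follows redirects; geturl() reports the final URL reached.
        with urllib.request.urlopen(url) as resp:
            return resp.read().decode('utf-8', 'replace'), resp.geturl()

    def iter_pages(playlist_id):
        processed_urls = set()
        for pagenum in itertools.count(1):
            page_url = PAGE_TEMPLATE % (playlist_id, pagenum)
            webpage, final_url = fetch(page_url)
            if final_url in processed_urls:
                # The server redirected a nonexistent page back to one we
                # already processed: treat it as the end of the playlist.
                break
            processed_urls.add(final_url)
            yield webpage

Comparing the final URL rather than the requested one is the key point: the requested page URLs are all distinct by construction, so only the post-redirect URL reveals that the site has started serving the same page again.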