youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 40e146aa1e1a8fd57d3f84b0a541174f56fa5dba
parent f3f9cd9234aada1d677517e3042bd9312610081d
Author: Sergey M․ <dstftw@gmail.com>
Date:   Thu, 18 Feb 2016 22:29:17 +0600

[pornhub:user:videos] Add extractor (Closes #8548)

Diffstat:
M youtube_dl/extractor/__init__.py |  1 +
M youtube_dl/extractor/pornhub.py  | 53 ++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -555,6 +555,7 @@ from .pornhd import PornHdIE
 from .pornhub import (
     PornHubIE,
     PornHubPlaylistIE,
+    PornHubUserVideosIE,
 )
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
@@ -129,27 +129,20 @@ class PornHubIE(InfoExtractor):
         }
 
 
-class PornHubPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'http://www.pornhub.com/playlist/6201671',
-        'info_dict': {
-            'id': '6201671',
-            'title': 'P0p4',
-        },
-        'playlist_mincount': 35,
-    }]
+class PornHubPlaylistBaseIE(InfoExtractor):
+    def _extract_entries(self, webpage):
+        return [
+            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall(
+                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
+        ]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        entries = [
-            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
-            for video_url in set(re.findall(
-                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
-        ]
+        entries = self._extract_entries(webpage)
 
         playlist = self._parse_json(
             self._search_regex(
@@ -158,3 +151,33 @@ class PornHubPlaylistIE(InfoExtractor):
 
         return self.playlist_result(
             entries, playlist_id, playlist.get('title'), playlist.get('description'))
+
+
+class PornHubPlaylistIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+
+class PornHubUserVideosIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/users/rushandlia/videos',
+        'info_dict': {
+            'id': 'rushandlia',
+        },
+        'playlist_mincount': 13,
+    }]
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, user_id)
+
+        return self.playlist_result(self._extract_entries(webpage), user_id)