youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit b52c9ef1655042688a4822d241af398592b951f9
parent e28ed498e64545f02f2d3dbccf97ecf0e47aa82a
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sun,  6 Nov 2016 21:52:00 +0700

[extractor/generic] Improve support for pornhub embeds (closes #11100)

Diffstat:
Myoutube_dl/extractor/generic.py | 10+++++-----
Myoutube_dl/extractor/pornhub.py | 13++++++-------
2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -1983,11 +1983,6 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') - # Look for embedded PornHub player - pornhub_url = PornHubIE._extract_url(webpage) - if pornhub_url: - return self.url_result(pornhub_url, 'PornHub') - # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: @@ -1998,6 +1993,11 @@ class GenericIE(InfoExtractor): if tnaflix_urls: return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + # Look for embedded PornHub player + pornhub_urls = PornHubIE._extract_urls(webpage) + if pornhub_urls: + return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key()) + # Look for embedded DrTuber player drtuber_urls = DrTuberIE._extract_urls(webpage) if drtuber_urls: diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py @@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor): (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) - (?P<id>[0-9a-z]+) + (?P<id>[\da-z]+) ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', @@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage) - if mobj: - return mobj.group('url') + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)', + webpage) def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex(