youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 3d9fae1ed2da722faed44d4f89143f05797ab4d9
parent 5c47fc68090db9f6259723d063ffa11392194642
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sat, 13 Sep 2014 07:07:39 +0200

Add support for PornoXO

Diffstat:
Myoutube_dl/extractor/__init__.py | 1+
Ayoutube_dl/extractor/pornoxo.py | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py @@ -267,6 +267,7 @@ from .podomatic import PodomaticIE from .pornhd import PornHdIE from .pornhub import PornHubIE from .pornotube import PornotubeIE +from .pornoxo import PornoXOIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py @@ -0,0 +1,66 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + str_to_int, +) + +class PornoXOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' + _TEST = { + 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', + 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', + 'info_dict': { + 'id': '7564', + 'ext': 'flv', + 'title': 'Striptease From Sexy Secretary!', + 'description': 'Striptease From Sexy Secretary!', + 'categories': list, # NSFW + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url') + + title = self._html_search_regex( + r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') + + description = self._html_search_regex( + r'<meta name="description" content="([^"]+)\s*featuring', + webpage, 'description', fatal=False) + + thumbnail = self._html_search_regex( + r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + + view_count = str_to_int(self._html_search_regex( + r'Views:\s*(\d+)', webpage, 'view count', fatal=False)) + + categories_str = self._html_search_regex( + r'<meta name="description" content=".*featuring\s*([^"]+)"', + webpage, 'categories', fatal=False) + categories = ( + None if categories_str is None + else categories_str.split(',')) + + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'categories': categories, + 'view_count': view_count, + 'age_limit': 18, + }