pornoxo.py (1939B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    str_to_int,
)


class PornoXOIE(InfoExtractor):
    """Extractor for individual video pages on pornoxo.com."""

    # Captures the numeric video id and the URL slug (display id).
    _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
        'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
        'info_dict': {
            'id': '7564',
            'ext': 'flv',
            'title': 'Striptease From Sexy Secretary!',
            'display_id': 'striptease-from-sexy-secretary',
            'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
            'categories': list,  # NSFW
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        The page is downloaded once; the player data returned by
        ``_extract_jwplayer_data`` is used as the base dict and is then
        extended with metadata scraped from the HTML: title, description,
        categories and view count.

        Returns the resulting dict, which always carries ``id``, ``title``,
        ``display_id``, ``description``, ``categories``, ``view_count`` and
        ``age_limit`` keys (optional values may be ``None``).
        """
        mobj = re.match(self._VALID_URL, url)
        # Named groups keep this independent of the group order in the regex.
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, video_id)
        # The title is scraped from the <title> tag below, so the player
        # data is not required to provide one.
        video_data = self._extract_jwplayer_data(
            webpage, video_id, require_title=False)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')

        # Optional field: fatal=False keeps a missing counter from aborting
        # the extraction.
        # NOTE(review): presumably str_to_int passes None through - confirm.
        view_count = str_to_int(self._html_search_regex(
            r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))

        # Categories are the comma-separated list that follows "featuring"
        # in the description meta tag.
        categories_str = self._html_search_regex(
            r'<meta name="description" content=".*featuring\s*([^"]+)"',
            webpage, 'categories', fatal=False)
        categories = None
        if categories_str is not None:
            # Strip each entry so ", "-separated lists do not produce names
            # with a leading space, and drop empty entries left behind by
            # stray commas.
            categories = [
                category.strip()
                for category in categories_str.split(',')
                if category.strip()]

        video_data.update({
            'id': video_id,
            'title': title,
            'display_id': display_id,
            'description': self._html_search_meta('description', webpage),
            'categories': categories,
            'view_count': view_count,
            'age_limit': 18,
        })

        return video_data