sexu.py (2000B)
from __future__ import unicode_literals

from .common import InfoExtractor


class SexuIE(InfoExtractor):
    """Information extractor for sexu.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://sexu.com/961791/',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': '961791',
            'ext': 'mp4',
            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
            'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
            'categories': list,  # NSFW
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at ``url`` and build its info dict.

        The formats and thumbnail come from the JSON object passed to the
        page's ``.setup(...)`` call (presumably the JW Player config); the
        title, description and categories come from the HTML ``<title>``
        and ``<meta>`` tags.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``categories``, ``formats`` and ``age_limit``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        jwvideo = self._parse_json(
            self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
            video_id)

        sources = jwvideo['sources']

        formats = []
        for source in sources:
            # Entries without a file URL cannot be downloaded; skip them.
            if not source.get('file'):
                continue
            label = source.get('label')
            # The label may be absent, null, or not of the form "<digits>p".
            # In those cases the height is simply unknown; it must not be
            # passed to int(), which would raise TypeError on None.
            height = self._search_regex(
                r'^(\d+)[pP]', label or '', 'height', default=None)
            formats.append({
                # The URL is embedded with escaping backslashes; drop them.
                'url': source['file'].replace('\\', ''),
                'format_id': label,
                'height': int(height) if height else None,
            })
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')

        description = self._html_search_meta(
            'description', webpage, 'description')

        thumbnail = jwvideo.get('image')

        # The "keywords" meta tag holds a comma-separated category list.
        categories_str = self._html_search_meta(
            'keywords', webpage, 'categories')
        categories = (
            None if categories_str is None
            else categories_str.split(','))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }