foxgay.py (2203B)
1 from __future__ import unicode_literals 2 3 import itertools 4 5 from .common import InfoExtractor 6 from ..utils import ( 7 get_element_by_id, 8 int_or_none, 9 remove_end, 10 ) 11 12 13 class FoxgayIE(InfoExtractor): 14 _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' 15 _TEST = { 16 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', 17 'md5': '344558ccfea74d33b7adbce22e577f54', 18 'info_dict': { 19 'id': '2582', 20 'ext': 'mp4', 21 'title': 'Fuck Turkish-style', 22 'description': 'md5:6ae2d9486921891efe89231ace13ffdf', 23 'age_limit': 18, 24 'thumbnail': r're:https?://.*\.jpg$', 25 }, 26 } 27 28 def _real_extract(self, url): 29 video_id = self._match_id(url) 30 webpage = self._download_webpage(url, video_id) 31 32 title = remove_end(self._html_search_regex( 33 r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com') 34 description = get_element_by_id('inf_tit', webpage) 35 36 # The default user-agent with foxgay cookies leads to pages without videos 37 self._downloader.cookiejar.clear('.foxgay.com') 38 # Find the URL for the iFrame which contains the actual video. 39 iframe_url = self._html_search_regex( 40 r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage, 41 'video frame', group='url') 42 iframe = self._download_webpage( 43 iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'}, 44 note='Downloading video frame') 45 video_data = self._parse_json(self._search_regex( 46 r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id) 47 48 formats = [{ 49 'url': source, 50 'height': int_or_none(resolution), 51 } for source, resolution in zip( 52 video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] 53 54 self._sort_formats(formats) 55 56 return { 57 'id': video_id, 58 'title': title, 59 'formats': formats, 60 'description': description, 61 'thumbnail': video_data.get('act_vid', {}).get('thumb'), 62 'age_limit': 18, 63 }