xvideos.py (5285B)
1 from __future__ import unicode_literals 2 3 import re 4 5 from .common import InfoExtractor 6 from ..compat import compat_urllib_parse_unquote 7 from ..utils import ( 8 clean_html, 9 determine_ext, 10 ExtractorError, 11 int_or_none, 12 parse_duration, 13 ) 14 15 16 class XVideosIE(InfoExtractor): 17 _VALID_URL = r'''(?x) 18 https?:// 19 (?: 20 (?:[^/]+\.)?xvideos2?\.com/video| 21 (?:www\.)?xvideos\.es/video| 22 flashservice\.xvideos\.com/embedframe/| 23 static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= 24 ) 25 (?P<id>[0-9]+) 26 ''' 27 _TESTS = [{ 28 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 29 'md5': '14cea69fcb84db54293b1e971466c2e1', 30 'info_dict': { 31 'id': '4588838', 32 'ext': 'mp4', 33 'title': 'Biker Takes his Girl', 34 'duration': 108, 35 'age_limit': 18, 36 } 37 }, { 38 'url': 'https://flashservice.xvideos.com/embedframe/4588838', 39 'only_matching': True, 40 }, { 41 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 42 'only_matching': True, 43 }, { 44 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl', 45 'only_matching': True 46 }, { 47 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl', 48 'only_matching': True 49 }, { 50 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl', 51 'only_matching': True 52 }, { 53 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl', 54 'only_matching': True 55 }, { 56 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl', 57 'only_matching': True 58 }, { 59 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl', 60 'only_matching': True 61 }, { 62 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl', 63 'only_matching': True 64 }, { 65 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl', 66 'only_matching': True 67 }, { 68 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl', 69 'only_matching': True 70 }, { 71 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl', 72 'only_matching': True 73 }, { 74 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl', 75 'only_matching': True 76 }, { 77 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl', 78 'only_matching': True 79 }] 80 81 def _real_extract(self, url): 82 video_id = self._match_id(url) 83 84 webpage = self._download_webpage( 85 'https://www.xvideos.com/video%s/' % video_id, video_id) 86 87 mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage) 88 if mobj: 89 raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) 90 91 title = self._html_search_regex( 92 (r'<title>(?P<title>.+?)\s+-\s+XVID', 93 r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), 94 webpage, 'title', default=None, 95 group='title') or self._og_search_title(webpage) 96 97 thumbnails = [] 98 for preference, thumbnail in enumerate(('', '169')): 99 thumbnail_url = self._search_regex( 100 r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail, 101 webpage, 'thumbnail', default=None, group='thumbnail') 102 if thumbnail_url: 103 thumbnails.append({ 104 'url': thumbnail_url, 105 'preference': preference, 106 }) 107 108 duration = int_or_none(self._og_search_property( 109 'duration', webpage, default=None)) or parse_duration( 110 self._search_regex( 111 r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)', 112 webpage, 'duration', fatal=False)) 113 114 formats = [] 115 116 video_url = compat_urllib_parse_unquote(self._search_regex( 117 r'flv_url=(.+?)&', webpage, 'video URL', default='')) 118 if video_url: 119 formats.append({ 120 'url': video_url, 121 'format_id': 'flv', 122 }) 123 124 for kind, _, format_url in re.findall( 125 r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage): 126 format_id = kind.lower() 127 if format_id == 'hls': 128 formats.extend(self._extract_m3u8_formats( 129 format_url, video_id, 'mp4', 130 entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) 131 elif format_id in ('urllow', 'urlhigh'): 132 formats.append({ 133 'url': format_url, 134 'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]), 135 'quality': -2 if format_id.endswith('low') else None, 136 }) 137 138 self._sort_formats(formats) 139 140 return { 141 'id': video_id, 142 'formats': formats, 143 'title': title, 144 'duration': duration, 145 'thumbnails': thumbnails, 146 'age_limit': 18, 147 }