xfileshare.py (7381B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
    decode_packed_codes,
    determine_ext,
    ExtractorError,
    int_or_none,
    js_to_json,
    urlencode_postdata,
)


# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
def aa_decode(aa_code):
    """Decode emoticon-obfuscated code into the string it represents.

    The input is split on a fixed emoticon delimiter.  In every piece the
    emoticon expressions are replaced by the digit they stand for and the
    remaining '+ ' separators are dropped.  A piece that then starts with
    digits is read as an octal code point; a piece that starts with ``u``
    followed by hex digits is read as a hexadecimal code point.  Pieces
    matching neither form are skipped.

    Returns the concatenation of all decoded characters.
    """
    # Order matters: the compound expressions contain the simple emoticons,
    # so they have to be substituted first.
    symbol_table = [
        ('7', '((゚ー゚) + (o^_^o))'),
        ('6', '((o^_^o) +(o^_^o))'),
        ('5', '((゚ー゚) + (゚Θ゚))'),
        ('2', '((o^_^o) - (゚Θ゚))'),
        ('4', '(゚ー゚)'),
        ('3', '(o^_^o)'),
        ('1', '(゚Θ゚)'),
        ('0', '(c^_^o)'),
    ]
    delim = '(゚Д゚)[゚ε゚]+'
    # Collect the characters and join once instead of growing a string.
    chars = []
    for aa_char in aa_code.split(delim):
        for val, pat in symbol_table:
            aa_char = aa_char.replace(pat, val)
        aa_char = aa_char.replace('+ ', '')
        m = re.match(r'^\d+', aa_char)
        if m:
            chars.append(compat_chr(int(m.group(0), 8)))
            continue
        m = re.match(r'^u([\da-f]+)', aa_char)
        if m:
            chars.append(compat_chr(int(m.group(1), 16)))
    return ''.join(chars)


class XFileShareIE(InfoExtractor):
    """Extractor for the XFileShare based sites listed in ``_SITES``."""

    # (host regex, display name) pairs; the host regexes are OR-ed together
    # to build _VALID_URL and the iframe pattern in _extract_urls.
    _SITES = (
        (r'aparat\.cam', 'Aparat'),
        (r'clipwatching\.com', 'ClipWatching'),
        (r'gounlimited\.to', 'GoUnlimited'),
        (r'govid\.me', 'GoVid'),
        (r'holavid\.com', 'HolaVid'),
        (r'streamty\.com', 'Streamty'),
        (r'thevideobee\.to', 'TheVideoBee'),
        (r'uqload\.com', 'Uqload'),
        (r'vidbom\.com', 'VidBom'),
        (r'vidlo\.us', 'vidlo'),
        (r'vidlocker\.xyz', 'VidLocker'),
        (r'vidshare\.tv', 'VidShare'),
        (r'vup\.to', 'VUp'),
        (r'wolfstream\.tv', 'WolfStream'),
        (r'xvideosharing\.com', 'XVideoSharing'),
    )

    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
    _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
                  % '|'.join(list(zip(*_SITES))[0]))

    # Any of these matching the downloaded page means the video is gone.
    _FILE_NOT_FOUND_REGEXES = (
        r'>(?:404 - )?File Not Found<',
        r'>The file was removed by administrator<',
    )

    _TESTS = [{
        'url': 'http://xvideosharing.com/fq65f94nd2ve',
        'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
        'info_dict': {
            'id': 'fq65f94nd2ve',
            'ext': 'mp4',
            'title': 'sample',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'https://aparat.cam/n4d6dh0wvlpr',
        'only_matching': True,
    }, {
        'url': 'https://wolfstream.tv/nthme29v9u2x',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all <iframe> tags in webpage that
        point at an ``/embed-<id>`` page on one of the hosts in ``_SITES``.
        """
        hosts_re = '|'.join(site for site, _ in XFileShareIE._SITES)
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
                % hosts_re,
                webpage)]

    def _real_extract(self, url):
        """Extract id, title, thumbnail and formats for the video at url.

        Downloads the video page (re-submitting the hidden 'download1' form
        after its countdown when the page contains one), replaces obfuscated
        script code in the page with its decoded form, and then looks for
        media URLs: first through jwplayer setup/load data and, if that
        yields no formats, through a set of generic URL patterns.

        Raises ExtractorError (expected) when the page matches one of
        ``_FILE_NOT_FOUND_REGEXES``.
        """
        # Named groups do not depend on the number or order of groups in
        # _VALID_URL.
        host, video_id = re.match(self._VALID_URL, url).group('host', 'id')

        # These two hosts are fetched through their embed page.
        page_path = (
            'embed-%s.html' % video_id
            if host in ('govid.me', 'vidlo.us') else video_id)
        url = 'https://%s/%s' % (host, page_path)
        webpage = self._download_webpage(url, video_id)

        if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        fields = self._hidden_inputs(webpage)

        if fields.get('op') == 'download1':
            # The page is an interstitial form: wait for the advertised
            # countdown (if any), then POST the hidden fields back to get
            # the actual video page.
            countdown = int_or_none(self._search_regex(
                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
                webpage, 'countdown', default=None))
            if countdown:
                self._sleep(countdown, video_id)

            webpage = self._download_webpage(
                url, video_id, 'Downloading video page',
                data=urlencode_postdata(fields), headers={
                    'Referer': url,
                    'Content-type': 'application/x-www-form-urlencoded',
                })

        # Title fallbacks: page-specific patterns, then the og:title, then
        # the video id itself.
        title = (
            self._search_regex(
                (r'style="z-index: [0-9]+;">([^<]+)</span>',
                 r'<td nowrap>([^<]+)</td>',
                 r'h4-fine[^>]*>([^<]+)<',
                 r'>Watch (.+)[ <]',
                 r'<h2 class="video-page-head">([^<]+)</h2>',
                 r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to
                 r'title\s*:\s*"([^"]+)"'),  # govid.me
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None)
            or video_id).strip()

        # Substitute obfuscated script code in place with its decoded form
        # so that the patterns below can see the media URLs.
        for regex, func in (
                (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
                (r'(゚.+)', aa_decode)):
            obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
            if obf_code:
                webpage = webpage.replace(obf_code, func(obf_code))

        formats = []

        jwplayer_data = self._search_regex(
            [
                r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
                r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
            ], webpage,
            'jwplayer data', default=None)
        if jwplayer_data:
            jwplayer_data = self._parse_json(
                jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
            if jwplayer_data:
                formats = self._parse_jwplayer_data(
                    jwplayer_data, video_id, False,
                    m3u8_id='hls', mpd_id='dash')['formats']

        if not formats:
            # No usable jwplayer data: collect candidate URLs from generic
            # patterns, keeping first-seen order and dropping duplicates.
            urls = []
            for regex in (
                    r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
                    r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
                    r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
                    r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
                for mobj in re.finditer(regex, webpage):
                    video_url = mobj.group('url')
                    if video_url not in urls:
                        urls.append(video_url)

            sources = self._search_regex(
                r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
            if sources:
                # De-duplicate here as well so that a URL listed twice (or
                # already found above) does not yield a duplicate format.
                for video_url in self._parse_json(sources, video_id):
                    if video_url not in urls:
                        urls.append(video_url)

            for video_url in urls:
                if determine_ext(video_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls',
                        fatal=False))
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': 'sd',
                    })
        self._sort_formats(formats)

        thumbnail = self._search_regex(
            [
                r'<video[^>]+poster="([^"]+)"',
                r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
            ], webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }