videa.py (5806B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import random 5 import re 6 import string 7 8 from .common import InfoExtractor 9 from ..utils import ( 10 ExtractorError, 11 int_or_none, 12 mimetype2ext, 13 parse_codecs, 14 update_url_query, 15 xpath_element, 16 xpath_text, 17 ) 18 from ..compat import ( 19 compat_b64decode, 20 compat_ord, 21 compat_struct_pack, 22 ) 23 24 25 class VideaIE(InfoExtractor): 26 _VALID_URL = r'''(?x) 27 https?:// 28 videa(?:kid)?\.hu/ 29 (?: 30 videok/(?:[^/]+/)*[^?#&]+-| 31 (?:videojs_)?player\?.*?\bv=| 32 player/v/ 33 ) 34 (?P<id>[^?#&]+) 35 ''' 36 _TESTS = [{ 37 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ', 38 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', 39 'info_dict': { 40 'id': '8YfIAjxwWGwT8HVQ', 41 'ext': 'mp4', 42 'title': 'Az őrült kígyász 285 kígyót enged szabadon', 43 'thumbnail': r're:^https?://.*', 44 'duration': 21, 45 }, 46 }, { 47 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', 48 'only_matching': True, 49 }, { 50 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', 51 'only_matching': True, 52 }, { 53 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 54 'only_matching': True, 55 }, { 56 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', 57 'only_matching': True, 58 }, { 59 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ', 60 'only_matching': True, 61 }, { 62 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 63 'only_matching': True, 64 }] 65 _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' 66 67 @staticmethod 68 def _extract_urls(webpage): 69 return [url for _, url in re.findall( 70 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', 71 webpage)] 72 73 @staticmethod 74 def rc4(cipher_text, key): 75 res = b'' 76 77 key_len = len(key) 78 S = list(range(256)) 79 80 j = 0 81 for i in range(256): 82 j = (j + S[i] + ord(key[i % key_len])) % 256 83 S[i], S[j] = S[j], S[i] 84 85 i = 0 86 j = 0 87 for m in range(len(cipher_text)): 88 i = (i + 1) % 256 89 j = (j + S[i]) % 256 90 S[i], S[j] = S[j], S[i] 91 k = S[(S[i] + S[j]) % 256] 92 res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m])) 93 94 return res.decode() 95 96 def _real_extract(self, url): 97 video_id = self._match_id(url) 98 query = {'v': video_id} 99 player_page = self._download_webpage( 100 'https://videa.hu/player', video_id, query=query) 101 102 nonce = self._search_regex( 103 r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce') 104 l = nonce[:32] 105 s = nonce[32:] 106 result = '' 107 for i in range(0, 32): 108 result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] 109 110 random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) 111 query['_s'] = random_seed 112 query['_t'] = result[:16] 113 114 b64_info, handle = self._download_webpage_handle( 115 'http://videa.hu/videaplayer_get_xml.php', video_id, query=query) 116 if b64_info.startswith('<?xml'): 117 info = self._parse_xml(b64_info, video_id) 118 else: 119 key = result[16:] + random_seed + handle.headers['x-videa-xs'] 120 info = self._parse_xml(self.rc4( 121 compat_b64decode(b64_info), key), video_id) 122 123 video = xpath_element(info, './video', 'video') 124 if not video: 125 raise ExtractorError(xpath_element( 126 info, './error', fatal=True), expected=True) 127 sources = xpath_element( 128 info, './video_sources', 'sources', fatal=True) 129 hash_values = xpath_element( 130 info, './hash_values', 'hash values', fatal=True) 131 132 title = xpath_text(video, './title', fatal=True) 133 134 formats = [] 135 for source in sources.findall('./video_source'): 136 source_url = source.text 137 source_name = source.get('name') 138 source_exp = source.get('exp') 139 if not (source_url and source_name and source_exp): 140 continue 141 hash_value = xpath_text(hash_values, 'hash_value_' + source_name) 142 if not hash_value: 143 continue 144 source_url = update_url_query(source_url, { 145 'md5': hash_value, 146 'expires': source_exp, 147 }) 148 f = parse_codecs(source.get('codecs')) 149 f.update({ 150 'url': self._proto_relative_url(source_url), 151 'ext': mimetype2ext(source.get('mimetype')) or 'mp4', 152 'format_id': source.get('name'), 153 'width': int_or_none(source.get('width')), 154 'height': int_or_none(source.get('height')), 155 }) 156 formats.append(f) 157 self._sort_formats(formats) 158 159 thumbnail = self._proto_relative_url(xpath_text(video, './poster_src')) 160 161 age_limit = None 162 is_adult = xpath_text(video, './is_adult_content', default=None) 163 if is_adult: 164 age_limit = 18 if is_adult == '1' else 0 165 166 return { 167 'id': video_id, 168 'title': title, 169 'thumbnail': thumbnail, 170 'duration': int_or_none(xpath_text(video, './duration')), 171 'age_limit': age_limit, 172 'formats': formats, 173 }