indavideo.py (4415B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 int_or_none, 10 parse_age_limit, 11 parse_iso8601, 12 update_url_query, 13 ) 14 15 16 class IndavideoEmbedIE(InfoExtractor): 17 _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)' 18 _TESTS = [{ 19 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/', 20 'md5': 'c8a507a1c7410685f83a06eaeeaafeab', 21 'info_dict': { 22 'id': '1837039', 23 'ext': 'mp4', 24 'title': 'Cicatánc', 25 'description': '', 26 'thumbnail': r're:^https?://.*\.jpg$', 27 'uploader': 'cukiajanlo', 28 'uploader_id': '83729', 29 'timestamp': 1439193826, 30 'upload_date': '20150810', 31 'duration': 72, 32 'age_limit': 0, 33 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'], 34 }, 35 }, { 36 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', 37 'only_matching': True, 38 }, { 39 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1', 40 'only_matching': True, 41 }] 42 43 # Some example URLs covered by generic extractor: 44 # http://indavideo.hu/video/Vicces_cica_1 45 # http://index.indavideo.hu/video/2015_0728_beregszasz 46 # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko 47 # http://erotika.indavideo.hu/video/Amator_tini_punci 48 # http://film.indavideo.hu/video/f_hrom_nagymamm_volt 49 # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes 50 51 @staticmethod 52 def _extract_urls(webpage): 53 return re.findall( 54 r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)', 55 webpage) 56 57 def _real_extract(self, url): 58 video_id = self._match_id(url) 59 60 video = self._download_json( 61 'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, 62 video_id)['data'] 63 64 title = video['title'] 65 66 video_urls = [] 67 68 video_files = video.get('video_files') 69 if isinstance(video_files, list): 70 video_urls.extend(video_files) 71 elif isinstance(video_files, dict): 72 video_urls.extend(video_files.values()) 73 74 video_file = video.get('video_file') 75 if video: 76 video_urls.append(video_file) 77 video_urls = list(set(video_urls)) 78 79 video_prefix = video_urls[0].rsplit('/', 1)[0] 80 81 for flv_file in video.get('flv_files', []): 82 flv_url = '%s/%s' % (video_prefix, flv_file) 83 if flv_url not in video_urls: 84 video_urls.append(flv_url) 85 86 filesh = video.get('filesh') 87 88 formats = [] 89 for video_url in video_urls: 90 height = int_or_none(self._search_regex( 91 r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)) 92 if filesh: 93 if not height: 94 continue 95 token = filesh.get(compat_str(height)) 96 if token is None: 97 continue 98 video_url = update_url_query(video_url, {'token': token}) 99 formats.append({ 100 'url': video_url, 101 'height': height, 102 }) 103 self._sort_formats(formats) 104 105 timestamp = video.get('date') 106 if timestamp: 107 # upload date is in CEST 108 timestamp = parse_iso8601(timestamp + ' +0200', ' ') 109 110 thumbnails = [{ 111 'url': self._proto_relative_url(thumbnail) 112 } for thumbnail in video.get('thumbnails', [])] 113 114 tags = [tag['title'] for tag in video.get('tags') or []] 115 116 return { 117 'id': video.get('id') or video_id, 118 'title': title, 119 'description': video.get('description'), 120 'thumbnails': thumbnails, 121 'uploader': video.get('user_name'), 122 'uploader_id': video.get('user_id'), 123 'timestamp': timestamp, 124 'duration': int_or_none(video.get('length')), 125 'age_limit': parse_age_limit(video.get('age_limit')), 126 'tags': tags, 127 'formats': formats, 128 }