zaq1.py (3293B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unified_timestamp,
)


class Zaq1IE(InfoExtractor):
    """Extractor for single-video pages matching ``zaq1.pl/video/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://zaq1.pl/video/xev0e',
        'md5': '24a5eb3f052e604ae597c4d0d19b351e',
        'info_dict': {
            'id': 'xev0e',
            'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
            'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
            'ext': 'mp4',
            'duration': 511,
            'timestamp': 1490896361,
            'uploader': 'Anonim',
            'upload_date': '20170330',
            'view_count': int,
        }
    }, {
        # malformed JSON-LD
        'url': 'http://zaq1.pl/video/x81vn',
        'info_dict': {
            'id': 'x81vn',
            'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
            'ext': 'mp4',
            'duration': 6234,
            'timestamp': 1493494860,
            'uploader': 'Anonim',
            'upload_date': '20170429',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to parse JSON'],
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The media URL is read from the page's ``data-video-url`` attribute
        (mandatory).  Metadata is taken from JSON-LD first; every field that
        JSON-LD did not provide is then filled in from ``data-*`` attributes
        or ``<meta>`` tags of the same page.

        Returns the info dict containing ``id``, a single-entry ``formats``
        list, ``uploader`` and whatever metadata could be recovered.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'data-video-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'video url', group='url')

        # Non-fatal: the second test case above covers a page whose JSON-LD
        # is malformed, in which case the fallbacks below supply the data.
        info = self._search_json_ld(webpage, video_id, fatal=False)

        def extract_data(field, name, fatal=False):
            """Return the quoted value of the ``data-<field>`` attribute.

            *name* is the human-readable label handed to ``_search_regex``
            for its diagnostics; *fatal* is forwarded unchanged.
            """
            return self._search_regex(
                r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
                webpage, name, fatal=fatal, group='field')

        # Each fallback only runs when JSON-LD left the field empty.
        if not info.get('title'):
            info['title'] = extract_data('file-name', 'title', fatal=True)

        if not info.get('duration'):
            info['duration'] = int_or_none(extract_data('duration', 'duration'))

        if not info.get('thumbnail'):
            info['thumbnail'] = extract_data('photo-url', 'thumbnail')

        if not info.get('timestamp'):
            info['timestamp'] = unified_timestamp(self._html_search_meta(
                'uploadDate', webpage, 'timestamp'))

        # Guard on the key that is actually written ('view_count'); checking
        # 'interactionCount' here would always overwrite an existing value.
        if not info.get('view_count'):
            info['view_count'] = int_or_none(self._html_search_meta(
                'interactionCount', webpage, 'view count'))

        uploader = self._html_search_regex(
            r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
            fatal=False)

        width = int_or_none(self._html_search_meta(
            'width', webpage, fatal=False))
        height = int_or_none(self._html_search_meta(
            'height', webpage, fatal=False))

        info.update({
            'id': video_id,
            'formats': [{
                'url': video_url,
                'width': width,
                'height': height,
                # The page URL is sent as Referer with the media request
                # (presumably required by the media host -- confirm).
                'http_headers': {
                    'Referer': url,
                },
            }],
            'uploader': uploader,
        })

        return info