zype.py (5710B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_HTTPError 8 from ..utils import ( 9 dict_get, 10 ExtractorError, 11 int_or_none, 12 js_to_json, 13 parse_iso8601, 14 ) 15 16 17 class ZypeIE(InfoExtractor): 18 _ID_RE = r'[\da-fA-F]+' 19 _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)=' 20 _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE)) 21 _TEST = { 22 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false', 23 'md5': 'eaee31d474c76a955bdaba02a505c595', 24 'info_dict': { 25 'id': '5b400b834b32992a310622b9', 26 'ext': 'mp4', 27 'title': 'Smoky Barbecue Favorites', 28 'thumbnail': r're:^https?://.*\.jpe?g', 29 'description': 'md5:5ff01e76316bd8d46508af26dc86023b', 30 'timestamp': 1504915200, 31 'upload_date': '20170909', 32 }, 33 } 34 35 @staticmethod 36 def _extract_urls(webpage): 37 return [ 38 mobj.group('url') 39 for mobj in re.finditer( 40 r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE), 41 webpage)] 42 43 def _real_extract(self, url): 44 video_id = self._match_id(url) 45 46 try: 47 response = self._download_json(re.sub( 48 r'\.(?:js|html)\?', '.json?', url), video_id)['response'] 49 except ExtractorError as e: 50 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403): 51 raise ExtractorError(self._parse_json( 52 e.cause.read().decode(), video_id)['message'], expected=True) 53 raise 54 55 body = response['body'] 56 video = response['video'] 57 title = video['title'] 58 59 if isinstance(body, dict): 60 formats = [] 61 for output in body.get('outputs', []): 62 output_url = output.get('url') 63 if not output_url: 64 continue 65 name = output.get('name') 66 if name == 'm3u8': 67 formats = self._extract_m3u8_formats( 68 output_url, video_id, 'mp4', 69 'm3u8_native', m3u8_id='hls', fatal=False) 70 else: 71 f = { 72 'format_id': name, 73 'tbr': int_or_none(output.get('bitrate')), 74 'url': output_url, 75 } 76 if name in ('m4a', 'mp3'): 77 f['vcodec'] = 'none' 78 else: 79 f.update({ 80 'height': int_or_none(output.get('height')), 81 'width': int_or_none(output.get('width')), 82 }) 83 formats.append(f) 84 text_tracks = body.get('subtitles') or [] 85 else: 86 m3u8_url = self._search_regex( 87 r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', 88 body, 'm3u8 url', group='url', default=None) 89 if not m3u8_url: 90 source = self._search_regex( 91 r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source') 92 93 def get_attr(key): 94 return self._search_regex( 95 r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key, 96 source, key, group='val') 97 98 if get_attr('integration') == 'verizon-media': 99 m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id') 100 formats = self._extract_m3u8_formats( 101 m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') 102 text_tracks = self._search_regex( 103 r'textTracks\s*:\s*(\[[^]]+\])', 104 body, 'text tracks', default=None) 105 if text_tracks: 106 text_tracks = self._parse_json( 107 text_tracks, video_id, js_to_json, False) 108 self._sort_formats(formats) 109 110 subtitles = {} 111 if text_tracks: 112 for text_track in text_tracks: 113 tt_url = dict_get(text_track, ('file', 'src')) 114 if not tt_url: 115 continue 116 subtitles.setdefault(text_track.get('label') or 'English', []).append({ 117 'url': tt_url, 118 }) 119 120 thumbnails = [] 121 for thumbnail in video.get('thumbnails', []): 122 thumbnail_url = thumbnail.get('url') 123 if not thumbnail_url: 124 continue 125 thumbnails.append({ 126 'url': thumbnail_url, 127 'width': int_or_none(thumbnail.get('width')), 128 'height': int_or_none(thumbnail.get('height')), 129 }) 130 131 return { 132 'id': video_id, 133 'display_id': video.get('friendly_title'), 134 'title': title, 135 'thumbnails': thumbnails, 136 'description': dict_get(video, ('description', 'ott_description', 'short_description')), 137 'timestamp': parse_iso8601(video.get('published_at')), 138 'duration': int_or_none(video.get('duration')), 139 'view_count': int_or_none(video.get('request_count')), 140 'average_rating': int_or_none(video.get('rating')), 141 'season_number': int_or_none(video.get('season')), 142 'episode_number': int_or_none(video.get('episode')), 143 'formats': formats, 144 'subtitles': subtitles, 145 }