crackle.py (7625B)
1 # coding: utf-8 2 from __future__ import unicode_literals, division 3 4 import hashlib 5 import hmac 6 import re 7 import time 8 9 from .common import InfoExtractor 10 from ..compat import compat_HTTPError 11 from ..utils import ( 12 determine_ext, 13 float_or_none, 14 int_or_none, 15 parse_age_limit, 16 parse_duration, 17 url_or_none, 18 ExtractorError 19 ) 20 21 22 class CrackleIE(InfoExtractor): 23 _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' 24 _TESTS = [{ 25 # geo restricted to CA 26 'url': 'https://www.crackle.com/andromeda/2502343', 27 'info_dict': { 28 'id': '2502343', 29 'ext': 'mp4', 30 'title': 'Under The Night', 31 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a', 32 'duration': 2583, 33 'view_count': int, 34 'average_rating': 0, 35 'age_limit': 14, 36 'genre': 'Action, Sci-Fi', 37 'creator': 'Allan Kroeker', 38 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe', 39 'release_year': 2000, 40 'series': 'Andromeda', 41 'episode': 'Under The Night', 42 'season_number': 1, 43 'episode_number': 1, 44 }, 45 'params': { 46 # m3u8 download 47 'skip_download': True, 48 } 49 }, { 50 'url': 'https://www.sonycrackle.com/andromeda/2502343', 51 'only_matching': True, 52 }] 53 54 _MEDIA_FILE_SLOTS = { 55 '360p.mp4': { 56 'width': 640, 57 'height': 360, 58 }, 59 '480p.mp4': { 60 'width': 768, 61 'height': 432, 62 }, 63 '480p_1mbps.mp4': { 64 'width': 852, 65 'height': 480, 66 }, 67 } 68 69 def _real_extract(self, url): 70 video_id = self._match_id(url) 71 72 country_code = self._downloader.params.get('geo_bypass_country', None) 73 countries = [country_code] if country_code else ( 74 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI') 75 76 last_e = None 77 78 for country in countries: 79 try: 80 # Authorization generation algorithm is reverse engineered from: 81 # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js 82 media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country) 83 timestamp = time.strftime('%Y%m%d%H%M', time.gmtime()) 84 h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper() 85 media = self._download_json( 86 media_detail_url, video_id, 'Downloading media JSON as %s' % country, 87 'Unable to download media JSON', headers={ 88 'Accept': 'application/json', 89 'Authorization': '|'.join([h, timestamp, '117', '1']), 90 }) 91 except ExtractorError as e: 92 # 401 means geo restriction, trying next country 93 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: 94 last_e = e 95 continue 96 raise 97 98 media_urls = media.get('MediaURLs') 99 if not media_urls or not isinstance(media_urls, list): 100 continue 101 102 title = media['Title'] 103 104 formats = [] 105 for e in media['MediaURLs']: 106 if e.get('UseDRM') is True: 107 continue 108 format_url = url_or_none(e.get('Path')) 109 if not format_url: 110 continue 111 ext = determine_ext(format_url) 112 if ext == 'm3u8': 113 formats.extend(self._extract_m3u8_formats( 114 format_url, video_id, 'mp4', entry_protocol='m3u8_native', 115 m3u8_id='hls', fatal=False)) 116 elif ext == 'mpd': 117 formats.extend(self._extract_mpd_formats( 118 format_url, video_id, mpd_id='dash', fatal=False)) 119 elif format_url.endswith('.ism/Manifest'): 120 formats.extend(self._extract_ism_formats( 121 format_url, video_id, ism_id='mss', fatal=False)) 122 else: 123 mfs_path = e.get('Type') 124 mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path) 125 if not mfs_info: 126 continue 127 formats.append({ 128 'url': format_url, 129 'format_id': 'http-' + mfs_path.split('.')[0], 130 'width': mfs_info['width'], 131 'height': mfs_info['height'], 132 }) 133 self._sort_formats(formats) 134 135 description = media.get('Description') 136 duration = int_or_none(media.get( 137 'DurationInSeconds')) or parse_duration(media.get('Duration')) 138 view_count = int_or_none(media.get('CountViews')) 139 average_rating = float_or_none(media.get('UserRating')) 140 age_limit = parse_age_limit(media.get('Rating')) 141 genre = media.get('Genre') 142 release_year = int_or_none(media.get('ReleaseYear')) 143 creator = media.get('Directors') 144 artist = media.get('Cast') 145 146 if media.get('MediaTypeDisplayValue') == 'Full Episode': 147 series = media.get('ShowName') 148 episode = title 149 season_number = int_or_none(media.get('Season')) 150 episode_number = int_or_none(media.get('Episode')) 151 else: 152 series = episode = season_number = episode_number = None 153 154 subtitles = {} 155 cc_files = media.get('ClosedCaptionFiles') 156 if isinstance(cc_files, list): 157 for cc_file in cc_files: 158 if not isinstance(cc_file, dict): 159 continue 160 cc_url = url_or_none(cc_file.get('Path')) 161 if not cc_url: 162 continue 163 lang = cc_file.get('Locale') or 'en' 164 subtitles.setdefault(lang, []).append({'url': cc_url}) 165 166 thumbnails = [] 167 images = media.get('Images') 168 if isinstance(images, list): 169 for image_key, image_url in images.items(): 170 mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) 171 if not mobj: 172 continue 173 thumbnails.append({ 174 'url': image_url, 175 'width': int(mobj.group(1)), 176 'height': int(mobj.group(2)), 177 }) 178 179 return { 180 'id': video_id, 181 'title': title, 182 'description': description, 183 'duration': duration, 184 'view_count': view_count, 185 'average_rating': average_rating, 186 'age_limit': age_limit, 187 'genre': genre, 188 'creator': creator, 189 'artist': artist, 190 'release_year': release_year, 191 'series': series, 192 'episode': episode, 193 'season_number': season_number, 194 'episode_number': episode_number, 195 'thumbnails': thumbnails, 196 'subtitles': subtitles, 197 'formats': formats, 198 } 199 200 raise last_e