hotstar.py (9449B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import hashlib 5 import hmac 6 import json 7 import re 8 import time 9 import uuid 10 11 from .common import InfoExtractor 12 from ..compat import ( 13 compat_HTTPError, 14 compat_str, 15 ) 16 from ..utils import ( 17 determine_ext, 18 ExtractorError, 19 int_or_none, 20 str_or_none, 21 try_get, 22 url_or_none, 23 ) 24 25 26 class HotStarBaseIE(InfoExtractor): 27 _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' 28 29 def _call_api_impl(self, path, video_id, headers, query, data=None): 30 st = int(time.time()) 31 exp = st + 6000 32 auth = 'st=%d~exp=%d~acl=/*' % (st, exp) 33 auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() 34 h = {'hotstarauth': auth} 35 h.update(headers) 36 return self._download_json( 37 'https://api.hotstar.com/' + path, 38 video_id, headers=h, query=query, data=data) 39 40 def _call_api(self, path, video_id, query_name='contentId'): 41 response = self._call_api_impl(path, video_id, { 42 'x-country-code': 'IN', 43 'x-platform-code': 'JIO', 44 }, { 45 query_name: video_id, 46 'tas': 10000, 47 }) 48 if response['statusCode'] != 'OK': 49 raise ExtractorError( 50 response['body']['message'], expected=True) 51 return response['body']['results'] 52 53 def _call_api_v2(self, path, video_id, headers, query=None, data=None): 54 h = {'X-Request-Id': compat_str(uuid.uuid4())} 55 h.update(headers) 56 try: 57 return self._call_api_impl( 58 path, video_id, h, query, data) 59 except ExtractorError as e: 60 if isinstance(e.cause, compat_HTTPError): 61 if e.cause.code == 402: 62 self.raise_login_required() 63 message = self._parse_json(e.cause.read().decode(), video_id)['message'] 64 if message in ('Content not available in region', 'Country is not supported'): 65 raise self.raise_geo_restricted(message) 66 raise ExtractorError(message) 67 raise e 68 69 70 class HotStarIE(HotStarBaseIE): 71 IE_NAME = 'hotstar' 72 _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})' 73 _TESTS = [{ 74 # contentData 75 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 76 'info_dict': { 77 'id': '1000076273', 78 'ext': 'mp4', 79 'title': 'Can You Not Spread Rumours?', 80 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', 81 'timestamp': 1447248600, 82 'upload_date': '20151111', 83 'duration': 381, 84 }, 85 'params': { 86 # m3u8 download 87 'skip_download': True, 88 } 89 }, { 90 # contentDetail 91 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', 92 'only_matching': True, 93 }, { 94 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583', 95 'only_matching': True, 96 }, { 97 'url': 'http://www.hotstar.com/1000000515', 98 'only_matching': True, 99 }, { 100 # only available via api v2 101 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', 102 'only_matching': True, 103 }, { 104 'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717', 105 'only_matching': True, 106 }] 107 _GEO_BYPASS = False 108 _DEVICE_ID = None 109 _USER_TOKEN = None 110 111 def _real_extract(self, url): 112 video_id = self._match_id(url) 113 114 webpage = self._download_webpage(url, video_id) 115 app_state = self._parse_json(self._search_regex( 116 r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', 117 webpage, 'app state'), video_id) 118 video_data = {} 119 getters = list( 120 lambda x, k=k: x['initialState']['content%s' % k]['content'] 121 for k in ('Data', 'Detail') 122 ) 123 for v in app_state.values(): 124 content = try_get(v, getters, dict) 125 if content and content.get('contentId') == video_id: 126 video_data = content 127 break 128 129 title = video_data['title'] 130 131 if video_data.get('drmProtected'): 132 raise ExtractorError('This video is DRM protected.', expected=True) 133 134 headers = {'Referer': url} 135 formats = [] 136 geo_restricted = False 137 138 if not self._USER_TOKEN: 139 self._DEVICE_ID = compat_str(uuid.uuid4()) 140 self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, { 141 'X-HS-Platform': 'PCTV', 142 'Content-Type': 'application/json', 143 }, data=json.dumps({ 144 'device_ids': [{ 145 'id': self._DEVICE_ID, 146 'type': 'device_id', 147 }], 148 }).encode())['user_identity'] 149 150 playback_sets = self._call_api_v2( 151 'play/v2/playback/content/' + video_id, video_id, { 152 'X-HS-Platform': 'web', 153 'X-HS-AppVersion': '6.99.1', 154 'X-HS-UserToken': self._USER_TOKEN, 155 }, query={ 156 'device-id': self._DEVICE_ID, 157 'desired-config': 'encryption:plain', 158 'os-name': 'Windows', 159 'os-version': '10', 160 })['data']['playBackSets'] 161 for playback_set in playback_sets: 162 if not isinstance(playback_set, dict): 163 continue 164 format_url = url_or_none(playback_set.get('playbackUrl')) 165 if not format_url: 166 continue 167 format_url = re.sub( 168 r'(?<=//staragvod)(\d)', r'web\1', format_url) 169 tags = str_or_none(playback_set.get('tagsCombination')) or '' 170 if tags and 'encryption:plain' not in tags: 171 continue 172 ext = determine_ext(format_url) 173 try: 174 if 'package:hls' in tags or ext == 'm3u8': 175 formats.extend(self._extract_m3u8_formats( 176 format_url, video_id, 'mp4', 177 entry_protocol='m3u8_native', 178 m3u8_id='hls', headers=headers)) 179 elif 'package:dash' in tags or ext == 'mpd': 180 formats.extend(self._extract_mpd_formats( 181 format_url, video_id, mpd_id='dash', headers=headers)) 182 elif ext == 'f4m': 183 # produce broken files 184 pass 185 else: 186 formats.append({ 187 'url': format_url, 188 'width': int_or_none(playback_set.get('width')), 189 'height': int_or_none(playback_set.get('height')), 190 }) 191 except ExtractorError as e: 192 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: 193 geo_restricted = True 194 continue 195 if not formats and geo_restricted: 196 self.raise_geo_restricted(countries=['IN']) 197 self._sort_formats(formats) 198 199 for f in formats: 200 f.setdefault('http_headers', {}).update(headers) 201 202 image = try_get(video_data, lambda x: x['image']['h'], compat_str) 203 204 return { 205 'id': video_id, 206 'title': title, 207 'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None, 208 'description': video_data.get('description'), 209 'duration': int_or_none(video_data.get('duration')), 210 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 211 'formats': formats, 212 'channel': video_data.get('channelName'), 213 'channel_id': str_or_none(video_data.get('channelId')), 214 'series': video_data.get('showName'), 215 'season': video_data.get('seasonName'), 216 'season_number': int_or_none(video_data.get('seasonNo')), 217 'season_id': str_or_none(video_data.get('seasonId')), 218 'episode': title, 219 'episode_number': int_or_none(video_data.get('episodeNo')), 220 } 221 222 223 class HotStarPlaylistIE(HotStarBaseIE): 224 IE_NAME = 'hotstar:playlist' 225 _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' 226 _TESTS = [{ 227 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 228 'info_dict': { 229 'id': '3_2_26', 230 }, 231 'playlist_mincount': 20, 232 }, { 233 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 234 'only_matching': True, 235 }, { 236 'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830', 237 'only_matching': True, 238 }] 239 240 def _real_extract(self, url): 241 playlist_id = self._match_id(url) 242 243 collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId') 244 245 entries = [ 246 self.url_result( 247 'https://www.hotstar.com/%s' % video['contentId'], 248 ie=HotStarIE.ie_key(), video_id=video['contentId']) 249 for video in collection['assets']['items'] 250 if video.get('contentId')] 251 252 return self.playlist_result(entries, playlist_id)