srgssr.py (9882B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 ExtractorError, 9 float_or_none, 10 int_or_none, 11 parse_iso8601, 12 qualities, 13 try_get, 14 ) 15 16 17 class SRGSSRIE(InfoExtractor): 18 _VALID_URL = r'''(?x) 19 (?: 20 https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn| 21 srgssr 22 ): 23 (?P<bu> 24 srf|rts|rsi|rtr|swi 25 ):(?:[^:]+:)? 26 (?P<type> 27 video|audio 28 ): 29 (?P<id> 30 [0-9a-f\-]{36}|\d+ 31 ) 32 ''' 33 _GEO_BYPASS = False 34 _GEO_COUNTRIES = ['CH'] 35 36 _ERRORS = { 37 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.', 38 'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.', 39 # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.', 40 'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.', 41 'LEGAL': 'The video cannot be transmitted for legal reasons.', 42 'STARTDATE': 'This video is not yet available. Please try again later.', 43 } 44 _DEFAULT_LANGUAGE_CODES = { 45 'srf': 'de', 46 'rts': 'fr', 47 'rsi': 'it', 48 'rtr': 'rm', 49 'swi': 'en', 50 } 51 52 def _get_tokenized_src(self, url, video_id, format_id): 53 token = self._download_json( 54 'http://tp.srgssr.ch/akahd/token?acl=*', 55 video_id, 'Downloading %s token' % format_id, fatal=False) or {} 56 auth_params = try_get(token, lambda x: x['token']['authparams']) 57 if auth_params: 58 url += ('?' if '?' not in url else '&') + auth_params 59 return url 60 61 def _get_media_data(self, bu, media_type, media_id): 62 query = {'onlyChapters': True} if media_type == 'video' else {} 63 full_media_data = self._download_json( 64 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json' 65 % (bu, media_type, media_id), 66 media_id, query=query)['chapterList'] 67 try: 68 media_data = next( 69 x for x in full_media_data if x.get('id') == media_id) 70 except StopIteration: 71 raise ExtractorError('No media information found') 72 73 block_reason = media_data.get('blockReason') 74 if block_reason and block_reason in self._ERRORS: 75 message = self._ERRORS[block_reason] 76 if block_reason == 'GEOBLOCK': 77 self.raise_geo_restricted( 78 msg=message, countries=self._GEO_COUNTRIES) 79 raise ExtractorError( 80 '%s said: %s' % (self.IE_NAME, message), expected=True) 81 82 return media_data 83 84 def _real_extract(self, url): 85 bu, media_type, media_id = re.match(self._VALID_URL, url).groups() 86 media_data = self._get_media_data(bu, media_type, media_id) 87 title = media_data['title'] 88 89 formats = [] 90 q = qualities(['SD', 'HD']) 91 for source in (media_data.get('resourceList') or []): 92 format_url = source.get('url') 93 if not format_url: 94 continue 95 protocol = source.get('protocol') 96 quality = source.get('quality') 97 format_id = [] 98 for e in (protocol, source.get('encoding'), quality): 99 if e: 100 format_id.append(e) 101 format_id = '-'.join(format_id) 102 103 if protocol in ('HDS', 'HLS'): 104 if source.get('tokenType') == 'AKAMAI': 105 format_url = self._get_tokenized_src( 106 format_url, media_id, format_id) 107 formats.extend(self._extract_akamai_formats( 108 format_url, media_id)) 109 elif protocol == 'HLS': 110 formats.extend(self._extract_m3u8_formats( 111 format_url, media_id, 'mp4', 'm3u8_native', 112 m3u8_id=format_id, fatal=False)) 113 elif protocol in ('HTTP', 'HTTPS'): 114 formats.append({ 115 'format_id': format_id, 116 'url': format_url, 117 'quality': q(quality), 118 }) 119 120 # This is needed because for audio medias the podcast url is usually 121 # always included, even if is only an audio segment and not the 122 # whole episode. 123 if int_or_none(media_data.get('position')) == 0: 124 for p in ('S', 'H'): 125 podcast_url = media_data.get('podcast%sdUrl' % p) 126 if not podcast_url: 127 continue 128 quality = p + 'D' 129 formats.append({ 130 'format_id': 'PODCAST-' + quality, 131 'url': podcast_url, 132 'quality': q(quality), 133 }) 134 self._sort_formats(formats) 135 136 subtitles = {} 137 if media_type == 'video': 138 for sub in (media_data.get('subtitleList') or []): 139 sub_url = sub.get('url') 140 if not sub_url: 141 continue 142 lang = sub.get('locale') or self._DEFAULT_LANGUAGE_CODES[bu] 143 subtitles.setdefault(lang, []).append({ 144 'url': sub_url, 145 }) 146 147 return { 148 'id': media_id, 149 'title': title, 150 'description': media_data.get('description'), 151 'timestamp': parse_iso8601(media_data.get('date')), 152 'thumbnail': media_data.get('imageUrl'), 153 'duration': float_or_none(media_data.get('duration'), 1000), 154 'subtitles': subtitles, 155 'formats': formats, 156 } 157 158 159 class SRGSSRPlayIE(InfoExtractor): 160 IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites' 161 _VALID_URL = r'''(?x) 162 https?:// 163 (?:(?:www|play)\.)? 164 (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/ 165 (?: 166 [^/]+/(?P<type>video|audio)/[^?]+| 167 popup(?P<type_2>video|audio)player 168 ) 169 \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+) 170 ''' 171 172 _TESTS = [{ 173 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 174 'md5': '6db2226ba97f62ad42ce09783680046c', 175 'info_dict': { 176 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5', 177 'ext': 'mp4', 178 'upload_date': '20130701', 179 'title': 'Snowden beantragt Asyl in Russland', 180 'timestamp': 1372708215, 181 'duration': 113.827, 182 'thumbnail': r're:^https?://.*1383719781\.png$', 183 }, 184 'expected_warnings': ['Unable to download f4m manifest'], 185 }, { 186 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc', 187 'info_dict': { 188 'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc', 189 'ext': 'mp3', 190 'upload_date': '20151013', 191 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem', 192 'timestamp': 1444709160, 193 'duration': 336.816, 194 }, 195 'params': { 196 # rtmp download 197 'skip_download': True, 198 }, 199 }, { 200 'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260', 201 'md5': '67a2a9ae4e8e62a68d0e9820cc9782df', 202 'info_dict': { 203 'id': '6348260', 204 'display_id': '6348260', 205 'ext': 'mp4', 206 'duration': 1796.76, 207 'title': 'Le 19h30', 208 'upload_date': '20141201', 209 'timestamp': 1417458600, 210 'thumbnail': r're:^https?://.*\.image', 211 }, 212 'params': { 213 # m3u8 download 214 'skip_download': True, 215 } 216 }, { 217 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270', 218 'info_dict': { 219 'id': '42960270', 220 'ext': 'mp4', 221 'title': 'Why people were against tax reforms', 222 'description': 'md5:7ac442c558e9630e947427469c4b824d', 223 'duration': 94.0, 224 'upload_date': '20170215', 225 'timestamp': 1487173560, 226 'thumbnail': r're:https?://www\.swissinfo\.ch/srgscalableimage/42961964', 227 'subtitles': 'count:9', 228 }, 229 'params': { 230 'skip_download': True, 231 } 232 }, { 233 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', 234 'only_matching': True, 235 }, { 236 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5', 237 'only_matching': True, 238 }, { 239 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260', 240 'only_matching': True, 241 }, { 242 # audio segment, has podcastSdUrl of the full episode 243 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb', 244 'only_matching': True, 245 }] 246 247 def _real_extract(self, url): 248 mobj = re.match(self._VALID_URL, url) 249 bu = mobj.group('bu') 250 media_type = mobj.group('type') or mobj.group('type_2') 251 media_id = mobj.group('id') 252 return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')