drtv.py (13913B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import binascii 5 import hashlib 6 import re 7 8 9 from .common import InfoExtractor 10 from ..aes import aes_cbc_decrypt 11 from ..compat import compat_urllib_parse_unquote 12 from ..utils import ( 13 bytes_to_intlist, 14 ExtractorError, 15 int_or_none, 16 intlist_to_bytes, 17 float_or_none, 18 mimetype2ext, 19 str_or_none, 20 try_get, 21 unified_timestamp, 22 update_url_query, 23 url_or_none, 24 ) 25 26 27 class DRTVIE(InfoExtractor): 28 _VALID_URL = r'''(?x) 29 https?:// 30 (?: 31 (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| 32 (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ 33 ) 34 (?P<id>[\da-z_-]+) 35 ''' 36 _GEO_BYPASS = False 37 _GEO_COUNTRIES = ['DK'] 38 IE_NAME = 'drtv' 39 _TESTS = [{ 40 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', 41 'md5': '25e659cccc9a2ed956110a299fdf5983', 42 'info_dict': { 43 'id': 'klassen-darlig-taber-10', 44 'ext': 'mp4', 45 'title': 'Klassen - Dårlig taber (10)', 46 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', 47 'timestamp': 1539085800, 48 'upload_date': '20181009', 49 'duration': 606.84, 50 'series': 'Klassen', 51 'season': 'Klassen I', 52 'season_number': 1, 53 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b', 54 'episode': 'Episode 10', 55 'episode_number': 10, 56 'release_year': 2016, 57 }, 58 'expected_warnings': ['Unable to download f4m manifest'], 59 }, { 60 # embed 61 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', 62 'info_dict': { 63 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6', 64 'ext': 'mp4', 65 'title': 'christiania pusher street ryddes drdkrjpo', 66 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5', 67 'timestamp': 1472800279, 68 'upload_date': '20160902', 69 'duration': 131.4, 70 }, 71 'params': { 72 'skip_download': True, 73 }, 74 'expected_warnings': ['Unable to download f4m manifest'], 75 }, { 76 # with SignLanguage formats 77 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 78 'info_dict': { 79 'id': 'historien-om-danmark-stenalder', 80 'ext': 'mp4', 81 'title': 'Historien om Danmark: Stenalder', 82 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', 83 'timestamp': 1546628400, 84 'upload_date': '20190104', 85 'duration': 3502.56, 86 'formats': 'mincount:20', 87 }, 88 'params': { 89 'skip_download': True, 90 }, 91 }, { 92 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', 93 'only_matching': True, 94 }, { 95 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', 96 'info_dict': { 97 'id': '00951930010', 98 'ext': 'mp4', 99 'title': 'Bonderøven (1:8)', 100 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', 101 'timestamp': 1546542000, 102 'upload_date': '20190103', 103 'duration': 2576.6, 104 }, 105 'params': { 106 'skip_download': True, 107 }, 108 }, { 109 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769', 110 'only_matching': True, 111 }, { 112 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769', 113 'only_matching': True, 114 }, { 115 'url': 'https://www.dr.dk/drtv/program/jagten_220924', 116 'only_matching': True, 117 }] 118 119 def _real_extract(self, url): 120 video_id = self._match_id(url) 121 122 webpage = self._download_webpage(url, video_id) 123 124 if '>Programmet er ikke længere tilgængeligt' in webpage: 125 raise ExtractorError( 126 'Video %s is not available' % video_id, expected=True) 127 128 video_id = self._search_regex( 129 (r'data-(?:material-identifier|episode-slug)="([^"]+)"', 130 r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), 131 webpage, 'video id', default=None) 132 133 if not video_id: 134 video_id = self._search_regex( 135 r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', 136 webpage, 'urn', default=None) 137 if video_id: 138 video_id = compat_urllib_parse_unquote(video_id) 139 140 _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard' 141 query = {'expanded': 'true'} 142 143 if video_id: 144 programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id) 145 else: 146 programcard_url = _PROGRAMCARD_BASE 147 page = self._parse_json( 148 self._search_regex( 149 r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage, 150 'data'), '1')['cache']['page'] 151 page = page[list(page.keys())[0]] 152 item = try_get( 153 page, (lambda x: x['item'], lambda x: x['entries'][0]['item']), 154 dict) 155 video_id = item['customId'].split(':')[-1] 156 query['productionnumber'] = video_id 157 158 data = self._download_json( 159 programcard_url, video_id, 'Downloading video JSON', query=query) 160 161 title = str_or_none(data.get('Title')) or re.sub( 162 r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', 163 self._og_search_title(webpage)) 164 description = self._og_search_description( 165 webpage, default=None) or data.get('Description') 166 167 timestamp = unified_timestamp( 168 data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime')) 169 170 thumbnail = None 171 duration = None 172 173 restricted_to_denmark = False 174 175 formats = [] 176 subtitles = {} 177 178 assets = [] 179 primary_asset = data.get('PrimaryAsset') 180 if isinstance(primary_asset, dict): 181 assets.append(primary_asset) 182 secondary_assets = data.get('SecondaryAssets') 183 if isinstance(secondary_assets, list): 184 for secondary_asset in secondary_assets: 185 if isinstance(secondary_asset, dict): 186 assets.append(secondary_asset) 187 188 def hex_to_bytes(hex): 189 return binascii.a2b_hex(hex.encode('ascii')) 190 191 def decrypt_uri(e): 192 n = int(e[2:10], 16) 193 a = e[10 + n:] 194 data = bytes_to_intlist(hex_to_bytes(e[10:10 + n])) 195 key = bytes_to_intlist(hashlib.sha256( 196 ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()) 197 iv = bytes_to_intlist(hex_to_bytes(a)) 198 decrypted = aes_cbc_decrypt(data, key, iv) 199 return intlist_to_bytes( 200 decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0] 201 202 for asset in assets: 203 kind = asset.get('Kind') 204 if kind == 'Image': 205 thumbnail = url_or_none(asset.get('Uri')) 206 elif kind in ('VideoResource', 'AudioResource'): 207 duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) 208 restricted_to_denmark = asset.get('RestrictedToDenmark') 209 asset_target = asset.get('Target') 210 for link in asset.get('Links', []): 211 uri = link.get('Uri') 212 if not uri: 213 encrypted_uri = link.get('EncryptedUri') 214 if not encrypted_uri: 215 continue 216 try: 217 uri = decrypt_uri(encrypted_uri) 218 except Exception: 219 self.report_warning( 220 'Unable to decrypt EncryptedUri', video_id) 221 continue 222 uri = url_or_none(uri) 223 if not uri: 224 continue 225 target = link.get('Target') 226 format_id = target or '' 227 if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'): 228 preference = -1 229 format_id += '-%s' % asset_target 230 elif asset_target == 'Default': 231 preference = 1 232 else: 233 preference = None 234 if target == 'HDS': 235 f4m_formats = self._extract_f4m_formats( 236 uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', 237 video_id, preference, f4m_id=format_id, fatal=False) 238 if kind == 'AudioResource': 239 for f in f4m_formats: 240 f['vcodec'] = 'none' 241 formats.extend(f4m_formats) 242 elif target == 'HLS': 243 formats.extend(self._extract_m3u8_formats( 244 uri, video_id, 'mp4', entry_protocol='m3u8_native', 245 preference=preference, m3u8_id=format_id, 246 fatal=False)) 247 else: 248 bitrate = link.get('Bitrate') 249 if bitrate: 250 format_id += '-%s' % bitrate 251 formats.append({ 252 'url': uri, 253 'format_id': format_id, 254 'tbr': int_or_none(bitrate), 255 'ext': link.get('FileFormat'), 256 'vcodec': 'none' if kind == 'AudioResource' else None, 257 'preference': preference, 258 }) 259 subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist') 260 if isinstance(subtitles_list, list): 261 LANGS = { 262 'Danish': 'da', 263 } 264 for subs in subtitles_list: 265 if not isinstance(subs, dict): 266 continue 267 sub_uri = url_or_none(subs.get('Uri')) 268 if not sub_uri: 269 continue 270 lang = subs.get('Language') or 'da' 271 subtitles.setdefault(LANGS.get(lang, lang), []).append({ 272 'url': sub_uri, 273 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' 274 }) 275 276 if not formats and restricted_to_denmark: 277 self.raise_geo_restricted( 278 'Unfortunately, DR is not allowed to show this program outside Denmark.', 279 countries=self._GEO_COUNTRIES) 280 281 self._sort_formats(formats) 282 283 return { 284 'id': video_id, 285 'title': title, 286 'description': description, 287 'thumbnail': thumbnail, 288 'timestamp': timestamp, 289 'duration': duration, 290 'formats': formats, 291 'subtitles': subtitles, 292 'series': str_or_none(data.get('SeriesTitle')), 293 'season': str_or_none(data.get('SeasonTitle')), 294 'season_number': int_or_none(data.get('SeasonNumber')), 295 'season_id': str_or_none(data.get('SeasonUrn')), 296 'episode': str_or_none(data.get('EpisodeTitle')), 297 'episode_number': int_or_none(data.get('EpisodeNumber')), 298 'release_year': int_or_none(data.get('ProductionYear')), 299 } 300 301 302 class DRTVLiveIE(InfoExtractor): 303 IE_NAME = 'drtv:live' 304 _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)' 305 _GEO_COUNTRIES = ['DK'] 306 _TEST = { 307 'url': 'https://www.dr.dk/tv/live/dr1', 308 'info_dict': { 309 'id': 'dr1', 310 'ext': 'mp4', 311 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 312 }, 313 'params': { 314 # m3u8 download 315 'skip_download': True, 316 }, 317 } 318 319 def _real_extract(self, url): 320 channel_id = self._match_id(url) 321 channel_data = self._download_json( 322 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, 323 channel_id) 324 title = self._live_title(channel_data['Title']) 325 326 formats = [] 327 for streaming_server in channel_data.get('StreamingServers', []): 328 server = streaming_server.get('Server') 329 if not server: 330 continue 331 link_type = streaming_server.get('LinkType') 332 for quality in streaming_server.get('Qualities', []): 333 for stream in quality.get('Streams', []): 334 stream_path = stream.get('Stream') 335 if not stream_path: 336 continue 337 stream_url = update_url_query( 338 '%s/%s' % (server, stream_path), {'b': ''}) 339 if link_type == 'HLS': 340 formats.extend(self._extract_m3u8_formats( 341 stream_url, channel_id, 'mp4', 342 m3u8_id=link_type, fatal=False, live=True)) 343 elif link_type == 'HDS': 344 formats.extend(self._extract_f4m_formats(update_url_query( 345 '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), 346 channel_id, f4m_id=link_type, fatal=False)) 347 self._sort_formats(formats) 348 349 return { 350 'id': channel_id, 351 'title': title, 352 'thumbnail': channel_data.get('PrimaryImageUri'), 353 'formats': formats, 354 'is_live': True, 355 }