radiocanada.py (6349B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_HTTPError 8 from ..utils import ( 9 determine_ext, 10 ExtractorError, 11 int_or_none, 12 unified_strdate, 13 ) 14 15 16 class RadioCanadaIE(InfoExtractor): 17 IE_NAME = 'radiocanada' 18 _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' 19 _TESTS = [ 20 { 21 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 22 'info_dict': { 23 'id': '7184272', 24 'ext': 'mp4', 25 'title': 'Le parcours du tireur capté sur vidéo', 26 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 27 'upload_date': '20141023', 28 }, 29 'params': { 30 # m3u8 download 31 'skip_download': True, 32 } 33 }, 34 { 35 # empty Title 36 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', 37 'info_dict': { 38 'id': '7754998', 39 'ext': 'mp4', 40 'title': 'letelejournal22h', 41 'description': 'INTEGRALE WEB 22H-TJ', 42 'upload_date': '20170720', 43 }, 44 'params': { 45 # m3u8 download 46 'skip_download': True, 47 }, 48 }, 49 { 50 # with protectionType but not actually DRM protected 51 'url': 'radiocanada:toutv:140872', 52 'info_dict': { 53 'id': '140872', 54 'title': 'Épisode 1', 55 'series': 'District 31', 56 }, 57 'only_matching': True, 58 } 59 ] 60 _GEO_COUNTRIES = ['CA'] 61 _access_token = None 62 _claims = None 63 64 def _call_api(self, path, video_id=None, app_code=None, query=None): 65 if not query: 66 query = {} 67 query.update({ 68 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', 69 'output': 'json', 70 }) 71 if video_id: 72 query.update({ 73 'appCode': app_code, 74 'idMedia': video_id, 75 }) 76 if self._access_token: 77 query['access_token'] = self._access_token 78 try: 79 return self._download_json( 80 'https://services.radio-canada.ca/media/' + path, video_id, query=query) 81 except ExtractorError as e: 82 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422): 83 data = self._parse_json(e.cause.read().decode(), None) 84 error = data.get('error_description') or data['errorMessage']['text'] 85 raise ExtractorError(error, expected=True) 86 raise 87 88 def _extract_info(self, app_code, video_id): 89 metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas'] 90 91 def get_meta(name): 92 for meta in metas: 93 if meta.get('name') == name: 94 text = meta.get('text') 95 if text: 96 return text 97 98 # protectionType does not necessarily mean the video is DRM protected (see 99 # https://github.com/ytdl-org/youtube-dl/pull/18609). 100 if get_meta('protectionType'): 101 self.report_warning('This video is probably DRM protected.') 102 103 query = { 104 'connectionType': 'hd', 105 'deviceType': 'ipad', 106 'multibitrate': 'true', 107 } 108 if self._claims: 109 query['claims'] = self._claims 110 v_data = self._call_api('validation/v2/', video_id, app_code, query) 111 v_url = v_data.get('url') 112 if not v_url: 113 error = v_data['message'] 114 if error == "Le contenu sélectionné n'est pas disponible dans votre pays": 115 raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) 116 if error == 'Le contenu sélectionné est disponible seulement en premium': 117 self.raise_login_required(error) 118 raise ExtractorError( 119 '%s said: %s' % (self.IE_NAME, error), expected=True) 120 formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') 121 self._sort_formats(formats) 122 123 subtitles = {} 124 closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5') 125 if closed_caption_url: 126 subtitles['fr'] = [{ 127 'url': closed_caption_url, 128 'ext': determine_ext(closed_caption_url, 'vtt'), 129 }] 130 131 return { 132 'id': video_id, 133 'title': get_meta('Title') or get_meta('AV-nomEmission'), 134 'description': get_meta('Description') or get_meta('ShortDescription'), 135 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 136 'duration': int_or_none(get_meta('length')), 137 'series': get_meta('Emission'), 138 'season_number': int_or_none('SrcSaison'), 139 'episode_number': int_or_none('SrcEpisode'), 140 'upload_date': unified_strdate(get_meta('Date')), 141 'subtitles': subtitles, 142 'formats': formats, 143 } 144 145 def _real_extract(self, url): 146 return self._extract_info(*re.match(self._VALID_URL, url).groups()) 147 148 149 class RadioCanadaAudioVideoIE(InfoExtractor): 150 IE_NAME = 'radiocanada:audiovideo' 151 _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' 152 _TESTS = [{ 153 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 154 'info_dict': { 155 'id': '7527184', 156 'ext': 'mp4', 157 'title': 'Barack Obama au Vietnam', 158 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 159 'upload_date': '20160523', 160 }, 161 'params': { 162 # m3u8 download 163 'skip_download': True, 164 }, 165 }, { 166 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', 167 'only_matching': True, 168 }] 169 170 def _real_extract(self, url): 171 return self.url_result('radiocanada:medianet:%s' % self._match_id(url))