adobetv.py (10291B)
1 from __future__ import unicode_literals 2 3 import functools 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 float_or_none, 10 int_or_none, 11 ISO639Utils, 12 OnDemandPagedList, 13 parse_duration, 14 str_or_none, 15 str_to_int, 16 unified_strdate, 17 ) 18 19 20 class AdobeTVBaseIE(InfoExtractor): 21 def _call_api(self, path, video_id, query, note=None): 22 return self._download_json( 23 'http://tv.adobe.com/api/v4/' + path, 24 video_id, note, query=query)['data'] 25 26 def _parse_subtitles(self, video_data, url_key): 27 subtitles = {} 28 for translation in video_data.get('translations', []): 29 vtt_path = translation.get(url_key) 30 if not vtt_path: 31 continue 32 lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) 33 subtitles.setdefault(lang, []).append({ 34 'ext': 'vtt', 35 'url': vtt_path, 36 }) 37 return subtitles 38 39 def _parse_video_data(self, video_data): 40 video_id = compat_str(video_data['id']) 41 title = video_data['title'] 42 43 s3_extracted = False 44 formats = [] 45 for source in video_data.get('videos', []): 46 source_url = source.get('url') 47 if not source_url: 48 continue 49 f = { 50 'format_id': source.get('quality_level'), 51 'fps': int_or_none(source.get('frame_rate')), 52 'height': int_or_none(source.get('height')), 53 'tbr': int_or_none(source.get('video_data_rate')), 54 'width': int_or_none(source.get('width')), 55 'url': source_url, 56 } 57 original_filename = source.get('original_filename') 58 if original_filename: 59 if not (f.get('height') and f.get('width')): 60 mobj = re.search(r'_(\d+)x(\d+)', original_filename) 61 if mobj: 62 f.update({ 63 'height': int(mobj.group(2)), 64 'width': int(mobj.group(1)), 65 }) 66 if original_filename.startswith('s3://') and not s3_extracted: 67 formats.append({ 68 'format_id': 'original', 69 'preference': 1, 70 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'), 71 }) 72 s3_extracted = True 73 formats.append(f) 74 self._sort_formats(formats) 75 76 return { 77 'id': video_id, 78 'title': title, 79 'description': video_data.get('description'), 80 'thumbnail': video_data.get('thumbnail'), 81 'upload_date': unified_strdate(video_data.get('start_date')), 82 'duration': parse_duration(video_data.get('duration')), 83 'view_count': str_to_int(video_data.get('playcount')), 84 'formats': formats, 85 'subtitles': self._parse_subtitles(video_data, 'vtt'), 86 } 87 88 89 class AdobeTVEmbedIE(AdobeTVBaseIE): 90 IE_NAME = 'adobetv:embed' 91 _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)' 92 _TEST = { 93 'url': 'https://tv.adobe.com/embed/22/4153', 94 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', 95 'info_dict': { 96 'id': '4153', 97 'ext': 'flv', 98 'title': 'Creating Graphics Optimized for BlackBerry', 99 'description': 'md5:eac6e8dced38bdaae51cd94447927459', 100 'thumbnail': r're:https?://.*\.jpg$', 101 'upload_date': '20091109', 102 'duration': 377, 103 'view_count': int, 104 }, 105 } 106 107 def _real_extract(self, url): 108 video_id = self._match_id(url) 109 110 video_data = self._call_api( 111 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0] 112 return self._parse_video_data(video_data) 113 114 115 class AdobeTVIE(AdobeTVBaseIE): 116 IE_NAME = 'adobetv' 117 _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)' 118 119 _TEST = { 120 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', 121 'md5': '9bc5727bcdd55251f35ad311ca74fa1e', 122 'info_dict': { 123 'id': '10981', 124 'ext': 'mp4', 125 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', 126 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', 127 'thumbnail': r're:https?://.*\.jpg$', 128 'upload_date': '20110914', 129 'duration': 60, 130 'view_count': int, 131 }, 132 } 133 134 def _real_extract(self, url): 135 language, show_urlname, urlname = re.match(self._VALID_URL, url).groups() 136 if not language: 137 language = 'en' 138 139 video_data = self._call_api( 140 'episode/get', urlname, { 141 'disclosure': 'standard', 142 'language': language, 143 'show_urlname': show_urlname, 144 'urlname': urlname, 145 })[0] 146 return self._parse_video_data(video_data) 147 148 149 class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): 150 _PAGE_SIZE = 25 151 152 def _fetch_page(self, display_id, query, page): 153 page += 1 154 query['page'] = page 155 for element_data in self._call_api( 156 self._RESOURCE, display_id, query, 'Download Page %d' % page): 157 yield self._process_data(element_data) 158 159 def _extract_playlist_entries(self, display_id, query): 160 return OnDemandPagedList(functools.partial( 161 self._fetch_page, display_id, query), self._PAGE_SIZE) 162 163 164 class AdobeTVShowIE(AdobeTVPlaylistBaseIE): 165 IE_NAME = 'adobetv:show' 166 _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)' 167 168 _TEST = { 169 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', 170 'info_dict': { 171 'id': '36', 172 'title': 'The Complete Picture with Julieanne Kost', 173 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', 174 }, 175 'playlist_mincount': 136, 176 } 177 _RESOURCE = 'episode' 178 _process_data = AdobeTVBaseIE._parse_video_data 179 180 def _real_extract(self, url): 181 language, show_urlname = re.match(self._VALID_URL, url).groups() 182 if not language: 183 language = 'en' 184 query = { 185 'disclosure': 'standard', 186 'language': language, 187 'show_urlname': show_urlname, 188 } 189 190 show_data = self._call_api( 191 'show/get', show_urlname, query)[0] 192 193 return self.playlist_result( 194 self._extract_playlist_entries(show_urlname, query), 195 str_or_none(show_data.get('id')), 196 show_data.get('show_name'), 197 show_data.get('show_description')) 198 199 200 class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): 201 IE_NAME = 'adobetv:channel' 202 _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?' 203 204 _TEST = { 205 'url': 'http://tv.adobe.com/channel/development', 206 'info_dict': { 207 'id': 'development', 208 }, 209 'playlist_mincount': 96, 210 } 211 _RESOURCE = 'show' 212 213 def _process_data(self, show_data): 214 return self.url_result( 215 show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id'))) 216 217 def _real_extract(self, url): 218 language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() 219 if not language: 220 language = 'en' 221 query = { 222 'channel_urlname': channel_urlname, 223 'language': language, 224 } 225 if category_urlname: 226 query['category_urlname'] = category_urlname 227 228 return self.playlist_result( 229 self._extract_playlist_entries(channel_urlname, query), 230 channel_urlname) 231 232 233 class AdobeTVVideoIE(AdobeTVBaseIE): 234 IE_NAME = 'adobetv:video' 235 _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)' 236 237 _TEST = { 238 # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners 239 'url': 'https://video.tv.adobe.com/v/2456/', 240 'md5': '43662b577c018ad707a63766462b1e87', 241 'info_dict': { 242 'id': '2456', 243 'ext': 'mp4', 244 'title': 'New experience with Acrobat DC', 245 'description': 'New experience with Acrobat DC', 246 'duration': 248.667, 247 }, 248 } 249 250 def _real_extract(self, url): 251 video_id = self._match_id(url) 252 webpage = self._download_webpage(url, video_id) 253 254 video_data = self._parse_json(self._search_regex( 255 r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) 256 title = video_data['title'] 257 258 formats = [] 259 sources = video_data.get('sources') or [] 260 for source in sources: 261 source_src = source.get('src') 262 if not source_src: 263 continue 264 formats.append({ 265 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000), 266 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])), 267 'height': int_or_none(source.get('height') or None), 268 'tbr': int_or_none(source.get('bitrate') or None), 269 'width': int_or_none(source.get('width') or None), 270 'url': source_src, 271 }) 272 self._sort_formats(formats) 273 274 # For both metadata and downloaded files the duration varies among 275 # formats. I just pick the max one 276 duration = max(filter(None, [ 277 float_or_none(source.get('duration'), scale=1000) 278 for source in sources])) 279 280 return { 281 'id': video_id, 282 'formats': formats, 283 'title': title, 284 'description': video_data.get('description'), 285 'thumbnail': video_data.get('video', {}).get('poster'), 286 'duration': duration, 287 'subtitles': self._parse_subtitles(video_data, 'vttPath'), 288 }