youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

adobetv.py (10291B)


      1 from __future__ import unicode_literals
      2 
      3 import functools
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_str
      8 from ..utils import (
      9     float_or_none,
     10     int_or_none,
     11     ISO639Utils,
     12     OnDemandPagedList,
     13     parse_duration,
     14     str_or_none,
     15     str_to_int,
     16     unified_strdate,
     17 )
     18 
     19 
     20 class AdobeTVBaseIE(InfoExtractor):
     21     def _call_api(self, path, video_id, query, note=None):
     22         return self._download_json(
     23             'http://tv.adobe.com/api/v4/' + path,
     24             video_id, note, query=query)['data']
     25 
     26     def _parse_subtitles(self, video_data, url_key):
     27         subtitles = {}
     28         for translation in video_data.get('translations', []):
     29             vtt_path = translation.get(url_key)
     30             if not vtt_path:
     31                 continue
     32             lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
     33             subtitles.setdefault(lang, []).append({
     34                 'ext': 'vtt',
     35                 'url': vtt_path,
     36             })
     37         return subtitles
     38 
     39     def _parse_video_data(self, video_data):
     40         video_id = compat_str(video_data['id'])
     41         title = video_data['title']
     42 
     43         s3_extracted = False
     44         formats = []
     45         for source in video_data.get('videos', []):
     46             source_url = source.get('url')
     47             if not source_url:
     48                 continue
     49             f = {
     50                 'format_id': source.get('quality_level'),
     51                 'fps': int_or_none(source.get('frame_rate')),
     52                 'height': int_or_none(source.get('height')),
     53                 'tbr': int_or_none(source.get('video_data_rate')),
     54                 'width': int_or_none(source.get('width')),
     55                 'url': source_url,
     56             }
     57             original_filename = source.get('original_filename')
     58             if original_filename:
     59                 if not (f.get('height') and f.get('width')):
     60                     mobj = re.search(r'_(\d+)x(\d+)', original_filename)
     61                     if mobj:
     62                         f.update({
     63                             'height': int(mobj.group(2)),
     64                             'width': int(mobj.group(1)),
     65                         })
     66                 if original_filename.startswith('s3://') and not s3_extracted:
     67                     formats.append({
     68                         'format_id': 'original',
     69                         'preference': 1,
     70                         'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
     71                     })
     72                     s3_extracted = True
     73             formats.append(f)
     74         self._sort_formats(formats)
     75 
     76         return {
     77             'id': video_id,
     78             'title': title,
     79             'description': video_data.get('description'),
     80             'thumbnail': video_data.get('thumbnail'),
     81             'upload_date': unified_strdate(video_data.get('start_date')),
     82             'duration': parse_duration(video_data.get('duration')),
     83             'view_count': str_to_int(video_data.get('playcount')),
     84             'formats': formats,
     85             'subtitles': self._parse_subtitles(video_data, 'vtt'),
     86         }
     87 
     88 
     89 class AdobeTVEmbedIE(AdobeTVBaseIE):
     90     IE_NAME = 'adobetv:embed'
     91     _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
     92     _TEST = {
     93         'url': 'https://tv.adobe.com/embed/22/4153',
     94         'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
     95         'info_dict': {
     96             'id': '4153',
     97             'ext': 'flv',
     98             'title': 'Creating Graphics Optimized for BlackBerry',
     99             'description': 'md5:eac6e8dced38bdaae51cd94447927459',
    100             'thumbnail': r're:https?://.*\.jpg$',
    101             'upload_date': '20091109',
    102             'duration': 377,
    103             'view_count': int,
    104         },
    105     }
    106 
    107     def _real_extract(self, url):
    108         video_id = self._match_id(url)
    109 
    110         video_data = self._call_api(
    111             'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
    112         return self._parse_video_data(video_data)
    113 
    114 
    115 class AdobeTVIE(AdobeTVBaseIE):
    116     IE_NAME = 'adobetv'
    117     _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
    118 
    119     _TEST = {
    120         'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
    121         'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
    122         'info_dict': {
    123             'id': '10981',
    124             'ext': 'mp4',
    125             'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
    126             'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
    127             'thumbnail': r're:https?://.*\.jpg$',
    128             'upload_date': '20110914',
    129             'duration': 60,
    130             'view_count': int,
    131         },
    132     }
    133 
    134     def _real_extract(self, url):
    135         language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
    136         if not language:
    137             language = 'en'
    138 
    139         video_data = self._call_api(
    140             'episode/get', urlname, {
    141                 'disclosure': 'standard',
    142                 'language': language,
    143                 'show_urlname': show_urlname,
    144                 'urlname': urlname,
    145             })[0]
    146         return self._parse_video_data(video_data)
    147 
    148 
    149 class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    150     _PAGE_SIZE = 25
    151 
    152     def _fetch_page(self, display_id, query, page):
    153         page += 1
    154         query['page'] = page
    155         for element_data in self._call_api(
    156                 self._RESOURCE, display_id, query, 'Download Page %d' % page):
    157             yield self._process_data(element_data)
    158 
    159     def _extract_playlist_entries(self, display_id, query):
    160         return OnDemandPagedList(functools.partial(
    161             self._fetch_page, display_id, query), self._PAGE_SIZE)
    162 
    163 
    164 class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    165     IE_NAME = 'adobetv:show'
    166     _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
    167 
    168     _TEST = {
    169         'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
    170         'info_dict': {
    171             'id': '36',
    172             'title': 'The Complete Picture with Julieanne Kost',
    173             'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
    174         },
    175         'playlist_mincount': 136,
    176     }
    177     _RESOURCE = 'episode'
    178     _process_data = AdobeTVBaseIE._parse_video_data
    179 
    180     def _real_extract(self, url):
    181         language, show_urlname = re.match(self._VALID_URL, url).groups()
    182         if not language:
    183             language = 'en'
    184         query = {
    185             'disclosure': 'standard',
    186             'language': language,
    187             'show_urlname': show_urlname,
    188         }
    189 
    190         show_data = self._call_api(
    191             'show/get', show_urlname, query)[0]
    192 
    193         return self.playlist_result(
    194             self._extract_playlist_entries(show_urlname, query),
    195             str_or_none(show_data.get('id')),
    196             show_data.get('show_name'),
    197             show_data.get('show_description'))
    198 
    199 
    200 class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    201     IE_NAME = 'adobetv:channel'
    202     _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
    203 
    204     _TEST = {
    205         'url': 'http://tv.adobe.com/channel/development',
    206         'info_dict': {
    207             'id': 'development',
    208         },
    209         'playlist_mincount': 96,
    210     }
    211     _RESOURCE = 'show'
    212 
    213     def _process_data(self, show_data):
    214         return self.url_result(
    215             show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
    216 
    217     def _real_extract(self, url):
    218         language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
    219         if not language:
    220             language = 'en'
    221         query = {
    222             'channel_urlname': channel_urlname,
    223             'language': language,
    224         }
    225         if category_urlname:
    226             query['category_urlname'] = category_urlname
    227 
    228         return self.playlist_result(
    229             self._extract_playlist_entries(channel_urlname, query),
    230             channel_urlname)
    231 
    232 
    233 class AdobeTVVideoIE(AdobeTVBaseIE):
    234     IE_NAME = 'adobetv:video'
    235     _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
    236 
    237     _TEST = {
    238         # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
    239         'url': 'https://video.tv.adobe.com/v/2456/',
    240         'md5': '43662b577c018ad707a63766462b1e87',
    241         'info_dict': {
    242             'id': '2456',
    243             'ext': 'mp4',
    244             'title': 'New experience with Acrobat DC',
    245             'description': 'New experience with Acrobat DC',
    246             'duration': 248.667,
    247         },
    248     }
    249 
    250     def _real_extract(self, url):
    251         video_id = self._match_id(url)
    252         webpage = self._download_webpage(url, video_id)
    253 
    254         video_data = self._parse_json(self._search_regex(
    255             r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
    256         title = video_data['title']
    257 
    258         formats = []
    259         sources = video_data.get('sources') or []
    260         for source in sources:
    261             source_src = source.get('src')
    262             if not source_src:
    263                 continue
    264             formats.append({
    265                 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
    266                 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
    267                 'height': int_or_none(source.get('height') or None),
    268                 'tbr': int_or_none(source.get('bitrate') or None),
    269                 'width': int_or_none(source.get('width') or None),
    270                 'url': source_src,
    271             })
    272         self._sort_formats(formats)
    273 
    274         # For both metadata and downloaded files the duration varies among
    275         # formats. I just pick the max one
    276         duration = max(filter(None, [
    277             float_or_none(source.get('duration'), scale=1000)
    278             for source in sources]))
    279 
    280         return {
    281             'id': video_id,
    282             'formats': formats,
    283             'title': title,
    284             'description': video_data.get('description'),
    285             'thumbnail': video_data.get('video', {}).get('poster'),
    286             'duration': duration,
    287             'subtitles': self._parse_subtitles(video_data, 'vttPath'),
    288         }