youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

go.py (12909B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .adobepass import AdobePassIE
      7 from ..compat import compat_str
      8 from ..utils import (
      9     int_or_none,
     10     determine_ext,
     11     parse_age_limit,
     12     try_get,
     13     urlencode_postdata,
     14     ExtractorError,
     15 )
     16 
     17 
     18 class GoIE(AdobePassIE):
     19     _SITE_INFO = {
     20         'abc': {
     21             'brand': '001',
     22             'requestor_id': 'ABC',
     23         },
     24         'freeform': {
     25             'brand': '002',
     26             'requestor_id': 'ABCFamily',
     27         },
     28         'watchdisneychannel': {
     29             'brand': '004',
     30             'resource_id': 'Disney',
     31         },
     32         'watchdisneyjunior': {
     33             'brand': '008',
     34             'resource_id': 'DisneyJunior',
     35         },
     36         'watchdisneyxd': {
     37             'brand': '009',
     38             'resource_id': 'DisneyXD',
     39         },
     40         'disneynow': {
     41             'brand': '011',
     42             'resource_id': 'Disney',
     43         },
     44         'fxnow.fxnetworks': {
     45             'brand': '025',
     46             'requestor_id': 'dtci',
     47         },
     48     }
     49     _VALID_URL = r'''(?x)
     50                     https?://
     51                         (?:
     52                             (?:(?P<sub_domain>%s)\.)?go|
     53                             (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
     54                         )\.com/
     55                         (?:
     56                             (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
     57                             (?:[^/]+/)*(?P<display_id>[^/?\#]+)
     58                         )
     59                     ''' % '|'.join(list(_SITE_INFO.keys()))
     60     _TESTS = [{
     61         'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
     62         'info_dict': {
     63             'id': 'VDKA3807643',
     64             'ext': 'mp4',
     65             'title': 'The Traitor in the White House',
     66             'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
     67         },
     68         'params': {
     69             # m3u8 download
     70             'skip_download': True,
     71         },
     72         'skip': 'This content is no longer available.',
     73     }, {
     74         'url': 'http://watchdisneyxd.go.com/doraemon',
     75         'info_dict': {
     76             'title': 'Doraemon',
     77             'id': 'SH55574025',
     78         },
     79         'playlist_mincount': 51,
     80     }, {
     81         'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
     82         'info_dict': {
     83             'id': 'VDKA3609139',
     84             'ext': 'mp4',
     85             'title': 'This Guilty Blood',
     86             'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
     87             'age_limit': 14,
     88         },
     89         'params': {
     90             'geo_bypass_ip_block': '3.244.239.0/24',
     91             # m3u8 download
     92             'skip_download': True,
     93         },
     94     }, {
     95         'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
     96         'info_dict': {
     97             'id': 'VDKA13435179',
     98             'ext': 'mp4',
     99             'title': 'The Bet',
    100             'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
    101             'age_limit': 14,
    102         },
    103         'params': {
    104             'geo_bypass_ip_block': '3.244.239.0/24',
    105             # m3u8 download
    106             'skip_download': True,
    107         },
    108     }, {
    109         'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
    110         'info_dict': {
    111             'id': 'VDKA12782841',
    112             'ext': 'mp4',
    113             'title': 'First Look: Better Things - Season 2',
    114             'description': 'md5:fa73584a95761c605d9d54904e35b407',
    115         },
    116         'params': {
    117             'geo_bypass_ip_block': '3.244.239.0/24',
    118             # m3u8 download
    119             'skip_download': True,
    120         },
    121     }, {
    122         'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
    123         'info_dict': {
    124             'id': 'VDKA22600213',
    125             'ext': 'mp4',
    126             'title': 'Pilot',
    127             'description': 'md5:74306df917cfc199d76d061d66bebdb4',
    128         },
    129         'params': {
    130             # m3u8 download
    131             'skip_download': True,
    132         },
    133     }, {
    134         'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
    135         'only_matching': True,
    136     }, {
    137         'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
    138         'only_matching': True,
    139     }, {
    140         # brand 004
    141         'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
    142         'only_matching': True,
    143     }, {
    144         # brand 008
    145         'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
    146         'only_matching': True,
    147     }, {
    148         'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
    149         'only_matching': True,
    150     }]
    151 
    152     def _extract_videos(self, brand, video_id='-1', show_id='-1'):
    153         display_id = video_id if video_id != '-1' else show_id
    154         return self._download_json(
    155             'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
    156             display_id)['video']
    157 
    158     def _real_extract(self, url):
    159         mobj = re.match(self._VALID_URL, url)
    160         sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
    161         video_id, display_id = mobj.group('id', 'display_id')
    162         site_info = self._SITE_INFO.get(sub_domain, {})
    163         brand = site_info.get('brand')
    164         if not video_id or not site_info:
    165             webpage = self._download_webpage(url, display_id or video_id)
    166             data = self._parse_json(
    167                 self._search_regex(
    168                     r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
    169                     'data', default='{}'),
    170                 display_id or video_id, fatal=False)
    171             # https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
    172             layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
    173             video_id = None
    174             if layout:
    175                 video_id = try_get(
    176                     layout,
    177                     (lambda x: x['videoid'], lambda x: x['video']['id']),
    178                     compat_str)
    179             if not video_id:
    180                 video_id = self._search_regex(
    181                     (
    182                         # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
    183                         # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
    184                         r'data-video-id=["\']*(VDKA\w+)',
    185                         # page.analytics.videoIdCode
    186                         r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
    187                         # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
    188                         r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
    189                     ), webpage, 'video id', default=video_id)
    190             if not site_info:
    191                 brand = self._search_regex(
    192                     (r'data-brand=\s*["\']\s*(\d+)',
    193                      r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
    194                     default='004')
    195                 site_info = next(
    196                     si for _, si in self._SITE_INFO.items()
    197                     if si.get('brand') == brand)
    198             if not video_id:
    199                 # show extraction works for Disney, DisneyJunior and DisneyXD
    200                 # ABC and Freeform has different layout
    201                 show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
    202                 videos = self._extract_videos(brand, show_id=show_id)
    203                 show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
    204                 entries = []
    205                 for video in videos:
    206                     entries.append(self.url_result(
    207                         video['url'], 'Go', video.get('id'), video.get('title')))
    208                 entries.reverse()
    209                 return self.playlist_result(entries, show_id, show_title)
    210         video_data = self._extract_videos(brand, video_id)[0]
    211         video_id = video_data['id']
    212         title = video_data['title']
    213 
    214         formats = []
    215         for asset in video_data.get('assets', {}).get('asset', []):
    216             asset_url = asset.get('value')
    217             if not asset_url:
    218                 continue
    219             format_id = asset.get('format')
    220             ext = determine_ext(asset_url)
    221             if ext == 'm3u8':
    222                 video_type = video_data.get('type')
    223                 data = {
    224                     'video_id': video_data['id'],
    225                     'video_type': video_type,
    226                     'brand': brand,
    227                     'device': '001',
    228                 }
    229                 if video_data.get('accesslevel') == '1':
    230                     requestor_id = site_info.get('requestor_id', 'DisneyChannels')
    231                     resource = site_info.get('resource_id') or self._get_mvpd_resource(
    232                         requestor_id, title, video_id, None)
    233                     auth = self._extract_mvpd_auth(
    234                         url, video_id, requestor_id, resource)
    235                     data.update({
    236                         'token': auth,
    237                         'token_type': 'ap',
    238                         'adobe_requestor_id': requestor_id,
    239                     })
    240                 else:
    241                     self._initialize_geo_bypass({'countries': ['US']})
    242                 entitlement = self._download_json(
    243                     'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
    244                     video_id, data=urlencode_postdata(data))
    245                 errors = entitlement.get('errors', {}).get('errors', [])
    246                 if errors:
    247                     for error in errors:
    248                         if error.get('code') == 1002:
    249                             self.raise_geo_restricted(
    250                                 error['message'], countries=['US'])
    251                     error_message = ', '.join([error['message'] for error in errors])
    252                     raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
    253                 asset_url += '?' + entitlement['uplynkData']['sessionKey']
    254                 formats.extend(self._extract_m3u8_formats(
    255                     asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
    256             else:
    257                 f = {
    258                     'format_id': format_id,
    259                     'url': asset_url,
    260                     'ext': ext,
    261                 }
    262                 if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
    263                     f.update({
    264                         'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
    265                         'preference': 1,
    266                     })
    267                 else:
    268                     mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
    269                     if mobj:
    270                         height = int(mobj.group(2))
    271                         f.update({
    272                             'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
    273                             'width': int(mobj.group(1)),
    274                             'height': height,
    275                         })
    276                 formats.append(f)
    277         self._sort_formats(formats)
    278 
    279         subtitles = {}
    280         for cc in video_data.get('closedcaption', {}).get('src', []):
    281             cc_url = cc.get('value')
    282             if not cc_url:
    283                 continue
    284             ext = determine_ext(cc_url)
    285             if ext == 'xml':
    286                 ext = 'ttml'
    287             subtitles.setdefault(cc.get('lang'), []).append({
    288                 'url': cc_url,
    289                 'ext': ext,
    290             })
    291 
    292         thumbnails = []
    293         for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
    294             thumbnail_url = thumbnail.get('value')
    295             if not thumbnail_url:
    296                 continue
    297             thumbnails.append({
    298                 'url': thumbnail_url,
    299                 'width': int_or_none(thumbnail.get('width')),
    300                 'height': int_or_none(thumbnail.get('height')),
    301             })
    302 
    303         return {
    304             'id': video_id,
    305             'title': title,
    306             'description': video_data.get('longdescription') or video_data.get('description'),
    307             'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
    308             'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
    309             'episode_number': int_or_none(video_data.get('episodenumber')),
    310             'series': video_data.get('show', {}).get('title'),
    311             'season_number': int_or_none(video_data.get('season', {}).get('num')),
    312             'thumbnails': thumbnails,
    313             'formats': formats,
    314             'subtitles': subtitles,
    315         }