youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

nba.py (16675B)


      1 from __future__ import unicode_literals
      2 
      3 import functools
      4 import re
      5 
      6 from .turner import TurnerBaseIE
      7 from ..compat import (
      8     compat_parse_qs,
      9     compat_str,
     10     compat_urllib_parse_unquote,
     11     compat_urllib_parse_urlparse,
     12 )
     13 from ..utils import (
     14     int_or_none,
     15     merge_dicts,
     16     OnDemandPagedList,
     17     parse_duration,
     18     parse_iso8601,
     19     try_get,
     20     update_url_query,
     21     urljoin,
     22 )
     23 
     24 
     25 class NBACVPBaseIE(TurnerBaseIE):
     26     def _extract_nba_cvp_info(self, path, video_id, fatal=False):
     27         return self._extract_cvp_info(
     28             'http://secure.nba.com/%s' % path, video_id, {
     29                 'default': {
     30                     'media_src': 'http://nba.cdn.turner.com/nba/big',
     31                 },
     32                 'm3u8': {
     33                     'media_src': 'http://nbavod-f.akamaihd.net',
     34                 },
     35             }, fatal=fatal)
     36 
     37 
     38 class NBAWatchBaseIE(NBACVPBaseIE):
     39     _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
     40 
     41     def _extract_video(self, filter_key, filter_value):
     42         video = self._download_json(
     43             'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
     44             filter_value, query={
     45                 'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
     46                 'q': filter_key + ':' + filter_value,
     47                 'wt': 'json',
     48             })['response']['docs'][0]
     49 
     50         video_id = str(video['pid'])
     51         title = video['name']
     52 
     53         formats = []
     54         m3u8_url = (self._download_json(
     55             'https://watch.nba.com/service/publishpoint', video_id, query={
     56                 'type': 'video',
     57                 'format': 'json',
     58                 'id': video_id,
     59             }, headers={
     60                 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
     61             }, fatal=False) or {}).get('path')
     62         if m3u8_url:
     63             m3u8_formats = self._extract_m3u8_formats(
     64                 re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
     65                 'm3u8_native', m3u8_id='hls', fatal=False)
     66             formats.extend(m3u8_formats)
     67             for f in m3u8_formats:
     68                 http_f = f.copy()
     69                 http_f.update({
     70                     'format_id': http_f['format_id'].replace('hls-', 'http-'),
     71                     'protocol': 'http',
     72                     'url': http_f['url'].replace('.m3u8', ''),
     73                 })
     74                 formats.append(http_f)
     75 
     76         info = {
     77             'id': video_id,
     78             'title': title,
     79             'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
     80             'description': video.get('description'),
     81             'duration': int_or_none(video.get('runtime')),
     82             'timestamp': parse_iso8601(video.get('releaseDate')),
     83             'tags': video.get('tags'),
     84         }
     85 
     86         seo_name = video.get('seoName')
     87         if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
     88             base_path = ''
     89             if seo_name.startswith('teams/'):
     90                 base_path += seo_name.split('/')[1] + '/'
     91             base_path += 'video/'
     92             cvp_info = self._extract_nba_cvp_info(
     93                 base_path + seo_name + '.xml', video_id, False)
     94             if cvp_info:
     95                 formats.extend(cvp_info['formats'])
     96                 info = merge_dicts(info, cvp_info)
     97 
     98         self._sort_formats(formats)
     99         info['formats'] = formats
    100         return info
    101 
    102 
    103 class NBAWatchEmbedIE(NBAWatchBaseIE):
    104     IENAME = 'nba:watch:embed'
    105     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
    106     _TESTS = [{
    107         'url': 'http://watch.nba.com/embed?id=659395',
    108         'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
    109         'info_dict': {
    110             'id': '659395',
    111             'ext': 'mp4',
    112             'title': 'Mix clip: More than 7 points of  Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
    113             'description': 'Mix clip: More than 7 points of  Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
    114             'timestamp': 1492228800,
    115             'upload_date': '20170415',
    116         },
    117     }]
    118 
    119     def _real_extract(self, url):
    120         video_id = self._match_id(url)
    121         return self._extract_video('pid', video_id)
    122 
    123 
    124 class NBAWatchIE(NBAWatchBaseIE):
    125     IE_NAME = 'nba:watch'
    126     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
    127     _TESTS = [{
    128         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
    129         'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
    130         'info_dict': {
    131             'id': '70946',
    132             'ext': 'mp4',
    133             'title': 'Thunder vs. Nets',
    134             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
    135             'duration': 181,
    136             'timestamp': 1354597200,
    137             'upload_date': '20121204',
    138         },
    139     }, {
    140         'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
    141         'only_matching': True,
    142     }, {
    143         'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
    144         'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
    145         'info_dict': {
    146             'id': '330865',
    147             'ext': 'mp4',
    148             'title': 'Hawks vs. Cavaliers Game 1',
    149             'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
    150             'duration': 228,
    151             'timestamp': 1432094400,
    152             'upload_date': '20150521',
    153         },
    154     }, {
    155         'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
    156         'only_matching': True,
    157     }, {
    158         # only CVP mp4 format available
    159         'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
    160         'only_matching': True,
    161     }, {
    162         'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
    163         'only_matching': True,
    164     }]
    165 
    166     def _real_extract(self, url):
    167         display_id = self._match_id(url)
    168         collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
    169         if collection_id:
    170             if self._downloader.params.get('noplaylist'):
    171                 self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
    172             else:
    173                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
    174                 return self.url_result(
    175                     'https://www.nba.com/watch/list/collection/' + collection_id,
    176                     NBAWatchCollectionIE.ie_key(), collection_id)
    177         return self._extract_video('seoName', display_id)
    178 
    179 
    180 class NBAWatchCollectionIE(NBAWatchBaseIE):
    181     IE_NAME = 'nba:watch:collection'
    182     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
    183     _TESTS = [{
    184         'url': 'https://watch.nba.com/list/collection/season-preview-2020',
    185         'info_dict': {
    186             'id': 'season-preview-2020',
    187         },
    188         'playlist_mincount': 43,
    189     }]
    190     _PAGE_SIZE = 100
    191 
    192     def _fetch_page(self, collection_id, page):
    193         page += 1
    194         videos = self._download_json(
    195             'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
    196             collection_id, 'Downloading page %d JSON metadata' % page, query={
    197                 'count': self._PAGE_SIZE,
    198                 'page': page,
    199             })['results']['videos']
    200         for video in videos:
    201             program = video.get('program') or {}
    202             seo_name = program.get('seoName') or program.get('slug')
    203             if not seo_name:
    204                 continue
    205             yield {
    206                 '_type': 'url',
    207                 'id': program.get('id'),
    208                 'title': program.get('title') or video.get('title'),
    209                 'url': 'https://www.nba.com/watch/video/' + seo_name,
    210                 'thumbnail': video.get('image'),
    211                 'description': program.get('description') or video.get('description'),
    212                 'duration': parse_duration(program.get('runtimeHours')),
    213                 'timestamp': parse_iso8601(video.get('releaseDate')),
    214             }
    215 
    216     def _real_extract(self, url):
    217         collection_id = self._match_id(url)
    218         entries = OnDemandPagedList(
    219             functools.partial(self._fetch_page, collection_id),
    220             self._PAGE_SIZE)
    221         return self.playlist_result(entries, collection_id)
    222 
    223 
    224 class NBABaseIE(NBACVPBaseIE):
    225     _VALID_URL_BASE = r'''(?x)
    226         https?://(?:www\.)?nba\.com/
    227             (?P<team>
    228                 blazers|
    229                 bucks|
    230                 bulls|
    231                 cavaliers|
    232                 celtics|
    233                 clippers|
    234                 grizzlies|
    235                 hawks|
    236                 heat|
    237                 hornets|
    238                 jazz|
    239                 kings|
    240                 knicks|
    241                 lakers|
    242                 magic|
    243                 mavericks|
    244                 nets|
    245                 nuggets|
    246                 pacers|
    247                 pelicans|
    248                 pistons|
    249                 raptors|
    250                 rockets|
    251                 sixers|
    252                 spurs|
    253                 suns|
    254                 thunder|
    255                 timberwolves|
    256                 warriors|
    257                 wizards
    258             )
    259         (?:/play\#)?/'''
    260     _CHANNEL_PATH_REGEX = r'video/channel|series'
    261 
    262     def _embed_url_result(self, team, content_id):
    263         return self.url_result(update_url_query(
    264             'https://secure.nba.com/assets/amp/include/video/iframe.html', {
    265                 'contentId': content_id,
    266                 'team': team,
    267             }), NBAEmbedIE.ie_key())
    268 
    269     def _call_api(self, team, content_id, query, resource):
    270         return self._download_json(
    271             'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
    272             content_id, 'Download %s JSON metadata' % resource,
    273             query=query, headers={
    274                 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
    275             })['response']['result']
    276 
    277     def _extract_video(self, video, team, extract_all=True):
    278         video_id = compat_str(video['nid'])
    279         team = video['brand']
    280 
    281         info = {
    282             'id': video_id,
    283             'title': video.get('title') or video.get('headline') or video['shortHeadline'],
    284             'description': video.get('description'),
    285             'timestamp': parse_iso8601(video.get('published')),
    286         }
    287 
    288         subtitles = {}
    289         captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
    290         for caption_url in captions.values():
    291             subtitles.setdefault('en', []).append({'url': caption_url})
    292 
    293         formats = []
    294         mp4_url = video.get('mp4')
    295         if mp4_url:
    296             formats.append({
    297                 'url': mp4_url,
    298             })
    299 
    300         if extract_all:
    301             source_url = video.get('videoSource')
    302             if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
    303                 formats.append({
    304                     'format_id': 'source',
    305                     'url': source_url,
    306                     'preference': 1,
    307                 })
    308 
    309             m3u8_url = video.get('m3u8')
    310             if m3u8_url:
    311                 if '.akamaihd.net/i/' in m3u8_url:
    312                     formats.extend(self._extract_akamai_formats(
    313                         m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
    314                 else:
    315                     formats.extend(self._extract_m3u8_formats(
    316                         m3u8_url, video_id, 'mp4',
    317                         'm3u8_native', m3u8_id='hls', fatal=False))
    318 
    319             content_xml = video.get('contentXml')
    320             if team and content_xml:
    321                 cvp_info = self._extract_nba_cvp_info(
    322                     team + content_xml, video_id, fatal=False)
    323                 if cvp_info:
    324                     formats.extend(cvp_info['formats'])
    325                     subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
    326                     info = merge_dicts(info, cvp_info)
    327 
    328             self._sort_formats(formats)
    329         else:
    330             info.update(self._embed_url_result(team, video['videoId']))
    331 
    332         info.update({
    333             'formats': formats,
    334             'subtitles': subtitles,
    335         })
    336 
    337         return info
    338 
    339     def _real_extract(self, url):
    340         team, display_id = re.match(self._VALID_URL, url).groups()
    341         if '/play#/' in url:
    342             display_id = compat_urllib_parse_unquote(display_id)
    343         else:
    344             webpage = self._download_webpage(url, display_id)
    345             display_id = self._search_regex(
    346                 self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
    347         return self._extract_url_results(team, display_id)
    348 
    349 
    350 class NBAEmbedIE(NBABaseIE):
    351     IENAME = 'nba:embed'
    352     _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
    353     _TESTS = [{
    354         'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
    355         'only_matching': True,
    356     }, {
    357         'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
    358         'only_matching': True,
    359     }]
    360 
    361     def _real_extract(self, url):
    362         qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
    363         content_id = qs['contentId'][0]
    364         team = qs.get('team', [None])[0]
    365         if not team:
    366             return self.url_result(
    367                 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
    368         video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
    369         return self._extract_video(video, team)
    370 
    371 
    372 class NBAIE(NBABaseIE):
    373     IENAME = 'nba'
    374     _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    375     _TESTS = [{
    376         'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
    377         'info_dict': {
    378             'id': '45039',
    379             'ext': 'mp4',
    380             'title': 'AND WE BACK.',
    381             'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
    382             'duration': 94,
    383             'timestamp': 1607112000,
    384             'upload_date': '20201218',
    385         },
    386     }, {
    387         'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
    388         'only_matching': True,
    389     }, {
    390         'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
    391         'only_matching': True,
    392     }]
    393     _CONTENT_ID_REGEX = r'videoID'
    394 
    395     def _extract_url_results(self, team, content_id):
    396         return self._embed_url_result(team, content_id)
    397 
    398 
    399 class NBAChannelIE(NBABaseIE):
    400     IENAME = 'nba:channel'
    401     _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    402     _TESTS = [{
    403         'url': 'https://www.nba.com/blazers/video/channel/summer_league',
    404         'info_dict': {
    405             'title': 'Summer League',
    406         },
    407         'playlist_mincount': 138,
    408     }, {
    409         'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
    410         'only_matching': True,
    411     }]
    412     _CONTENT_ID_REGEX = r'videoSubCategory'
    413     _PAGE_SIZE = 100
    414 
    415     def _fetch_page(self, team, channel, page):
    416         results = self._call_api(team, channel, {
    417             'channels': channel,
    418             'count': self._PAGE_SIZE,
    419             'offset': page * self._PAGE_SIZE,
    420         }, 'page %d' % (page + 1))
    421         for video in results:
    422             yield self._extract_video(video, team, False)
    423 
    424     def _extract_url_results(self, team, content_id):
    425         entries = OnDemandPagedList(
    426             functools.partial(self._fetch_page, team, content_id),
    427             self._PAGE_SIZE)
    428         return self.playlist_result(entries, playlist_title=content_id)