nba.py (16675B)
1 from __future__ import unicode_literals 2 3 import functools 4 import re 5 6 from .turner import TurnerBaseIE 7 from ..compat import ( 8 compat_parse_qs, 9 compat_str, 10 compat_urllib_parse_unquote, 11 compat_urllib_parse_urlparse, 12 ) 13 from ..utils import ( 14 int_or_none, 15 merge_dicts, 16 OnDemandPagedList, 17 parse_duration, 18 parse_iso8601, 19 try_get, 20 update_url_query, 21 urljoin, 22 ) 23 24 25 class NBACVPBaseIE(TurnerBaseIE): 26 def _extract_nba_cvp_info(self, path, video_id, fatal=False): 27 return self._extract_cvp_info( 28 'http://secure.nba.com/%s' % path, video_id, { 29 'default': { 30 'media_src': 'http://nba.cdn.turner.com/nba/big', 31 }, 32 'm3u8': { 33 'media_src': 'http://nbavod-f.akamaihd.net', 34 }, 35 }, fatal=fatal) 36 37 38 class NBAWatchBaseIE(NBACVPBaseIE): 39 _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/' 40 41 def _extract_video(self, filter_key, filter_value): 42 video = self._download_json( 43 'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch', 44 filter_value, query={ 45 'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName', 46 'q': filter_key + ':' + filter_value, 47 'wt': 'json', 48 })['response']['docs'][0] 49 50 video_id = str(video['pid']) 51 title = video['name'] 52 53 formats = [] 54 m3u8_url = (self._download_json( 55 'https://watch.nba.com/service/publishpoint', video_id, query={ 56 'type': 'video', 57 'format': 'json', 58 'id': video_id, 59 }, headers={ 60 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1', 61 }, fatal=False) or {}).get('path') 62 if m3u8_url: 63 m3u8_formats = self._extract_m3u8_formats( 64 re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4', 65 'm3u8_native', m3u8_id='hls', fatal=False) 66 formats.extend(m3u8_formats) 67 for f in m3u8_formats: 68 http_f = f.copy() 69 http_f.update({ 70 'format_id': http_f['format_id'].replace('hls-', 'http-'), 71 'protocol': 'http', 72 'url': http_f['url'].replace('.m3u8', ''), 73 }) 74 formats.append(http_f) 75 76 info = { 77 'id': video_id, 78 'title': title, 79 'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')), 80 'description': video.get('description'), 81 'duration': int_or_none(video.get('runtime')), 82 'timestamp': parse_iso8601(video.get('releaseDate')), 83 'tags': video.get('tags'), 84 } 85 86 seo_name = video.get('seoName') 87 if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name): 88 base_path = '' 89 if seo_name.startswith('teams/'): 90 base_path += seo_name.split('/')[1] + '/' 91 base_path += 'video/' 92 cvp_info = self._extract_nba_cvp_info( 93 base_path + seo_name + '.xml', video_id, False) 94 if cvp_info: 95 formats.extend(cvp_info['formats']) 96 info = merge_dicts(info, cvp_info) 97 98 self._sort_formats(formats) 99 info['formats'] = formats 100 return info 101 102 103 class NBAWatchEmbedIE(NBAWatchBaseIE): 104 IENAME = 'nba:watch:embed' 105 _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)' 106 _TESTS = [{ 107 'url': 'http://watch.nba.com/embed?id=659395', 108 'md5': 'b7e3f9946595f4ca0a13903ce5edd120', 109 'info_dict': { 110 'id': '659395', 111 'ext': 'mp4', 112 'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017', 113 'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017', 114 'timestamp': 1492228800, 115 'upload_date': '20170415', 116 }, 117 }] 118 119 def _real_extract(self, url): 120 video_id = self._match_id(url) 121 return self._extract_video('pid', video_id) 122 123 124 class NBAWatchIE(NBAWatchBaseIE): 125 IE_NAME = 'nba:watch' 126 _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)' 127 _TESTS = [{ 128 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', 129 'md5': '9d902940d2a127af3f7f9d2f3dc79c96', 130 'info_dict': { 131 'id': '70946', 132 'ext': 'mp4', 133 'title': 'Thunder vs. Nets', 134 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', 135 'duration': 181, 136 'timestamp': 1354597200, 137 'upload_date': '20121204', 138 }, 139 }, { 140 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', 141 'only_matching': True, 142 }, { 143 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', 144 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4', 145 'info_dict': { 146 'id': '330865', 147 'ext': 'mp4', 148 'title': 'Hawks vs. Cavaliers Game 1', 149 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d', 150 'duration': 228, 151 'timestamp': 1432094400, 152 'upload_date': '20150521', 153 }, 154 }, { 155 'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115', 156 'only_matching': True, 157 }, { 158 # only CVP mp4 format available 159 'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106', 160 'only_matching': True, 161 }, { 162 'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights', 163 'only_matching': True, 164 }] 165 166 def _real_extract(self, url): 167 display_id = self._match_id(url) 168 collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0] 169 if collection_id: 170 if self._downloader.params.get('noplaylist'): 171 self.to_screen('Downloading just video %s because of --no-playlist' % display_id) 172 else: 173 self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id) 174 return self.url_result( 175 'https://www.nba.com/watch/list/collection/' + collection_id, 176 NBAWatchCollectionIE.ie_key(), collection_id) 177 return self._extract_video('seoName', display_id) 178 179 180 class NBAWatchCollectionIE(NBAWatchBaseIE): 181 IE_NAME = 'nba:watch:collection' 182 _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)' 183 _TESTS = [{ 184 'url': 'https://watch.nba.com/list/collection/season-preview-2020', 185 'info_dict': { 186 'id': 'season-preview-2020', 187 }, 188 'playlist_mincount': 43, 189 }] 190 _PAGE_SIZE = 100 191 192 def _fetch_page(self, collection_id, page): 193 page += 1 194 videos = self._download_json( 195 'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id, 196 collection_id, 'Downloading page %d JSON metadata' % page, query={ 197 'count': self._PAGE_SIZE, 198 'page': page, 199 })['results']['videos'] 200 for video in videos: 201 program = video.get('program') or {} 202 seo_name = program.get('seoName') or program.get('slug') 203 if not seo_name: 204 continue 205 yield { 206 '_type': 'url', 207 'id': program.get('id'), 208 'title': program.get('title') or video.get('title'), 209 'url': 'https://www.nba.com/watch/video/' + seo_name, 210 'thumbnail': video.get('image'), 211 'description': program.get('description') or video.get('description'), 212 'duration': parse_duration(program.get('runtimeHours')), 213 'timestamp': parse_iso8601(video.get('releaseDate')), 214 } 215 216 def _real_extract(self, url): 217 collection_id = self._match_id(url) 218 entries = OnDemandPagedList( 219 functools.partial(self._fetch_page, collection_id), 220 self._PAGE_SIZE) 221 return self.playlist_result(entries, collection_id) 222 223 224 class NBABaseIE(NBACVPBaseIE): 225 _VALID_URL_BASE = r'''(?x) 226 https?://(?:www\.)?nba\.com/ 227 (?P<team> 228 blazers| 229 bucks| 230 bulls| 231 cavaliers| 232 celtics| 233 clippers| 234 grizzlies| 235 hawks| 236 heat| 237 hornets| 238 jazz| 239 kings| 240 knicks| 241 lakers| 242 magic| 243 mavericks| 244 nets| 245 nuggets| 246 pacers| 247 pelicans| 248 pistons| 249 raptors| 250 rockets| 251 sixers| 252 spurs| 253 suns| 254 thunder| 255 timberwolves| 256 warriors| 257 wizards 258 ) 259 (?:/play\#)?/''' 260 _CHANNEL_PATH_REGEX = r'video/channel|series' 261 262 def _embed_url_result(self, team, content_id): 263 return self.url_result(update_url_query( 264 'https://secure.nba.com/assets/amp/include/video/iframe.html', { 265 'contentId': content_id, 266 'team': team, 267 }), NBAEmbedIE.ie_key()) 268 269 def _call_api(self, team, content_id, query, resource): 270 return self._download_json( 271 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team, 272 content_id, 'Download %s JSON metadata' % resource, 273 query=query, headers={ 274 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b', 275 })['response']['result'] 276 277 def _extract_video(self, video, team, extract_all=True): 278 video_id = compat_str(video['nid']) 279 team = video['brand'] 280 281 info = { 282 'id': video_id, 283 'title': video.get('title') or video.get('headline') or video['shortHeadline'], 284 'description': video.get('description'), 285 'timestamp': parse_iso8601(video.get('published')), 286 } 287 288 subtitles = {} 289 captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {} 290 for caption_url in captions.values(): 291 subtitles.setdefault('en', []).append({'url': caption_url}) 292 293 formats = [] 294 mp4_url = video.get('mp4') 295 if mp4_url: 296 formats.append({ 297 'url': mp4_url, 298 }) 299 300 if extract_all: 301 source_url = video.get('videoSource') 302 if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'): 303 formats.append({ 304 'format_id': 'source', 305 'url': source_url, 306 'preference': 1, 307 }) 308 309 m3u8_url = video.get('m3u8') 310 if m3u8_url: 311 if '.akamaihd.net/i/' in m3u8_url: 312 formats.extend(self._extract_akamai_formats( 313 m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'})) 314 else: 315 formats.extend(self._extract_m3u8_formats( 316 m3u8_url, video_id, 'mp4', 317 'm3u8_native', m3u8_id='hls', fatal=False)) 318 319 content_xml = video.get('contentXml') 320 if team and content_xml: 321 cvp_info = self._extract_nba_cvp_info( 322 team + content_xml, video_id, fatal=False) 323 if cvp_info: 324 formats.extend(cvp_info['formats']) 325 subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles']) 326 info = merge_dicts(info, cvp_info) 327 328 self._sort_formats(formats) 329 else: 330 info.update(self._embed_url_result(team, video['videoId'])) 331 332 info.update({ 333 'formats': formats, 334 'subtitles': subtitles, 335 }) 336 337 return info 338 339 def _real_extract(self, url): 340 team, display_id = re.match(self._VALID_URL, url).groups() 341 if '/play#/' in url: 342 display_id = compat_urllib_parse_unquote(display_id) 343 else: 344 webpage = self._download_webpage(url, display_id) 345 display_id = self._search_regex( 346 self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id') 347 return self._extract_url_results(team, display_id) 348 349 350 class NBAEmbedIE(NBABaseIE): 351 IENAME = 'nba:embed' 352 _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)' 353 _TESTS = [{ 354 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=', 355 'only_matching': True, 356 }, { 357 'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP', 358 'only_matching': True, 359 }] 360 361 def _real_extract(self, url): 362 qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) 363 content_id = qs['contentId'][0] 364 team = qs.get('team', [None])[0] 365 if not team: 366 return self.url_result( 367 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key()) 368 video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0] 369 return self._extract_video(video, team) 370 371 372 class NBAIE(NBABaseIE): 373 IENAME = 'nba' 374 _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX 375 _TESTS = [{ 376 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774', 377 'info_dict': { 378 'id': '45039', 379 'ext': 'mp4', 380 'title': 'AND WE BACK.', 381 'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.', 382 'duration': 94, 383 'timestamp': 1607112000, 384 'upload_date': '20201218', 385 }, 386 }, { 387 'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860', 388 'only_matching': True, 389 }, { 390 'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0', 391 'only_matching': True, 392 }] 393 _CONTENT_ID_REGEX = r'videoID' 394 395 def _extract_url_results(self, team, content_id): 396 return self._embed_url_result(team, content_id) 397 398 399 class NBAChannelIE(NBABaseIE): 400 IENAME = 'nba:channel' 401 _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX 402 _TESTS = [{ 403 'url': 'https://www.nba.com/blazers/video/channel/summer_league', 404 'info_dict': { 405 'title': 'Summer League', 406 }, 407 'playlist_mincount': 138, 408 }, { 409 'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date', 410 'only_matching': True, 411 }] 412 _CONTENT_ID_REGEX = r'videoSubCategory' 413 _PAGE_SIZE = 100 414 415 def _fetch_page(self, team, channel, page): 416 results = self._call_api(team, channel, { 417 'channels': channel, 418 'count': self._PAGE_SIZE, 419 'offset': page * self._PAGE_SIZE, 420 }, 'page %d' % (page + 1)) 421 for video in results: 422 yield self._extract_video(video, team, False) 423 424 def _extract_url_results(self, team, content_id): 425 entries = OnDemandPagedList( 426 functools.partial(self._fetch_page, team, content_id), 427 self._PAGE_SIZE) 428 return self.playlist_result(entries, playlist_title=content_id)