nfl.py (6731B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 clean_html, 9 determine_ext, 10 get_element_by_class, 11 ) 12 13 14 class NFLBaseIE(InfoExtractor): 15 _VALID_URL_BASE = r'''(?x) 16 https?:// 17 (?P<host> 18 (?:www\.)? 19 (?: 20 (?: 21 nfl| 22 buffalobills| 23 miamidolphins| 24 patriots| 25 newyorkjets| 26 baltimoreravens| 27 bengals| 28 clevelandbrowns| 29 steelers| 30 houstontexans| 31 colts| 32 jaguars| 33 (?:titansonline|tennesseetitans)| 34 denverbroncos| 35 (?:kc)?chiefs| 36 raiders| 37 chargers| 38 dallascowboys| 39 giants| 40 philadelphiaeagles| 41 (?:redskins|washingtonfootball)| 42 chicagobears| 43 detroitlions| 44 packers| 45 vikings| 46 atlantafalcons| 47 panthers| 48 neworleanssaints| 49 buccaneers| 50 azcardinals| 51 (?:stlouis|the)rams| 52 49ers| 53 seahawks 54 )\.com| 55 .+?\.clubs\.nfl\.com 56 ) 57 )/ 58 ''' 59 _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})' 60 _WORKING = False 61 62 def _parse_video_config(self, video_config, display_id): 63 video_config = self._parse_json(video_config, display_id) 64 item = video_config['playlist'][0] 65 mcp_id = item.get('mcpID') 66 if mcp_id: 67 info = self.url_result( 68 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id, 69 'Anvato', mcp_id) 70 else: 71 media_id = item.get('id') or item['entityId'] 72 title = item['title'] 73 item_url = item['url'] 74 info = {'id': media_id} 75 ext = determine_ext(item_url) 76 if ext == 'm3u8': 77 info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4') 78 self._sort_formats(info['formats']) 79 else: 80 info['url'] = item_url 81 if item.get('audio') is True: 82 info['vcodec'] = 'none' 83 is_live = video_config.get('live') is True 84 thumbnails = None 85 image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage')) 86 if image_url: 87 thumbnails = [{ 88 'url': image_url, 89 'ext': determine_ext(image_url, 'jpg'), 90 }] 91 info.update({ 92 'title': self._live_title(title) if is_live else title, 93 'is_live': is_live, 94 'description': clean_html(item.get('description')), 95 'thumbnails': thumbnails, 96 }) 97 return info 98 99 100 class NFLIE(NFLBaseIE): 101 IE_NAME = 'nfl.com' 102 _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)' 103 _TESTS = [{ 104 'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14', 105 'info_dict': { 106 'id': '899441', 107 'ext': 'mp4', 108 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14", 109 'description': 'md5:85e05a3cc163f8c344340f220521136d', 110 'upload_date': '20201215', 111 'timestamp': 1608009755, 112 'thumbnail': r're:^https?://.*\.jpg$', 113 'uploader': 'NFL', 114 } 115 }, { 116 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', 117 'md5': '6886b32c24b463038c760ceb55a34566', 118 'info_dict': { 119 'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99', 120 'ext': 'mp3', 121 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown', 122 'description': 'md5:12ada8ee70e6762658c30e223e095075', 123 } 124 }, { 125 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14', 126 'only_matching': True, 127 }, { 128 'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz', 129 'only_matching': True, 130 }] 131 132 def _real_extract(self, url): 133 display_id = self._match_id(url) 134 webpage = self._download_webpage(url, display_id) 135 return self._parse_video_config(self._search_regex( 136 self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id) 137 138 139 class NFLArticleIE(NFLBaseIE): 140 IE_NAME = 'nfl.com:article' 141 _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)' 142 _TEST = { 143 'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e', 144 'info_dict': { 145 'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e', 146 'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations", 147 }, 148 'playlist_count': 4, 149 } 150 151 def _real_extract(self, url): 152 display_id = self._match_id(url) 153 webpage = self._download_webpage(url, display_id) 154 entries = [] 155 for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): 156 entries.append(self._parse_video_config(video_config, display_id)) 157 title = clean_html(get_element_by_class( 158 'nfl-c-article__title', webpage)) or self._html_search_meta( 159 ['og:title', 'twitter:title'], webpage) 160 return self.playlist_result(entries, display_id, title)