espn.py (8755B)
1 from __future__ import unicode_literals 2 3 import re 4 5 from .common import InfoExtractor 6 from .once import OnceIE 7 from ..compat import compat_str 8 from ..utils import ( 9 determine_ext, 10 int_or_none, 11 unified_timestamp, 12 ) 13 14 15 class ESPNIE(OnceIE): 16 _VALID_URL = r'''(?x) 17 https?:// 18 (?: 19 (?: 20 (?: 21 (?:(?:\w+\.)+)?espn\.go| 22 (?:www\.)?espn 23 )\.com/ 24 (?: 25 (?: 26 video/(?:clip|iframe/twitter)| 27 watch/player 28 ) 29 (?: 30 .*?\?.*?\bid=| 31 /_/id/ 32 )| 33 [^/]+/video/ 34 ) 35 )| 36 (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ 37 ) 38 (?P<id>\d+) 39 ''' 40 41 _TESTS = [{ 42 'url': 'http://espn.go.com/video/clip?id=10365079', 43 'info_dict': { 44 'id': '10365079', 45 'ext': 'mp4', 46 'title': '30 for 30 Shorts: Judging Jewell', 47 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', 48 'timestamp': 1390936111, 49 'upload_date': '20140128', 50 }, 51 'params': { 52 'skip_download': True, 53 }, 54 }, { 55 'url': 'https://broadband.espn.go.com/video/clip?id=18910086', 56 'info_dict': { 57 'id': '18910086', 58 'ext': 'mp4', 59 'title': 'Kyrie spins around defender for two', 60 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', 61 'timestamp': 1489539155, 62 'upload_date': '20170315', 63 }, 64 'params': { 65 'skip_download': True, 66 }, 67 'expected_warnings': ['Unable to download f4m manifest'], 68 }, { 69 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', 70 'only_matching': True, 71 }, { 72 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774', 73 'only_matching': True, 74 }, { 75 'url': 'http://www.espn.com/watch/player?id=19141491', 76 'only_matching': True, 77 }, { 78 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', 79 'only_matching': True, 80 }, { 81 'url': 'http://www.espn.com/watch/player/_/id/19141491', 82 'only_matching': True, 83 }, { 84 'url': 'http://www.espn.com/video/clip?id=10365079', 85 'only_matching': True, 86 }, { 87 'url': 'http://www.espn.com/video/clip/_/id/17989860', 88 'only_matching': True, 89 }, { 90 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 91 'only_matching': True, 92 }, { 93 'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls', 94 'only_matching': True, 95 }, { 96 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', 97 'only_matching': True, 98 }, { 99 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', 100 'only_matching': True, 101 }] 102 103 def _real_extract(self, url): 104 video_id = self._match_id(url) 105 106 clip = self._download_json( 107 'http://api-app.espn.com/v1/video/clips/%s' % video_id, 108 video_id)['videos'][0] 109 110 title = clip['headline'] 111 112 format_urls = set() 113 formats = [] 114 115 def traverse_source(source, base_source_id=None): 116 for source_id, source in source.items(): 117 if source_id == 'alert': 118 continue 119 elif isinstance(source, compat_str): 120 extract_source(source, base_source_id) 121 elif isinstance(source, dict): 122 traverse_source( 123 source, 124 '%s-%s' % (base_source_id, source_id) 125 if base_source_id else source_id) 126 127 def extract_source(source_url, source_id=None): 128 if source_url in format_urls: 129 return 130 format_urls.add(source_url) 131 ext = determine_ext(source_url) 132 if OnceIE.suitable(source_url): 133 formats.extend(self._extract_once_formats(source_url)) 134 elif ext == 'smil': 135 formats.extend(self._extract_smil_formats( 136 source_url, video_id, fatal=False)) 137 elif ext == 'f4m': 138 formats.extend(self._extract_f4m_formats( 139 source_url, video_id, f4m_id=source_id, fatal=False)) 140 elif ext == 'm3u8': 141 formats.extend(self._extract_m3u8_formats( 142 source_url, video_id, 'mp4', entry_protocol='m3u8_native', 143 m3u8_id=source_id, fatal=False)) 144 else: 145 f = { 146 'url': source_url, 147 'format_id': source_id, 148 } 149 mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url) 150 if mobj: 151 f.update({ 152 'height': int(mobj.group(1)), 153 'fps': int(mobj.group(2)), 154 'tbr': int(mobj.group(3)), 155 }) 156 if source_id == 'mezzanine': 157 f['preference'] = 1 158 formats.append(f) 159 160 links = clip.get('links', {}) 161 traverse_source(links.get('source', {})) 162 traverse_source(links.get('mobile', {})) 163 self._sort_formats(formats) 164 165 description = clip.get('caption') or clip.get('description') 166 thumbnail = clip.get('thumbnail') 167 duration = int_or_none(clip.get('duration')) 168 timestamp = unified_timestamp(clip.get('originalPublishDate')) 169 170 return { 171 'id': video_id, 172 'title': title, 173 'description': description, 174 'thumbnail': thumbnail, 175 'timestamp': timestamp, 176 'duration': duration, 177 'formats': formats, 178 } 179 180 181 class ESPNArticleIE(InfoExtractor): 182 _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' 183 _TESTS = [{ 184 'url': 'http://espn.go.com/nba/recap?gameId=400793786', 185 'only_matching': True, 186 }, { 187 'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge', 188 'only_matching': True, 189 }, { 190 'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings', 191 'only_matching': True, 192 }, { 193 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 194 'only_matching': True, 195 }] 196 197 @classmethod 198 def suitable(cls, url): 199 return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) 200 201 def _real_extract(self, url): 202 video_id = self._match_id(url) 203 204 webpage = self._download_webpage(url, video_id) 205 206 video_id = self._search_regex( 207 r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', 208 webpage, 'video id', group='id') 209 210 return self.url_result( 211 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) 212 213 214 class FiveThirtyEightIE(InfoExtractor): 215 _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)' 216 _TEST = { 217 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', 218 'info_dict': { 219 'id': '56032156', 220 'ext': 'flv', 221 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', 222 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', 223 }, 224 'params': { 225 'skip_download': True, 226 }, 227 } 228 229 def _real_extract(self, url): 230 video_id = self._match_id(url) 231 232 webpage = self._download_webpage(url, video_id) 233 234 embed_url = self._search_regex( 235 r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)', 236 webpage, 'embed url') 237 238 return self.url_result(embed_url, 'AbcNewsVideo')