vevo.py (14131B)
1 from __future__ import unicode_literals 2 3 import re 4 import json 5 6 from .common import InfoExtractor 7 from ..compat import ( 8 compat_str, 9 compat_urlparse, 10 compat_HTTPError, 11 ) 12 from ..utils import ( 13 ExtractorError, 14 int_or_none, 15 parse_iso8601, 16 ) 17 18 19 class VevoBaseIE(InfoExtractor): 20 def _extract_json(self, webpage, video_id): 21 return self._parse_json( 22 self._search_regex( 23 r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', 24 webpage, 'initial store'), 25 video_id) 26 27 28 class VevoIE(VevoBaseIE): 29 ''' 30 Accepts urls from vevo.com or in the format 'vevo:{id}' 31 (currently used by MTVIE and MySpaceIE) 32 ''' 33 _VALID_URL = r'''(?x) 34 (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| 35 https?://cache\.vevo\.com/m/html/embed\.html\?video=| 36 https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 37 https?://embed\.vevo\.com/.*?[?&]isrc=| 38 vevo:) 39 (?P<id>[^&?#]+)''' 40 41 _TESTS = [{ 42 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 43 'md5': '95ee28ee45e70130e3ab02b0f579ae23', 44 'info_dict': { 45 'id': 'GB1101300280', 46 'ext': 'mp4', 47 'title': 'Hurts - Somebody to Die For', 48 'timestamp': 1372057200, 49 'upload_date': '20130624', 50 'uploader': 'Hurts', 51 'track': 'Somebody to Die For', 52 'artist': 'Hurts', 53 'genre': 'Pop', 54 }, 55 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 56 }, { 57 'note': 'v3 SMIL format', 58 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 59 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 60 'info_dict': { 61 'id': 'USUV71302923', 62 'ext': 'mp4', 63 'title': 'Cassadee Pope - I Wish I Could Break Your Heart', 64 'timestamp': 1392796919, 65 'upload_date': '20140219', 66 'uploader': 'Cassadee Pope', 67 'track': 'I Wish I Could Break Your Heart', 68 'artist': 'Cassadee Pope', 69 'genre': 'Country', 70 }, 71 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 72 }, { 73 'note': 'Age-limited video', 74 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 75 'info_dict': { 76 'id': 'USRV81300282', 77 'ext': 'mp4', 78 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 79 'age_limit': 18, 80 'timestamp': 1372888800, 81 'upload_date': '20130703', 82 'uploader': 'Justin Timberlake', 83 'track': 'Tunnel Vision (Explicit)', 84 'artist': 'Justin Timberlake', 85 'genre': 'Pop', 86 }, 87 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 88 }, { 89 'note': 'No video_info', 90 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 91 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 92 'info_dict': { 93 'id': 'USUV71503000', 94 'ext': 'mp4', 95 'title': 'K Camp ft. T.I. - Till I Die', 96 'age_limit': 18, 97 'timestamp': 1449468000, 98 'upload_date': '20151207', 99 'uploader': 'K Camp', 100 'track': 'Till I Die', 101 'artist': 'K Camp', 102 'genre': 'Hip-Hop', 103 }, 104 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 105 }, { 106 'note': 'Featured test', 107 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', 108 'md5': 'd28675e5e8805035d949dc5cf161071d', 109 'info_dict': { 110 'id': 'USUV71402190', 111 'ext': 'mp4', 112 'title': 'Lemaitre ft. LoLo - Wait', 113 'age_limit': 0, 114 'timestamp': 1413432000, 115 'upload_date': '20141016', 116 'uploader': 'Lemaitre', 117 'track': 'Wait', 118 'artist': 'Lemaitre', 119 'genre': 'Electronic', 120 }, 121 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 122 }, { 123 'note': 'Only available via webpage', 124 'url': 'http://www.vevo.com/watch/GBUV71600656', 125 'md5': '67e79210613865b66a47c33baa5e37fe', 126 'info_dict': { 127 'id': 'GBUV71600656', 128 'ext': 'mp4', 129 'title': 'ABC - Viva Love', 130 'age_limit': 0, 131 'timestamp': 1461830400, 132 'upload_date': '20160428', 133 'uploader': 'ABC', 134 'track': 'Viva Love', 135 'artist': 'ABC', 136 'genre': 'Pop', 137 }, 138 'expected_warnings': ['Failed to download video versions info'], 139 }, { 140 # no genres available 141 'url': 'http://www.vevo.com/watch/INS171400764', 142 'only_matching': True, 143 }, { 144 # Another case available only via the webpage; using streams/streamsV3 formats 145 # Geo-restricted to Netherlands/Germany 146 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 147 'only_matching': True, 148 }, { 149 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', 150 'only_matching': True, 151 }] 152 _VERSIONS = { 153 0: 'youtube', # only in AuthenticateVideo videoVersions 154 1: 'level3', 155 2: 'akamai', 156 3: 'level3', 157 4: 'amazon', 158 } 159 160 def _initialize_api(self, video_id): 161 webpage = self._download_webpage( 162 'https://accounts.vevo.com/token', None, 163 note='Retrieving oauth token', 164 errnote='Unable to retrieve oauth token', 165 data=json.dumps({ 166 'client_id': 'SPupX1tvqFEopQ1YS6SS', 167 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous', 168 }).encode('utf-8'), 169 headers={ 170 'Content-Type': 'application/json', 171 }) 172 173 if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): 174 self.raise_geo_restricted( 175 '%s said: This page is currently unavailable in your region' % self.IE_NAME) 176 177 auth_info = self._parse_json(webpage, video_id) 178 self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token'] 179 180 def _call_api(self, path, *args, **kwargs): 181 try: 182 data = self._download_json(self._api_url_template % path, *args, **kwargs) 183 except ExtractorError as e: 184 if isinstance(e.cause, compat_HTTPError): 185 errors = self._parse_json(e.cause.read().decode(), None)['errors'] 186 error_message = ', '.join([error['message'] for error in errors]) 187 raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) 188 raise 189 return data 190 191 def _real_extract(self, url): 192 video_id = self._match_id(url) 193 194 self._initialize_api(video_id) 195 196 video_info = self._call_api( 197 'video/%s' % video_id, video_id, 'Downloading api video info', 198 'Failed to download video info') 199 200 video_versions = self._call_api( 201 'video/%s/streams' % video_id, video_id, 202 'Downloading video versions info', 203 'Failed to download video versions info', 204 fatal=False) 205 206 # Some videos are only available via webpage (e.g. 207 # https://github.com/ytdl-org/youtube-dl/issues/9366) 208 if not video_versions: 209 webpage = self._download_webpage(url, video_id) 210 json_data = self._extract_json(webpage, video_id) 211 if 'streams' in json_data.get('default', {}): 212 video_versions = json_data['default']['streams'][video_id][0] 213 else: 214 video_versions = [ 215 value 216 for key, value in json_data['apollo']['data'].items() 217 if key.startswith('%s.streams' % video_id)] 218 219 uploader = None 220 artist = None 221 featured_artist = None 222 artists = video_info.get('artists') 223 for curr_artist in artists: 224 if curr_artist.get('role') == 'Featured': 225 featured_artist = curr_artist['name'] 226 else: 227 artist = uploader = curr_artist['name'] 228 229 formats = [] 230 for video_version in video_versions: 231 version = self._VERSIONS.get(video_version.get('version'), 'generic') 232 version_url = video_version.get('url') 233 if not version_url: 234 continue 235 236 if '.ism' in version_url: 237 continue 238 elif '.mpd' in version_url: 239 formats.extend(self._extract_mpd_formats( 240 version_url, video_id, mpd_id='dash-%s' % version, 241 note='Downloading %s MPD information' % version, 242 errnote='Failed to download %s MPD information' % version, 243 fatal=False)) 244 elif '.m3u8' in version_url: 245 formats.extend(self._extract_m3u8_formats( 246 version_url, video_id, 'mp4', 'm3u8_native', 247 m3u8_id='hls-%s' % version, 248 note='Downloading %s m3u8 information' % version, 249 errnote='Failed to download %s m3u8 information' % version, 250 fatal=False)) 251 else: 252 m = re.search(r'''(?xi) 253 _(?P<width>[0-9]+)x(?P<height>[0-9]+) 254 _(?P<vcodec>[a-z0-9]+) 255 _(?P<vbr>[0-9]+) 256 _(?P<acodec>[a-z0-9]+) 257 _(?P<abr>[0-9]+) 258 \.(?P<ext>[a-z0-9]+)''', version_url) 259 if not m: 260 continue 261 262 formats.append({ 263 'url': version_url, 264 'format_id': 'http-%s-%s' % (version, video_version['quality']), 265 'vcodec': m.group('vcodec'), 266 'acodec': m.group('acodec'), 267 'vbr': int(m.group('vbr')), 268 'abr': int(m.group('abr')), 269 'ext': m.group('ext'), 270 'width': int(m.group('width')), 271 'height': int(m.group('height')), 272 }) 273 self._sort_formats(formats) 274 275 track = video_info['title'] 276 if featured_artist: 277 artist = '%s ft. %s' % (artist, featured_artist) 278 title = '%s - %s' % (artist, track) if artist else track 279 280 genres = video_info.get('genres') 281 genre = ( 282 genres[0] if genres and isinstance(genres, list) 283 and isinstance(genres[0], compat_str) else None) 284 285 is_explicit = video_info.get('isExplicit') 286 if is_explicit is True: 287 age_limit = 18 288 elif is_explicit is False: 289 age_limit = 0 290 else: 291 age_limit = None 292 293 return { 294 'id': video_id, 295 'title': title, 296 'formats': formats, 297 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'), 298 'timestamp': parse_iso8601(video_info.get('releaseDate')), 299 'uploader': uploader, 300 'duration': int_or_none(video_info.get('duration')), 301 'view_count': int_or_none(video_info.get('views', {}).get('total')), 302 'age_limit': age_limit, 303 'track': track, 304 'artist': uploader, 305 'genre': genre, 306 } 307 308 309 class VevoPlaylistIE(VevoBaseIE): 310 _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' 311 312 _TESTS = [{ 313 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', 314 'info_dict': { 315 'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29', 316 'title': 'Best-Of: Birdman', 317 }, 318 'playlist_count': 10, 319 }, { 320 'url': 'http://www.vevo.com/watch/genre/rock', 321 'info_dict': { 322 'id': 'rock', 323 'title': 'Rock', 324 }, 325 'playlist_count': 20, 326 }, { 327 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0', 328 'md5': '32dcdfddddf9ec6917fc88ca26d36282', 329 'info_dict': { 330 'id': 'USCMV1100073', 331 'ext': 'mp4', 332 'title': 'Birdman - Y.U. MAD', 333 'timestamp': 1323417600, 334 'upload_date': '20111209', 335 'uploader': 'Birdman', 336 'track': 'Y.U. MAD', 337 'artist': 'Birdman', 338 'genre': 'Rap/Hip-Hop', 339 }, 340 'expected_warnings': ['Unable to download SMIL file'], 341 }, { 342 'url': 'http://www.vevo.com/watch/genre/rock?index=0', 343 'only_matching': True, 344 }] 345 346 def _real_extract(self, url): 347 mobj = re.match(self._VALID_URL, url) 348 playlist_id = mobj.group('id') 349 playlist_kind = mobj.group('kind') 350 351 webpage = self._download_webpage(url, playlist_id) 352 353 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 354 index = qs.get('index', [None])[0] 355 356 if index: 357 video_id = self._search_regex( 358 r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>', 359 webpage, 'video id', default=None, group='id') 360 if video_id: 361 return self.url_result('vevo:%s' % video_id, VevoIE.ie_key()) 362 363 playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind] 364 365 playlist = (list(playlists.values())[0] 366 if playlist_kind == 'playlist' else playlists[playlist_id]) 367 368 entries = [ 369 self.url_result('vevo:%s' % src, VevoIE.ie_key()) 370 for src in playlist['isrcs']] 371 372 return self.playlist_result( 373 entries, playlist.get('playlistId') or playlist_id, 374 playlist.get('name'), playlist.get('description'))