yandexmusic.py (17743B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import hashlib 5 import itertools 6 import re 7 8 from .common import InfoExtractor 9 from ..compat import compat_str 10 from ..utils import ( 11 ExtractorError, 12 int_or_none, 13 float_or_none, 14 try_get, 15 ) 16 17 18 class YandexMusicBaseIE(InfoExtractor): 19 _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)' 20 21 @staticmethod 22 def _handle_error(response): 23 if isinstance(response, dict): 24 error = response.get('error') 25 if error: 26 raise ExtractorError(error, expected=True) 27 if response.get('type') == 'captcha' or 'captcha' in response: 28 YandexMusicBaseIE._raise_captcha() 29 30 @staticmethod 31 def _raise_captcha(): 32 raise ExtractorError( 33 'YandexMusic has considered youtube-dl requests automated and ' 34 'asks you to solve a CAPTCHA. You can either wait for some ' 35 'time until unblocked and optionally use --sleep-interval ' 36 'in future or alternatively you can go to https://music.yandex.ru/ ' 37 'solve CAPTCHA, then export cookies and pass cookie file to ' 38 'youtube-dl with --cookies', 39 expected=True) 40 41 def _download_webpage_handle(self, *args, **kwargs): 42 webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs) 43 if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: 44 self._raise_captcha() 45 return webpage 46 47 def _download_json(self, *args, **kwargs): 48 response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs) 49 self._handle_error(response) 50 return response 51 52 def _call_api(self, ep, tld, url, item_id, note, query): 53 return self._download_json( 54 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep), 55 item_id, note, 56 fatal=False, 57 headers={ 58 'Referer': url, 59 'X-Requested-With': 'XMLHttpRequest', 60 'X-Retpath-Y': url, 61 }, 62 query=query) 63 64 65 class YandexMusicTrackIE(YandexMusicBaseIE): 66 IE_NAME = 'yandexmusic:track' 67 IE_DESC = 'Яндекс.Музыка - Трек' 68 _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE 69 70 _TESTS = [{ 71 'url': 'http://music.yandex.ru/album/540508/track/4878838', 72 'md5': 'dec8b661f12027ceaba33318787fff76', 73 'info_dict': { 74 'id': '4878838', 75 'ext': 'mp3', 76 'title': 'md5:c63e19341fdbe84e43425a30bc777856', 77 'filesize': int, 78 'duration': 193.04, 79 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff', 80 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a', 81 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200', 82 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160', 83 'release_year': 2009, 84 }, 85 # 'skip': 'Travis CI servers blocked by YandexMusic', 86 }, { 87 # multiple disks 88 'url': 'http://music.yandex.ru/album/3840501/track/705105', 89 'md5': '82a54e9e787301dd45aba093cf6e58c0', 90 'info_dict': { 91 'id': '705105', 92 'ext': 'mp3', 93 'title': 'md5:f86d4a9188279860a83000277024c1a6', 94 'filesize': int, 95 'duration': 239.27, 96 'track': 'md5:40f887f0666ba1aa10b835aca44807d1', 97 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873', 98 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12', 99 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12', 100 'release_year': 2016, 101 'genre': 'pop', 102 'disc_number': 2, 103 'track_number': 9, 104 }, 105 # 'skip': 'Travis CI servers blocked by YandexMusic', 106 }, { 107 'url': 'http://music.yandex.com/album/540508/track/4878838', 108 'only_matching': True, 109 }] 110 111 def _real_extract(self, url): 112 mobj = re.match(self._VALID_URL, url) 113 tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id') 114 115 track = self._call_api( 116 'track', tld, url, track_id, 'Downloading track JSON', 117 {'track': '%s:%s' % (track_id, album_id)})['track'] 118 track_title = track['title'] 119 120 download_data = self._download_json( 121 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id), 122 track_id, 'Downloading track location url JSON', 123 headers={'X-Retpath-Y': url}) 124 125 fd_data = self._download_json( 126 download_data['src'], track_id, 127 'Downloading track location JSON', 128 query={'format': 'json'}) 129 key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest() 130 f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id']) 131 132 thumbnail = None 133 cover_uri = track.get('albums', [{}])[0].get('coverUri') 134 if cover_uri: 135 thumbnail = cover_uri.replace('%%', 'orig') 136 if not thumbnail.startswith('http'): 137 thumbnail = 'http://' + thumbnail 138 139 track_info = { 140 'id': track_id, 141 'ext': 'mp3', 142 'url': f_url, 143 'filesize': int_or_none(track.get('fileSize')), 144 'duration': float_or_none(track.get('durationMs'), 1000), 145 'thumbnail': thumbnail, 146 'track': track_title, 147 'acodec': download_data.get('codec'), 148 'abr': int_or_none(download_data.get('bitrate')), 149 } 150 151 def extract_artist_name(artist): 152 decomposed = artist.get('decomposed') 153 if not isinstance(decomposed, list): 154 return artist['name'] 155 parts = [artist['name']] 156 for element in decomposed: 157 if isinstance(element, dict) and element.get('name'): 158 parts.append(element['name']) 159 elif isinstance(element, compat_str): 160 parts.append(element) 161 return ''.join(parts) 162 163 def extract_artist(artist_list): 164 if artist_list and isinstance(artist_list, list): 165 artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')] 166 if artists_names: 167 return ', '.join(artists_names) 168 169 albums = track.get('albums') 170 if albums and isinstance(albums, list): 171 album = albums[0] 172 if isinstance(album, dict): 173 year = album.get('year') 174 disc_number = int_or_none(try_get( 175 album, lambda x: x['trackPosition']['volume'])) 176 track_number = int_or_none(try_get( 177 album, lambda x: x['trackPosition']['index'])) 178 track_info.update({ 179 'album': album.get('title'), 180 'album_artist': extract_artist(album.get('artists')), 181 'release_year': int_or_none(year), 182 'genre': album.get('genre'), 183 'disc_number': disc_number, 184 'track_number': track_number, 185 }) 186 187 track_artist = extract_artist(track.get('artists')) 188 if track_artist: 189 track_info.update({ 190 'artist': track_artist, 191 'title': '%s - %s' % (track_artist, track_title), 192 }) 193 else: 194 track_info['title'] = track_title 195 196 return track_info 197 198 199 class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): 200 def _extract_tracks(self, source, item_id, url, tld): 201 tracks = source['tracks'] 202 track_ids = [compat_str(track_id) for track_id in source['trackIds']] 203 204 # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks, 205 # missing tracks should be retrieved manually. 206 if len(tracks) < len(track_ids): 207 present_track_ids = set([ 208 compat_str(track['id']) 209 for track in tracks if track.get('id')]) 210 missing_track_ids = [ 211 track_id for track_id in track_ids 212 if track_id not in present_track_ids] 213 # Request missing tracks in chunks to avoid exceeding max HTTP header size, 214 # see https://github.com/ytdl-org/youtube-dl/issues/27355 215 _TRACKS_PER_CHUNK = 250 216 for chunk_num in itertools.count(0): 217 start = chunk_num * _TRACKS_PER_CHUNK 218 end = start + _TRACKS_PER_CHUNK 219 missing_track_ids_req = missing_track_ids[start:end] 220 assert missing_track_ids_req 221 missing_tracks = self._call_api( 222 'track-entries', tld, url, item_id, 223 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), { 224 'entries': ','.join(missing_track_ids_req), 225 'lang': tld, 226 'external-domain': 'music.yandex.%s' % tld, 227 'overembed': 'false', 228 'strict': 'true', 229 }) 230 if missing_tracks: 231 tracks.extend(missing_tracks) 232 if end >= len(missing_track_ids): 233 break 234 235 return tracks 236 237 def _build_playlist(self, tracks): 238 entries = [] 239 for track in tracks: 240 track_id = track.get('id') or track.get('realId') 241 if not track_id: 242 continue 243 albums = track.get('albums') 244 if not albums or not isinstance(albums, list): 245 continue 246 album = albums[0] 247 if not isinstance(album, dict): 248 continue 249 album_id = album.get('id') 250 if not album_id: 251 continue 252 entries.append(self.url_result( 253 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id), 254 ie=YandexMusicTrackIE.ie_key(), video_id=track_id)) 255 return entries 256 257 258 class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): 259 IE_NAME = 'yandexmusic:album' 260 IE_DESC = 'Яндекс.Музыка - Альбом' 261 _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE 262 263 _TESTS = [{ 264 'url': 'http://music.yandex.ru/album/540508', 265 'info_dict': { 266 'id': '540508', 267 'title': 'md5:7ed1c3567f28d14be9f61179116f5571', 268 }, 269 'playlist_count': 50, 270 # 'skip': 'Travis CI servers blocked by YandexMusic', 271 }, { 272 'url': 'https://music.yandex.ru/album/3840501', 273 'info_dict': { 274 'id': '3840501', 275 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f', 276 }, 277 'playlist_count': 33, 278 # 'skip': 'Travis CI servers blocked by YandexMusic', 279 }, { 280 # empty artists 281 'url': 'https://music.yandex.ru/album/9091882', 282 'info_dict': { 283 'id': '9091882', 284 'title': 'ТЕД на русском', 285 }, 286 'playlist_count': 187, 287 }] 288 289 @classmethod 290 def suitable(cls, url): 291 return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url) 292 293 def _real_extract(self, url): 294 mobj = re.match(self._VALID_URL, url) 295 tld = mobj.group('tld') 296 album_id = mobj.group('id') 297 298 album = self._call_api( 299 'album', tld, url, album_id, 'Downloading album JSON', 300 {'album': album_id}) 301 302 entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) 303 304 title = album['title'] 305 artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str) 306 if artist: 307 title = '%s - %s' % (artist, title) 308 year = album.get('year') 309 if year: 310 title += ' (%s)' % year 311 312 return self.playlist_result(entries, compat_str(album['id']), title) 313 314 315 class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): 316 IE_NAME = 'yandexmusic:playlist' 317 IE_DESC = 'Яндекс.Музыка - Плейлист' 318 _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE 319 320 _TESTS = [{ 321 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', 322 'info_dict': { 323 'id': '1245', 324 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097', 325 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9', 326 }, 327 'playlist_count': 5, 328 # 'skip': 'Travis CI servers blocked by YandexMusic', 329 }, { 330 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036', 331 'only_matching': True, 332 }, { 333 # playlist exceeding the limit of 150 tracks (see 334 # https://github.com/ytdl-org/youtube-dl/issues/6666) 335 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364', 336 'info_dict': { 337 'id': '1364', 338 'title': 'md5:b3b400f997d3f878a13ae0699653f7db', 339 }, 340 'playlist_mincount': 437, 341 # 'skip': 'Travis CI servers blocked by YandexMusic', 342 }] 343 344 def _real_extract(self, url): 345 mobj = re.match(self._VALID_URL, url) 346 tld = mobj.group('tld') 347 user = mobj.group('user') 348 playlist_id = mobj.group('id') 349 350 playlist = self._call_api( 351 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', { 352 'owner': user, 353 'kinds': playlist_id, 354 'light': 'true', 355 'lang': tld, 356 'external-domain': 'music.yandex.%s' % tld, 357 'overembed': 'false', 358 })['playlist'] 359 360 tracks = self._extract_tracks(playlist, playlist_id, url, tld) 361 362 return self.playlist_result( 363 self._build_playlist(tracks), 364 compat_str(playlist_id), 365 playlist.get('title'), playlist.get('description')) 366 367 368 class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE): 369 def _call_artist(self, tld, url, artist_id): 370 return self._call_api( 371 'artist', tld, url, artist_id, 372 'Downloading artist %s JSON' % self._ARTIST_WHAT, { 373 'artist': artist_id, 374 'what': self._ARTIST_WHAT, 375 'sort': self._ARTIST_SORT or '', 376 'dir': '', 377 'period': '', 378 'lang': tld, 379 'external-domain': 'music.yandex.%s' % tld, 380 'overembed': 'false', 381 }) 382 383 def _real_extract(self, url): 384 mobj = re.match(self._VALID_URL, url) 385 tld = mobj.group('tld') 386 artist_id = mobj.group('id') 387 data = self._call_artist(tld, url, artist_id) 388 tracks = self._extract_tracks(data, artist_id, url, tld) 389 title = try_get(data, lambda x: x['artist']['name'], compat_str) 390 return self.playlist_result( 391 self._build_playlist(tracks), artist_id, title) 392 393 394 class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE): 395 IE_NAME = 'yandexmusic:artist:tracks' 396 IE_DESC = 'Яндекс.Музыка - Артист - Треки' 397 _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE 398 399 _TESTS = [{ 400 'url': 'https://music.yandex.ru/artist/617526/tracks', 401 'info_dict': { 402 'id': '617526', 403 'title': 'md5:131aef29d45fd5a965ca613e708c040b', 404 }, 405 'playlist_count': 507, 406 # 'skip': 'Travis CI servers blocked by YandexMusic', 407 }] 408 409 _ARTIST_SORT = '' 410 _ARTIST_WHAT = 'tracks' 411 412 def _real_extract(self, url): 413 mobj = re.match(self._VALID_URL, url) 414 tld = mobj.group('tld') 415 artist_id = mobj.group('id') 416 data = self._call_artist(tld, url, artist_id) 417 tracks = self._extract_tracks(data, artist_id, url, tld) 418 artist = try_get(data, lambda x: x['artist']['name'], compat_str) 419 title = '%s - %s' % (artist or artist_id, 'Треки') 420 return self.playlist_result( 421 self._build_playlist(tracks), artist_id, title) 422 423 424 class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE): 425 IE_NAME = 'yandexmusic:artist:albums' 426 IE_DESC = 'Яндекс.Музыка - Артист - Альбомы' 427 _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE 428 429 _TESTS = [{ 430 'url': 'https://music.yandex.ru/artist/617526/albums', 431 'info_dict': { 432 'id': '617526', 433 'title': 'md5:55dc58d5c85699b7fb41ee926700236c', 434 }, 435 'playlist_count': 8, 436 # 'skip': 'Travis CI servers blocked by YandexMusic', 437 }] 438 439 _ARTIST_SORT = 'year' 440 _ARTIST_WHAT = 'albums' 441 442 def _real_extract(self, url): 443 mobj = re.match(self._VALID_URL, url) 444 tld = mobj.group('tld') 445 artist_id = mobj.group('id') 446 data = self._call_artist(tld, url, artist_id) 447 entries = [] 448 for album in data['albums']: 449 if not isinstance(album, dict): 450 continue 451 album_id = album.get('id') 452 if not album_id: 453 continue 454 entries.append(self.url_result( 455 'http://music.yandex.ru/album/%s' % album_id, 456 ie=YandexMusicAlbumIE.ie_key(), video_id=album_id)) 457 artist = try_get(data, lambda x: x['artist']['name'], compat_str) 458 title = '%s - %s' % (artist or artist_id, 'Альбомы') 459 return self.playlist_result(entries, artist_id, title)