youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

neteasemusic.py (17336B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from hashlib import md5
      5 from base64 import b64encode
      6 from datetime import datetime
      7 import re
      8 
      9 from .common import InfoExtractor
     10 from ..compat import (
     11     compat_urllib_parse_urlencode,
     12     compat_str,
     13     compat_itertools_count,
     14 )
     15 from ..utils import (
     16     sanitized_Request,
     17     float_or_none,
     18 )
     19 
     20 
     21 class NetEaseMusicBaseIE(InfoExtractor):
     22     _FORMATS = ['bMusic', 'mMusic', 'hMusic']
     23     _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
     24     _API_BASE = 'http://music.163.com/api/'
     25 
     26     @classmethod
     27     def _encrypt(cls, dfsid):
     28         salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
     29         string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
     30         salt_len = len(salt_bytes)
     31         for i in range(len(string_bytes)):
     32             string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
     33         m = md5()
     34         m.update(bytes(string_bytes))
     35         result = b64encode(m.digest()).decode('ascii')
     36         return result.replace('/', '_').replace('+', '-')
     37 
     38     def extract_formats(self, info):
     39         formats = []
     40         for song_format in self._FORMATS:
     41             details = info.get(song_format)
     42             if not details:
     43                 continue
     44             song_file_path = '/%s/%s.%s' % (
     45                 self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
     46 
     47             # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
     48             # from NetEase's CDN provider that can be used if m5.music.126.net does not
     49             # work, especially for users outside of Mainland China
     50             # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
     51             for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
     52                          'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
     53                 song_url = host + song_file_path
     54                 if self._is_valid_url(song_url, info['id'], 'song'):
     55                     formats.append({
     56                         'url': song_url,
     57                         'ext': details.get('extension'),
     58                         'abr': float_or_none(details.get('bitrate'), scale=1000),
     59                         'format_id': song_format,
     60                         'filesize': details.get('size'),
     61                         'asr': details.get('sr')
     62                     })
     63                     break
     64         return formats
     65 
     66     @classmethod
     67     def convert_milliseconds(cls, ms):
     68         return int(round(ms / 1000.0))
     69 
     70     def query_api(self, endpoint, video_id, note):
     71         req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
     72         req.add_header('Referer', self._API_BASE)
     73         return self._download_json(req, video_id, note)
     74 
     75 
     76 class NetEaseMusicIE(NetEaseMusicBaseIE):
     77     IE_NAME = 'netease:song'
     78     IE_DESC = '网易云音乐'
     79     _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
     80     _TESTS = [{
     81         'url': 'http://music.163.com/#/song?id=32102397',
     82         'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
     83         'info_dict': {
     84             'id': '32102397',
     85             'ext': 'mp3',
     86             'title': 'Bad Blood (feat. Kendrick Lamar)',
     87             'creator': 'Taylor Swift / Kendrick Lamar',
     88             'upload_date': '20150517',
     89             'timestamp': 1431878400,
     90             'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
     91         },
     92         'skip': 'Blocked outside Mainland China',
     93     }, {
     94         'note': 'No lyrics translation.',
     95         'url': 'http://music.163.com/#/song?id=29822014',
     96         'info_dict': {
     97             'id': '29822014',
     98             'ext': 'mp3',
     99             'title': '听见下雨的声音',
    100             'creator': '周杰伦',
    101             'upload_date': '20141225',
    102             'timestamp': 1419523200,
    103             'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
    104         },
    105         'skip': 'Blocked outside Mainland China',
    106     }, {
    107         'note': 'No lyrics.',
    108         'url': 'http://music.163.com/song?id=17241424',
    109         'info_dict': {
    110             'id': '17241424',
    111             'ext': 'mp3',
    112             'title': 'Opus 28',
    113             'creator': 'Dustin O\'Halloran',
    114             'upload_date': '20080211',
    115             'timestamp': 1202745600,
    116         },
    117         'skip': 'Blocked outside Mainland China',
    118     }, {
    119         'note': 'Has translated name.',
    120         'url': 'http://music.163.com/#/song?id=22735043',
    121         'info_dict': {
    122             'id': '22735043',
    123             'ext': 'mp3',
    124             'title': '소원을 말해봐 (Genie)',
    125             'creator': '少女时代',
    126             'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
    127             'upload_date': '20100127',
    128             'timestamp': 1264608000,
    129             'alt_title': '说出愿望吧(Genie)',
    130         },
    131         'skip': 'Blocked outside Mainland China',
    132     }]
    133 
    134     def _process_lyrics(self, lyrics_info):
    135         original = lyrics_info.get('lrc', {}).get('lyric')
    136         translated = lyrics_info.get('tlyric', {}).get('lyric')
    137 
    138         if not translated:
    139             return original
    140 
    141         lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
    142         original_ts_texts = re.findall(lyrics_expr, original)
    143         translation_ts_dict = dict(
    144             (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
    145         )
    146         lyrics = '\n'.join([
    147             '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
    148             for time_stamp, text in original_ts_texts
    149         ])
    150         return lyrics
    151 
    152     def _real_extract(self, url):
    153         song_id = self._match_id(url)
    154 
    155         params = {
    156             'id': song_id,
    157             'ids': '[%s]' % song_id
    158         }
    159         info = self.query_api(
    160             'song/detail?' + compat_urllib_parse_urlencode(params),
    161             song_id, 'Downloading song info')['songs'][0]
    162 
    163         formats = self.extract_formats(info)
    164         self._sort_formats(formats)
    165 
    166         lyrics_info = self.query_api(
    167             'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
    168             song_id, 'Downloading lyrics data')
    169         lyrics = self._process_lyrics(lyrics_info)
    170 
    171         alt_title = None
    172         if info.get('transNames'):
    173             alt_title = '/'.join(info.get('transNames'))
    174 
    175         return {
    176             'id': song_id,
    177             'title': info['name'],
    178             'alt_title': alt_title,
    179             'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
    180             'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
    181             'thumbnail': info.get('album', {}).get('picUrl'),
    182             'duration': self.convert_milliseconds(info.get('duration', 0)),
    183             'description': lyrics,
    184             'formats': formats,
    185         }
    186 
    187 
    188 class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    189     IE_NAME = 'netease:album'
    190     IE_DESC = '网易云音乐 - 专辑'
    191     _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    192     _TEST = {
    193         'url': 'http://music.163.com/#/album?id=220780',
    194         'info_dict': {
    195             'id': '220780',
    196             'title': 'B\'day',
    197         },
    198         'playlist_count': 23,
    199         'skip': 'Blocked outside Mainland China',
    200     }
    201 
    202     def _real_extract(self, url):
    203         album_id = self._match_id(url)
    204 
    205         info = self.query_api(
    206             'album/%s?id=%s' % (album_id, album_id),
    207             album_id, 'Downloading album data')['album']
    208 
    209         name = info['name']
    210         desc = info.get('description')
    211         entries = [
    212             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
    213                             'NetEaseMusic', song['id'])
    214             for song in info['songs']
    215         ]
    216         return self.playlist_result(entries, album_id, name, desc)
    217 
    218 
    219 class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    220     IE_NAME = 'netease:singer'
    221     IE_DESC = '网易云音乐 - 歌手'
    222     _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    223     _TESTS = [{
    224         'note': 'Singer has aliases.',
    225         'url': 'http://music.163.com/#/artist?id=10559',
    226         'info_dict': {
    227             'id': '10559',
    228             'title': '张惠妹 - aMEI;阿密特',
    229         },
    230         'playlist_count': 50,
    231         'skip': 'Blocked outside Mainland China',
    232     }, {
    233         'note': 'Singer has translated name.',
    234         'url': 'http://music.163.com/#/artist?id=124098',
    235         'info_dict': {
    236             'id': '124098',
    237             'title': '李昇基 - 이승기',
    238         },
    239         'playlist_count': 50,
    240         'skip': 'Blocked outside Mainland China',
    241     }]
    242 
    243     def _real_extract(self, url):
    244         singer_id = self._match_id(url)
    245 
    246         info = self.query_api(
    247             'artist/%s?id=%s' % (singer_id, singer_id),
    248             singer_id, 'Downloading singer data')
    249 
    250         name = info['artist']['name']
    251         if info['artist']['trans']:
    252             name = '%s - %s' % (name, info['artist']['trans'])
    253         if info['artist']['alias']:
    254             name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
    255 
    256         entries = [
    257             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
    258                             'NetEaseMusic', song['id'])
    259             for song in info['hotSongs']
    260         ]
    261         return self.playlist_result(entries, singer_id, name)
    262 
    263 
    264 class NetEaseMusicListIE(NetEaseMusicBaseIE):
    265     IE_NAME = 'netease:playlist'
    266     IE_DESC = '网易云音乐 - 歌单'
    267     _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    268     _TESTS = [{
    269         'url': 'http://music.163.com/#/playlist?id=79177352',
    270         'info_dict': {
    271             'id': '79177352',
    272             'title': 'Billboard 2007 Top 100',
    273             'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
    274         },
    275         'playlist_count': 99,
    276         'skip': 'Blocked outside Mainland China',
    277     }, {
    278         'note': 'Toplist/Charts sample',
    279         'url': 'http://music.163.com/#/discover/toplist?id=3733003',
    280         'info_dict': {
    281             'id': '3733003',
    282             'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
    283             'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
    284         },
    285         'playlist_count': 50,
    286         'skip': 'Blocked outside Mainland China',
    287     }]
    288 
    289     def _real_extract(self, url):
    290         list_id = self._match_id(url)
    291 
    292         info = self.query_api(
    293             'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
    294             list_id, 'Downloading playlist data')['result']
    295 
    296         name = info['name']
    297         desc = info.get('description')
    298 
    299         if info.get('specialType') == 10:  # is a chart/toplist
    300             datestamp = datetime.fromtimestamp(
    301                 self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
    302             name = '%s %s' % (name, datestamp)
    303 
    304         entries = [
    305             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
    306                             'NetEaseMusic', song['id'])
    307             for song in info['tracks']
    308         ]
    309         return self.playlist_result(entries, list_id, name, desc)
    310 
    311 
    312 class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    313     IE_NAME = 'netease:mv'
    314     IE_DESC = '网易云音乐 - MV'
    315     _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    316     _TEST = {
    317         'url': 'http://music.163.com/#/mv?id=415350',
    318         'info_dict': {
    319             'id': '415350',
    320             'ext': 'mp4',
    321             'title': '이럴거면 그러지말지',
    322             'description': '白雅言自作曲唱甜蜜爱情',
    323             'creator': '白雅言',
    324             'upload_date': '20150520',
    325         },
    326         'skip': 'Blocked outside Mainland China',
    327     }
    328 
    329     def _real_extract(self, url):
    330         mv_id = self._match_id(url)
    331 
    332         info = self.query_api(
    333             'mv/detail?id=%s&type=mp4' % mv_id,
    334             mv_id, 'Downloading mv info')['data']
    335 
    336         formats = [
    337             {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
    338             for brs, mv_url in info['brs'].items()
    339         ]
    340         self._sort_formats(formats)
    341 
    342         return {
    343             'id': mv_id,
    344             'title': info['name'],
    345             'description': info.get('desc') or info.get('briefDesc'),
    346             'creator': info['artistName'],
    347             'upload_date': info['publishTime'].replace('-', ''),
    348             'formats': formats,
    349             'thumbnail': info.get('cover'),
    350             'duration': self.convert_milliseconds(info.get('duration', 0)),
    351         }
    352 
    353 
    354 class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    355     IE_NAME = 'netease:program'
    356     IE_DESC = '网易云音乐 - 电台节目'
    357     _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
    358     _TESTS = [{
    359         'url': 'http://music.163.com/#/program?id=10109055',
    360         'info_dict': {
    361             'id': '10109055',
    362             'ext': 'mp3',
    363             'title': '不丹足球背后的故事',
    364             'description': '喜马拉雅人的足球梦 ...',
    365             'creator': '大话西藏',
    366             'timestamp': 1434179342,
    367             'upload_date': '20150613',
    368             'duration': 900,
    369         },
    370         'skip': 'Blocked outside Mainland China',
    371     }, {
    372         'note': 'This program has accompanying songs.',
    373         'url': 'http://music.163.com/#/program?id=10141022',
    374         'info_dict': {
    375             'id': '10141022',
    376             'title': '25岁,你是自在如风的少年<27°C>',
    377             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
    378         },
    379         'playlist_count': 4,
    380         'skip': 'Blocked outside Mainland China',
    381     }, {
    382         'note': 'This program has accompanying songs.',
    383         'url': 'http://music.163.com/#/program?id=10141022',
    384         'info_dict': {
    385             'id': '10141022',
    386             'ext': 'mp3',
    387             'title': '25岁,你是自在如风的少年<27°C>',
    388             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
    389             'timestamp': 1434450841,
    390             'upload_date': '20150616',
    391         },
    392         'params': {
    393             'noplaylist': True
    394         },
    395         'skip': 'Blocked outside Mainland China',
    396     }]
    397 
    398     def _real_extract(self, url):
    399         program_id = self._match_id(url)
    400 
    401         info = self.query_api(
    402             'dj/program/detail?id=%s' % program_id,
    403             program_id, 'Downloading program info')['program']
    404 
    405         name = info['name']
    406         description = info['description']
    407 
    408         if not info['songs'] or self._downloader.params.get('noplaylist'):
    409             if info['songs']:
    410                 self.to_screen(
    411                     'Downloading just the main audio %s because of --no-playlist'
    412                     % info['mainSong']['id'])
    413 
    414             formats = self.extract_formats(info['mainSong'])
    415             self._sort_formats(formats)
    416 
    417             return {
    418                 'id': program_id,
    419                 'title': name,
    420                 'description': description,
    421                 'creator': info['dj']['brand'],
    422                 'timestamp': self.convert_milliseconds(info['createTime']),
    423                 'thumbnail': info['coverUrl'],
    424                 'duration': self.convert_milliseconds(info.get('duration', 0)),
    425                 'formats': formats,
    426             }
    427 
    428         self.to_screen(
    429             'Downloading playlist %s - add --no-playlist to just download the main audio %s'
    430             % (program_id, info['mainSong']['id']))
    431 
    432         song_ids = [info['mainSong']['id']]
    433         song_ids.extend([song['id'] for song in info['songs']])
    434         entries = [
    435             self.url_result('http://music.163.com/#/song?id=%s' % song_id,
    436                             'NetEaseMusic', song_id)
    437             for song_id in song_ids
    438         ]
    439         return self.playlist_result(entries, program_id, name, description)
    440 
    441 
    442 class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    443     IE_NAME = 'netease:djradio'
    444     IE_DESC = '网易云音乐 - 电台'
    445     _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    446     _TEST = {
    447         'url': 'http://music.163.com/#/djradio?id=42',
    448         'info_dict': {
    449             'id': '42',
    450             'title': '声音蔓延',
    451             'description': 'md5:766220985cbd16fdd552f64c578a6b15'
    452         },
    453         'playlist_mincount': 40,
    454         'skip': 'Blocked outside Mainland China',
    455     }
    456     _PAGE_SIZE = 1000
    457 
    458     def _real_extract(self, url):
    459         dj_id = self._match_id(url)
    460 
    461         name = None
    462         desc = None
    463         entries = []
    464         for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
    465             info = self.query_api(
    466                 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
    467                 % (self._PAGE_SIZE, dj_id, offset),
    468                 dj_id, 'Downloading dj programs - %d' % offset)
    469 
    470             entries.extend([
    471                 self.url_result(
    472                     'http://music.163.com/#/program?id=%s' % program['id'],
    473                     'NetEaseMusicProgram', program['id'])
    474                 for program in info['programs']
    475             ])
    476 
    477             if name is None:
    478                 radio = info['programs'][0]['radio']
    479                 name = radio['name']
    480                 desc = radio['desc']
    481 
    482             if not info['more']:
    483                 break
    484 
    485         return self.playlist_result(entries, dj_id, name, desc)