soundcloud.py - youtube-dl - Another place where youtube-dl lives on

soundcloud.py (30076B)
      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import itertools
      5 import re
      6 
      7 from .common import (
      8     InfoExtractor,
      9     SearchInfoExtractor
     10 )
     11 from ..compat import (
     12     compat_HTTPError,
     13     compat_kwargs,
     14     compat_str,
     15     compat_urlparse,
     16 )
     17 from ..utils import (
     18     error_to_compat_str,
     19     ExtractorError,
     20     float_or_none,
     21     HEADRequest,
     22     int_or_none,
     23     KNOWN_EXTENSIONS,
     24     mimetype2ext,
     25     str_or_none,
     26     try_get,
     27     unified_timestamp,
     28     update_url_query,
     29     url_or_none,
     30     urlhandle_detect_ext,
     31 )
     32 
     33 
     34 class SoundcloudEmbedIE(InfoExtractor):
     35     _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
     36     _TEST = {
     37         # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
     38         'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
     39         'only_matching': True,
     40     }
     41 
     42     @staticmethod
     43     def _extract_urls(webpage):
     44         return [m.group('url') for m in re.finditer(
     45             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
     46             webpage)]
     47 
     48     def _real_extract(self, url):
     49         query = compat_urlparse.parse_qs(
     50             compat_urlparse.urlparse(url).query)
     51         api_url = query['url'][0]
     52         secret_token = query.get('secret_token')
     53         if secret_token:
     54             api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
     55         return self.url_result(api_url)
     56 
     57 
     58 class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com tracks

       Track metadata is downloaded as JSON from the api-v2 endpoint:
       directly by track id for api.soundcloud.com/tracks/<id> URLs, or
       through the resolve endpoint for soundcloud.com permalink URLs.
       Every request carries a client_id, and the stream URLs are taken
       from the transcodings listed in the track's media entry
     """
     66 
     67     _VALID_URL = r'''(?x)^(?:https?://)?
     68                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
     69                             (?!stations/track)
     70                             (?P<uploader>[\w\d-]+)/
     71                             (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
     72                             (?P<title>[\w\d-]+)/?
     73                             (?P<token>[^?]+?)?(?:[?].*)?$)
     74                        |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
     75                           (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
     76                     )
     77                     '''
     78     IE_NAME = 'soundcloud'
     79     _TESTS = [
     80         {
     81             'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
     82             'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
     83             'info_dict': {
     84                 'id': '62986583',
     85                 'ext': 'mp3',
     86                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
     87                 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
     88                 'uploader': 'E.T. ExTerrestrial Music',
     89                 'uploader_id': '1571244',
     90                 'timestamp': 1349920598,
     91                 'upload_date': '20121011',
     92                 'duration': 143.216,
     93                 'license': 'all-rights-reserved',
     94                 'view_count': int,
     95                 'like_count': int,
     96                 'comment_count': int,
     97                 'repost_count': int,
     98             }
     99         },
    100         # geo-restricted
    101         {
    102             'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
    103             'info_dict': {
    104                 'id': '47127627',
    105                 'ext': 'mp3',
    106                 'title': 'Goldrushed',
    107                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
    108                 'uploader': 'The Royal Concept',
    109                 'uploader_id': '9615865',
    110                 'timestamp': 1337635207,
    111                 'upload_date': '20120521',
    112                 'duration': 227.155,
    113                 'license': 'all-rights-reserved',
    114                 'view_count': int,
    115                 'like_count': int,
    116                 'comment_count': int,
    117                 'repost_count': int,
    118             },
    119         },
    120         # private link
    121         {
    122             'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
    123             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
    124             'info_dict': {
    125                 'id': '123998367',
    126                 'ext': 'mp3',
    127                 'title': 'Youtube - Dl Test Video \'\' Ä↭',
    128                 'description': 'test chars:  \"\'/\\ä↭',
    129                 'uploader': 'jaimeMF',
    130                 'uploader_id': '69767071',
    131                 'timestamp': 1386604920,
    132                 'upload_date': '20131209',
    133                 'duration': 9.927,
    134                 'license': 'all-rights-reserved',
    135                 'view_count': int,
    136                 'like_count': int,
    137                 'comment_count': int,
    138                 'repost_count': int,
    139             },
    140         },
    141         # private link (alt format)
    142         {
    143             'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
    144             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
    145             'info_dict': {
    146                 'id': '123998367',
    147                 'ext': 'mp3',
    148                 'title': 'Youtube - Dl Test Video \'\' Ä↭',
    149                 'description': 'test chars:  \"\'/\\ä↭',
    150                 'uploader': 'jaimeMF',
    151                 'uploader_id': '69767071',
    152                 'timestamp': 1386604920,
    153                 'upload_date': '20131209',
    154                 'duration': 9.927,
    155                 'license': 'all-rights-reserved',
    156                 'view_count': int,
    157                 'like_count': int,
    158                 'comment_count': int,
    159                 'repost_count': int,
    160             },
    161         },
    162         # downloadable song
    163         {
    164             'url': 'https://soundcloud.com/oddsamples/bus-brakes',
    165             'md5': '7624f2351f8a3b2e7cd51522496e7631',
    166             'info_dict': {
    167                 'id': '128590877',
    168                 'ext': 'mp3',
    169                 'title': 'Bus Brakes',
    170                 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
    171                 'uploader': 'oddsamples',
    172                 'uploader_id': '73680509',
    173                 'timestamp': 1389232924,
    174                 'upload_date': '20140109',
    175                 'duration': 17.346,
    176                 'license': 'cc-by-sa',
    177                 'view_count': int,
    178                 'like_count': int,
    179                 'comment_count': int,
    180                 'repost_count': int,
    181             },
    182         },
    183         # private link, downloadable format
    184         {
    185             'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
    186             'md5': '64a60b16e617d41d0bef032b7f55441e',
    187             'info_dict': {
    188                 'id': '340344461',
    189                 'ext': 'wav',
    190                 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
    191                 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
    192                 'uploader': 'Ori Uplift Music',
    193                 'uploader_id': '12563093',
    194                 'timestamp': 1504206263,
    195                 'upload_date': '20170831',
    196                 'duration': 7449.096,
    197                 'license': 'all-rights-reserved',
    198                 'view_count': int,
    199                 'like_count': int,
    200                 'comment_count': int,
    201                 'repost_count': int,
    202             },
    203         },
    204         # no album art, use avatar pic for thumbnail
    205         {
    206             'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
    207             'md5': '59c7872bc44e5d99b7211891664760c2',
    208             'info_dict': {
    209                 'id': '309699954',
    210                 'ext': 'mp3',
    211                 'title': 'Sideways (Prod. Mad Real)',
    212                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
    213                 'uploader': 'garyvee',
    214                 'uploader_id': '2366352',
    215                 'timestamp': 1488152409,
    216                 'upload_date': '20170226',
    217                 'duration': 207.012,
    218                 'thumbnail': r're:https?://.*\.jpg',
    219                 'license': 'all-rights-reserved',
    220                 'view_count': int,
    221                 'like_count': int,
    222                 'comment_count': int,
    223                 'repost_count': int,
    224             },
    225             'params': {
    226                 'skip_download': True,
    227             },
    228         },
    229         {
    230             'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
    231             'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
    232             'info_dict': {
    233                 'id': '583011102',
    234                 'ext': 'mp3',
    235                 'title': 'Mezzo Valzer',
    236                 'description': 'md5:4138d582f81866a530317bae316e8b61',
    237                 'uploader': 'Micronie',
    238                 'uploader_id': '3352531',
    239                 'timestamp': 1551394171,
    240                 'upload_date': '20190228',
    241                 'duration': 180.157,
    242                 'thumbnail': r're:https?://.*\.jpg',
    243                 'license': 'all-rights-reserved',
    244                 'view_count': int,
    245                 'like_count': int,
    246                 'comment_count': int,
    247                 'repost_count': int,
    248             },
    249         },
    250         {
    251             # with AAC HQ format available via OAuth token
    252             'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
    253             'only_matching': True,
    254         },
    255     ]
    256 
    257     _API_V2_BASE = 'https://api-v2.soundcloud.com/'
    258     _BASE_URL = 'https://soundcloud.com/'
    259     _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
    260 
    261     _ARTWORK_MAP = {
    262         'mini': 16,
    263         'tiny': 20,
    264         'small': 32,
    265         'badge': 47,
    266         't67x67': 67,
    267         'large': 100,
    268         't300x300': 300,
    269         'crop': 400,
    270         't500x500': 500,
    271         'original': 0,
    272     }
    273 
    def _store_client_id(self, client_id):
        """Write *client_id* to the 'soundcloud' / 'client_id' cache entry."""
        cache = self._downloader.cache
        cache.store('soundcloud', 'client_id', client_id)
    276 
    def _update_client_id(self):
        """Find a client id in the site's scripts and remember it.

        Downloads the SoundCloud front page, then each script it references
        (last one first) until one contains a 32-character ``client_id``.
        That id is set on the instance and stored in the cache.

        Raises ExtractorError when no script yields a client id.
        """
        homepage = self._download_webpage('https://soundcloud.com/', None)
        script_urls = re.findall(r'<script[^>]+src="([^"]+)"', homepage)
        for script_url in script_urls[::-1]:
            js = self._download_webpage(script_url, None, fatal=False)
            if not js:
                continue
            found_id = self._search_regex(
                r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
                js, 'client id', default=None)
            if not found_id:
                continue
            self._CLIENT_ID = found_id
            self._store_client_id(found_id)
            return
        raise ExtractorError('Unable to extract client id')
    290 
    def _download_json(self, *args, **kwargs):
        """Download JSON with the current client id, refreshing it once on HTTP 401.

        Accepts the same arguments as the parent class's ``_download_json``.
        ``client_id`` is added to a copy of the caller's ``query``.

        When the request fails with HTTP 401 the cached client id is
        cleared, a new one is extracted and the request is retried once.
        Any other failure, or a 401 on the retry, is raised as
        ExtractorError; if ``fatal=False`` was passed it is reported as a
        warning instead and False is returned.
        """
        non_fatal = kwargs.get('fatal') is False
        if non_fatal:
            # Always let the parent raise so that a 401 can be detected here.
            del kwargs['fatal']
        query = kwargs.get('query', {}).copy()
        for may_retry in (True, False):
            query['client_id'] = self._CLIENT_ID
            kwargs['query'] = query
            try:
                return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
            except ExtractorError as e:
                unauthorized = isinstance(e.cause, compat_HTTPError) and e.cause.code == 401
                if unauthorized and may_retry:
                    self._store_client_id(None)
                    self._update_client_id()
                    continue
                # A second 401 must not fall out of the loop and silently
                # return None; treat it like every other failure.
                if non_fatal:
                    self._downloader.report_warning(error_to_compat_str(e))
                    return False
                raise
    310 
    def _real_initialize(self):
        """Set the client id from the cache, falling back to the built-in one."""
        cached_id = self._downloader.cache.load('soundcloud', 'client_id')
        self._CLIENT_ID = cached_id or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
    313 
    @classmethod
    def _resolv_url(cls, url):
        """Return the api-v2 ``resolve`` endpoint address for *url*."""
        return ''.join((SoundcloudIE._API_V2_BASE, 'resolve?url=', url))
    317 
    def _extract_info_dict(self, info, full_title=None, secret_token=None):
        """Build the result dict for a single track from its API metadata.

        info         -- track metadata dict; must contain 'id' and 'title'
        full_title   -- accepted for interface compatibility; not used here
        secret_token -- optional token added to the download and stream
                        requests

        Returns a dict with the track's metadata, thumbnails and formats.
        Raises a geo-restriction error when no format was found and the
        track's policy is 'BLOCK'.
        """
        track_id = compat_str(info['id'])
        title = info['title']

        # URLs already turned into formats, used to skip duplicates.
        format_urls = set()
        formats = []
        query = {'client_id': self._CLIENT_ID}
        if secret_token:
            query['secret_token'] = secret_token

        # Uploader-provided download, preferred over the streams.
        if info.get('downloadable') and info.get('has_downloads_left'):
            download_url = update_url_query(
                self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
            redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
            if redirect_url:
                # HEAD request: only the final URL and headers are needed.
                urlh = self._request_webpage(
                    HEADRequest(redirect_url), track_id, fatal=False)
                if urlh:
                    format_url = urlh.geturl()
                    format_urls.add(format_url)
                    formats.append({
                        'format_id': 'download',
                        'ext': urlhandle_detect_ext(urlh) or 'mp3',
                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
                        'url': format_url,
                        'preference': 10,
                    })

        def invalid_url(url):
            return not url or url in format_urls

        def add_format(f, protocol, is_preview=False):
            # Inspect the format's own URL instead of a variable from the
            # enclosing scope, so the helper depends only on its arguments.
            mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', f['url'])
            if mobj:
                # Fill in 'abr' / 'ext' from the URL only where still unset.
                for k, v in mobj.groupdict().items():
                    if not f.get(k):
                        f[k] = v
            format_id_list = []
            if protocol:
                format_id_list.append(protocol)
            ext = f.get('ext')
            if ext == 'aac':
                f['abr'] = '256'
            for k in ('ext', 'abr'):
                v = f.get(k)
                if v:
                    format_id_list.append(v)
            preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
            if preview:
                format_id_list.append('preview')
            abr = f.get('abr')
            if abr:
                f['abr'] = int(abr)
            if protocol == 'hls':
                protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
            else:
                protocol = 'http'
            f.update({
                'format_id': '_'.join(format_id_list),
                'protocol': protocol,
                # Previews rank below every complete format.
                'preference': -10 if preview else None,
            })
            formats.append(f)

        # New API
        transcodings = try_get(
            info, lambda x: x['media']['transcodings'], list) or []
        for t in transcodings:
            if not isinstance(t, dict):
                continue
            format_url = url_or_none(t.get('url'))
            if not format_url:
                continue
            # The transcoding URL returns JSON that holds the stream URL.
            stream = self._download_json(
                format_url, track_id, query=query, fatal=False)
            if not isinstance(stream, dict):
                continue
            stream_url = url_or_none(stream.get('url'))
            if invalid_url(stream_url):
                continue
            format_urls.add(stream_url)
            stream_format = t.get('format') or {}
            protocol = stream_format.get('protocol')
            if protocol != 'hls' and '/hls' in format_url:
                protocol = 'hls'
            ext = None
            preset = str_or_none(t.get('preset'))
            if preset:
                ext = preset.split('_')[0]
            if ext not in KNOWN_EXTENSIONS:
                ext = mimetype2ext(stream_format.get('mime_type'))
            add_format({
                'url': stream_url,
                'ext': ext,
            }, 'http' if protocol == 'progressive' else protocol,
                t.get('snipped') or '/preview/' in format_url)

        for f in formats:
            f['vcodec'] = 'none'

        if not formats and info.get('policy') == 'BLOCK':
            self.raise_geo_restricted()
        self._sort_formats(formats)

        user = info.get('user') or {}

        thumbnails = []
        artwork_url = info.get('artwork_url')
        # Fall back to the uploader's avatar when the track has no artwork.
        thumbnail = artwork_url or user.get('avatar_url')
        if isinstance(thumbnail, compat_str):
            if re.search(self._IMAGE_REPL_RE, thumbnail):
                # One thumbnail per known size, made by swapping the size
                # id inside the image URL.
                for image_id, size in self._ARTWORK_MAP.items():
                    i = {
                        'id': image_id,
                        'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
                    }
                    if image_id == 'tiny' and not artwork_url:
                        size = 18
                    elif image_id == 'original':
                        i['preference'] = 10
                    if size:
                        i.update({
                            'width': size,
                            'height': size,
                        })
                    thumbnails.append(i)
            else:
                thumbnails = [{'url': thumbnail}]

        def extract_count(key):
            return int_or_none(info.get('%s_count' % key))

        return {
            'id': track_id,
            'uploader': user.get('username'),
            'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
            'uploader_url': user.get('permalink_url'),
            'timestamp': unified_timestamp(info.get('created_at')),
            'title': title,
            'description': info.get('description'),
            'thumbnails': thumbnails,
            'duration': float_or_none(info.get('duration'), 1000),
            'webpage_url': info.get('permalink_url'),
            'license': info.get('license'),
            'view_count': extract_count('playback'),
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
            'genre': info.get('genre'),
            'formats': formats
        }
    469 
    def _real_extract(self, url):
        """Fetch a track's metadata and turn it into a result dict.

        API URLs are looked up directly by track id; permalink URLs go
        through the resolve endpoint.
        """
        match = re.match(self._VALID_URL, url)
        track_id = match.group('track_id')
        query = {}

        if not track_id:
            full_title = '%s/%s' % match.group('uploader', 'title')
            token = match.group('token')
            # A private-link token is part of the path to resolve.
            resolve_title = (full_title + '/%s' % token) if token else full_title
            info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
        else:
            full_title = track_id
            info_json_url = self._API_V2_BASE + 'tracks/' + track_id
            token = match.group('secret_token')
            if token:
                query['secret_token'] = token

        info = self._download_json(
            info_json_url, full_title, 'Downloading info JSON', query=query)
        return self._extract_info_dict(info, full_title, token)
    493 
    494 
    495 class SoundcloudPlaylistBaseIE(SoundcloudIE):
    496     def _extract_set(self, playlist, token=None):
    497         playlist_id = compat_str(playlist['id'])
    498         tracks = playlist.get('tracks') or []
    499         if not all([t.get('permalink_url') for t in tracks]) and token:
    500             tracks = self._download_json(
    501                 self._API_V2_BASE + 'tracks', playlist_id,
    502                 'Downloading tracks', query={
    503                     'ids': ','.join([compat_str(t['id']) for t in tracks]),
    504                     'playlistId': playlist_id,
    505                     'playlistSecretToken': token,
    506                 })
    507         entries = []
    508         for track in tracks:
    509             track_id = str_or_none(track.get('id'))
    510             url = track.get('permalink_url')
    511             if not url:
    512                 if not track_id:
    513                     continue
    514                 url = self._API_V2_BASE + 'tracks/' + track_id
    515                 if token:
    516                     url += '?secret_token=' + token
    517             entries.append(self.url_result(
    518                 url, SoundcloudIE.ie_key(), track_id))
    519         return self.playlist_result(
    520             entries, playlist_id,
    521             playlist.get('title'),
    522             playlist.get('description'))
    523 
    524 
    525 class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
    526     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
    527     IE_NAME = 'soundcloud:set'
    528     _TESTS = [{
    529         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
    530         'info_dict': {
    531             'id': '2284613',
    532             'title': 'The Royal Concept EP',
    533             'description': 'md5:71d07087c7a449e8941a70a29e34671e',
    534         },
    535         'playlist_mincount': 5,
    536     }, {
    537         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
    538         'only_matching': True,
    539     }]
    540 
    541     def _real_extract(self, url):
    542         mobj = re.match(self._VALID_URL, url)
    543 
    544         full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
    545         token = mobj.group('token')
    546         if token:
    547             full_title += '/' + token
    548 
    549         info = self._download_json(self._resolv_url(
    550             self._BASE_URL + full_title), full_title)
    551 
    552         if 'errors' in info:
    553             msgs = (compat_str(err['error_message']) for err in info['errors'])
    554             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
    555 
    556         return self._extract_set(info, token)
    557 
    558 
    559 class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
    560     def _extract_playlist(self, base_url, playlist_id, playlist_title):
    561         # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
    562         # https://developers.soundcloud.com/blog/offset-pagination-deprecated
    563         COMMON_QUERY = {
    564             'limit': 200,
    565             'linked_partitioning': '1',
    566         }
    567 
    568         query = COMMON_QUERY.copy()
    569         query['offset'] = 0
    570 
    571         next_href = base_url
    572 
    573         entries = []
    574         for i in itertools.count():
    575             response = self._download_json(
    576                 next_href, playlist_id,
    577                 'Downloading track page %s' % (i + 1), query=query)
    578 
    579             collection = response['collection']
    580 
    581             if not isinstance(collection, list):
    582                 collection = []
    583 
    584             # Empty collection may be returned, in this case we proceed
    585             # straight to next_href
    586 
    587             def resolve_entry(candidates):
    588                 for cand in candidates:
    589                     if not isinstance(cand, dict):
    590                         continue
    591                     permalink_url = url_or_none(cand.get('permalink_url'))
    592                     if not permalink_url:
    593                         continue
    594                     return self.url_result(
    595                         permalink_url,
    596                         SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
    597                         str_or_none(cand.get('id')), cand.get('title'))
    598 
    599             for e in collection:
    600                 entry = resolve_entry((e, e.get('track'), e.get('playlist')))
    601                 if entry:
    602                     entries.append(entry)
    603 
    604             next_href = response.get('next_href')
    605             if not next_href:
    606                 break
    607 
    608             next_href = response['next_href']
    609             parsed_next_href = compat_urlparse.urlparse(next_href)
    610             query = compat_urlparse.parse_qs(parsed_next_href.query)
    611             query.update(COMMON_QUERY)
    612 
    613         return {
    614             '_type': 'playlist',
    615             'id': playlist_id,
    616             'title': playlist_title,
    617             'entries': entries,
    618         }
    619 
    620 
    621 class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    622     _VALID_URL = r'''(?x)
    623                         https?://
    624                             (?:(?:www|m)\.)?soundcloud\.com/
    625                             (?P<user>[^/]+)
    626                             (?:/
    627                                 (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
    628                             )?
    629                             /?(?:[?#].*)?$
    630                     '''
    631     IE_NAME = 'soundcloud:user'
    632     _TESTS = [{
    633         'url': 'https://soundcloud.com/soft-cell-official',
    634         'info_dict': {
    635             'id': '207965082',
    636             'title': 'Soft Cell (All)',
    637         },
    638         'playlist_mincount': 28,
    639     }, {
    640         'url': 'https://soundcloud.com/soft-cell-official/tracks',
    641         'info_dict': {
    642             'id': '207965082',
    643             'title': 'Soft Cell (Tracks)',
    644         },
    645         'playlist_mincount': 27,
    646     }, {
    647         'url': 'https://soundcloud.com/soft-cell-official/albums',
    648         'info_dict': {
    649             'id': '207965082',
    650             'title': 'Soft Cell (Albums)',
    651         },
    652         'playlist_mincount': 1,
    653     }, {
    654         'url': 'https://soundcloud.com/jcv246/sets',
    655         'info_dict': {
    656             'id': '12982173',
    657             'title': 'Jordi / cv (Sets)',
    658         },
    659         'playlist_mincount': 2,
    660     }, {
    661         'url': 'https://soundcloud.com/jcv246/reposts',
    662         'info_dict': {
    663             'id': '12982173',
    664             'title': 'Jordi / cv (Reposts)',
    665         },
    666         'playlist_mincount': 6,
    667     }, {
    668         'url': 'https://soundcloud.com/clalberg/likes',
    669         'info_dict': {
    670             'id': '11817582',
    671             'title': 'clalberg (Likes)',
    672         },
    673         'playlist_mincount': 5,
    674     }, {
    675         'url': 'https://soundcloud.com/grynpyret/spotlight',
    676         'info_dict': {
    677             'id': '7098329',
    678             'title': 'Grynpyret (Spotlight)',
    679         },
    680         'playlist_mincount': 1,
    681     }]
    682 
    683     _BASE_URL_MAP = {
    684         'all': 'stream/users/%s',
    685         'tracks': 'users/%s/tracks',
    686         'albums': 'users/%s/albums',
    687         'sets': 'users/%s/playlists',
    688         'reposts': 'stream/users/%s/reposts',
    689         'likes': 'users/%s/likes',
    690         'spotlight': 'users/%s/spotlight',
    691     }
    692 
    def _real_extract(self, url):
        """Return the playlist for one resource of a user's page.

        The user is resolved through the API first; the resource named in
        the URL (or 'all' when none is given) then selects the endpoint
        template from _BASE_URL_MAP.
        """
        match = re.match(self._VALID_URL, url)
        user_slug = match.group('user')

        user = self._download_json(
            self._resolv_url(self._BASE_URL + user_slug),
            user_slug, 'Downloading user info')

        # A bare user URL carries no resource segment.
        resource = match.group('rsrc') or 'all'

        endpoint = (
            self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'])
        playlist_id = str_or_none(user.get('id'))
        playlist_title = '%s (%s)' % (user['username'], resource.capitalize())

        return self._extract_playlist(endpoint, playlist_id, playlist_title)
    707 
    708 
    709 class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    710     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    711     IE_NAME = 'soundcloud:trackstation'
    712     _TESTS = [{
    713         'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
    714         'info_dict': {
    715             'id': '286017854',
    716             'title': 'Track station: your text',
    717         },
    718         'playlist_mincount': 47,
    719     }]
    720 
    721     def _real_extract(self, url):
    722         track_name = self._match_id(url)
    723 
    724         track = self._download_json(self._resolv_url(url), track_name)
    725         track_id = self._search_regex(
    726             r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
    727 
    728         return self._extract_playlist(
    729             self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
    730             track_id, 'Track station: %s' % track['title'])
    731 
    732 
    733 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    734     _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    735     IE_NAME = 'soundcloud:playlist'
    736     _TESTS = [{
    737         'url': 'https://api.soundcloud.com/playlists/4110309',
    738         'info_dict': {
    739             'id': '4110309',
    740             'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
    741             'description': 're:.*?TILT Brass - Bowery Poetry Club',
    742         },
    743         'playlist_count': 6,
    744     }]
    745 
    746     def _real_extract(self, url):
    747         mobj = re.match(self._VALID_URL, url)
    748         playlist_id = mobj.group('id')
    749 
    750         query = {}
    751         token = mobj.group('token')
    752         if token:
    753             query['secret_token'] = token
    754 
    755         data = self._download_json(
    756             self._API_V2_BASE + 'playlists/' + playlist_id,
    757             playlist_id, 'Downloading playlist', query=query)
    758 
    759         return self._extract_set(data, token)
    760 
    761 
    762 class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    763     IE_NAME = 'soundcloud:search'
    764     IE_DESC = 'Soundcloud search'
    765     _MAX_RESULTS = float('inf')
    766     _TESTS = [{
    767         'url': 'scsearch15:post-avant jazzcore',
    768         'info_dict': {
    769             'title': 'post-avant jazzcore',
    770         },
    771         'playlist_count': 15,
    772     }]
    773 
    774     _SEARCH_KEY = 'scsearch'
    775     _MAX_RESULTS_PER_PAGE = 200
    776     _DEFAULT_RESULTS_PER_PAGE = 50
    777 
    778     def _get_collection(self, endpoint, collection_id, **query):
    779         limit = min(
    780             query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
    781             self._MAX_RESULTS_PER_PAGE)
    782         query.update({
    783             'limit': limit,
    784             'linked_partitioning': 1,
    785             'offset': 0,
    786         })
    787         next_url = update_url_query(self._API_V2_BASE + endpoint, query)
    788 
    789         collected_results = 0
    790 
    791         for i in itertools.count(1):
    792             response = self._download_json(
    793                 next_url, collection_id, 'Downloading page {0}'.format(i),
    794                 'Unable to download API page')
    795 
    796             collection = response.get('collection', [])
    797             if not collection:
    798                 break
    799 
    800             collection = list(filter(bool, collection))
    801             collected_results += len(collection)
    802 
    803             for item in collection:
    804                 yield self.url_result(item['uri'], SoundcloudIE.ie_key())
    805 
    806             if not collection or collected_results >= limit:
    807                 break
    808 
    809             next_url = response.get('next_href')
    810             if not next_url:
    811                 break
    812 
    813     def _get_n_results(self, query, n):
    814         tracks = self._get_collection('search/tracks', query, limit=n, q=query)
    815         return self.playlist_result(tracks, playlist_title=query)
	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE