[soundcloud:trackstation] Add extractor (closes #13733) - youtube-dl

commit 836ef2648613f4ca565b319af4769c02e35f60f6
parent c04017519da74a375d6c1c95733d921e96d8ee82
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 29 Jul 2017 18:41:42 +0700

[soundcloud:trackstation] Add extractor (closes #13733)

Diffstat:
M youtube_dl/extractor/extractors.py  | 3 ++-
M youtube_dl/extractor/soundcloud.py  | 141 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------

2 files changed, 89 insertions(+), 55 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -935,8 +935,9 @@ from .soundcloud import (
     SoundcloudIE,
     SoundcloudSetIE,
     SoundcloudUserIE,
+    SoundcloudTrackStationIE,
     SoundcloudPlaylistIE,
-    SoundcloudSearchIE
+    SoundcloudSearchIE,
 )
 from .soundgasm import (
     SoundgasmIE,
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
@@ -31,6 +31,7 @@ class SoundcloudIE(InfoExtractor):
 
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
+                            (?!stations/track)
                             (?P<uploader>[\w\d-]+)/
                             (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                             (?P<title>[\w\d-]+)/?
@@ -330,7 +331,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
         }
 
 
-class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
+    _API_BASE = 'https://api.soundcloud.com'
+    _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+    def _extract_playlist(self, base_url, playlist_id, playlist_title):
+        COMMON_QUERY = {
+            'limit': 50,
+            'client_id': self._CLIENT_ID,
+            'linked_partitioning': '1',
+        }
+
+        query = COMMON_QUERY.copy()
+        query['offset'] = 0
+
+        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+
+        entries = []
+        for i in itertools.count():
+            response = self._download_json(
+                next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+
+            collection = response['collection']
+            if not collection:
+                break
+
+            def resolve_permalink_url(candidates):
+                for cand in candidates:
+                    if isinstance(cand, dict):
+                        permalink_url = cand.get('permalink_url')
+                        entry_id = self._extract_id(cand)
+                        if permalink_url and permalink_url.startswith('http'):
+                            return permalink_url, entry_id
+
+            for e in collection:
+                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+                if permalink_url:
+                    entries.append(self.url_result(permalink_url, video_id=entry_id))
+
+            next_href = response.get('next_href')
+            if not next_href:
+                break
+
+            parsed_next_href = compat_urlparse.urlparse(response['next_href'])
+            qs = compat_urlparse.parse_qs(parsed_next_href.query)
+            qs.update(COMMON_QUERY)
+            next_href = compat_urlparse.urlunparse(
+                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_title,
+            'entries': entries,
+        }
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
     _VALID_URL = r'''(?x)
                         https?://
                             (?:(?:www|m)\.)?soundcloud\.com/
@@ -385,16 +442,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
         'playlist_mincount': 1,
     }]
 
-    _API_BASE = 'https://api.soundcloud.com'
-    _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
     _BASE_URL_MAP = {
-        'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
-        'tracks': '%s/users/%%s/tracks' % _API_BASE,
-        'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
-        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
-        'likes': '%s/users/%%s/likes' % _API_V2_BASE,
-        'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
+        'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
+        'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+        'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
     }
 
     _TITLE_MAP = {
@@ -416,57 +470,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
             resolv_url, uploader, 'Downloading user info')
 
         resource = mobj.group('rsrc') or 'all'
-        base_url = self._BASE_URL_MAP[resource] % user['id']
 
-        COMMON_QUERY = {
-            'limit': 50,
-            'client_id': self._CLIENT_ID,
-            'linked_partitioning': '1',
-        }
+        return self._extract_playlist(
+            self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
+            '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
 
-        query = COMMON_QUERY.copy()
-        query['offset'] = 0
 
-        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
-
-        entries = []
-        for i in itertools.count():
-            response = self._download_json(
-                next_href, uploader, 'Downloading track page %s' % (i + 1))
-
-            collection = response['collection']
-            if not collection:
-                break
-
-            def resolve_permalink_url(candidates):
-                for cand in candidates:
-                    if isinstance(cand, dict):
-                        permalink_url = cand.get('permalink_url')
-                        entry_id = self._extract_id(cand)
-                        if permalink_url and permalink_url.startswith('http'):
-                            return permalink_url, entry_id
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+    IE_NAME = 'soundcloud:trackstation'
+    _TESTS = [{
+        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+        'info_dict': {
+            'id': '286017854',
+            'title': 'Track station: your-text',
+        },
+        'playlist_mincount': 47,
+    }]
 
-            for e in collection:
-                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
-                if permalink_url:
-                    entries.append(self.url_result(permalink_url, video_id=entry_id))
+    def _real_extract(self, url):
+        track_name = self._match_id(url)
 
-            next_href = response.get('next_href')
-            if not next_href:
-                break
+        webpage = self._download_webpage(url, track_name)
 
-            parsed_next_href = compat_urlparse.urlparse(response['next_href'])
-            qs = compat_urlparse.parse_qs(parsed_next_href.query)
-            qs.update(COMMON_QUERY)
-            next_href = compat_urlparse.urlunparse(
-                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+        track_id = self._search_regex(
+            r'soundcloud:track-stations:(\d+)', webpage, 'track id')
 
-        return {
-            '_type': 'playlist',
-            'id': compat_str(user['id']),
-            'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
-            'entries': entries,
-        }
+        return self._extract_playlist(
+            '%s/stations/soundcloud:track-stations:%s/tracks'
+            % (self._API_V2_BASE, track_id),
+            track_id, 'Track station: %s' % track_name)
 
 
 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log | Files | Refs | README | LICENSE

M	youtube_dl/extractor/extractors.py	|	3	++-
M	youtube_dl/extractor/soundcloud.py	|	141	+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------