youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 20991253334c069efac6986bb51cc28058809026
parent b48f147d5a36b16e421b9b91fcc72b62e695c948
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon,  5 May 2014 03:12:41 +0200

[soundcloud/generic] Add support for playlists

Diffstat:
M test/test_playlists.py | 13 +++++++++++++
M youtube_dl/extractor/__init__.py | 7 ++++++-
M youtube_dl/extractor/common.py | 8 ++++++++
M youtube_dl/extractor/generic.py | 13 +++++++++----
M youtube_dl/extractor/soundcloud.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
5 files changed, 86 insertions(+), 32 deletions(-)

diff --git a/test/test_playlists.py b/test/test_playlists.py @@ -10,6 +10,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( + assertRegexpMatches, expect_info_dict, FakeYDL, ) @@ -25,6 +26,7 @@ from youtube_dl.extractor import ( UstreamChannelIE, SoundcloudSetIE, SoundcloudUserIE, + SoundcloudPlaylistIE, LivestreamIE, NHLVideocenterIE, BambuserChannelIE, @@ -124,6 +126,17 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) + def test_soundcloud_playlist(self): + dl = FakeYDL() + ie = SoundcloudPlaylistIE(dl) + result = ie.extract('http://api.soundcloud.com/playlists/4110309') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '4110309') + self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]') + assertRegexpMatches( + self, result['description'], r'TILT Brass - Bowery Poetry Club') + self.assertEqual(len(result['entries']), 6) + def test_livestream_event(self): dl = FakeYDL() ie = LivestreamIE(dl) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py @@ -237,7 +237,12 @@ from .smotri import ( SmotriBroadcastIE, ) from .sohu import SohuIE -from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE +from .soundcloud import ( + SoundcloudIE, + SoundcloudSetIE, + SoundcloudUserIE, + SoundcloudPlaylistIE +) from .southparkstudios import ( SouthParkStudiosIE, SouthparkDeIE, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -548,6 +548,13 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def http_scheme(self): + """ Either "https:" or "https:", depending on the user's preferences """ + return ( + 'http:' + if self._downloader.params.get('prefer_insecure', False) + else 'https:') + class SearchInfoExtractor(InfoExtractor): """ @@ -591,3 +598,4 @@ class 
SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY + diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -348,10 +348,7 @@ class GenericIE(InfoExtractor): if url.startswith('//'): return { '_type': 'url', - 'url': ( - 'http:' - if self._downloader.params.get('prefer_insecure', False) - else 'https:') + url, + 'url': self.http_scheme() + url, } parsed_url = compat_urlparse.urlparse(url) @@ -636,6 +633,14 @@ class GenericIE(InfoExtractor): if smotri_url: return self.url_result(smotri_url, 'Smotri') + # Look for embeded soundcloud player + mobj = re.search( + r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"', + webpage) + if mobj is not None: + url = unescapeHTML(mobj.group('url')) + return self.url_result(url) + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py @@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor): of the stream token and uid """ - _VALID_URL = r'''^(?:https?://)? + _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?P<uploader>[\w\d-]+)/ (?!sets/)(?P<title>[\w\d-]+)/? @@ -94,10 +94,6 @@ class SoundcloudIE(InfoExtractor): _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' - @classmethod - def suitable(cls, url): - return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None - def report_resolve(self, video_id): """Report information extraction.""" self.to_screen('%s: Resolving id' % video_id) @@ -141,11 +137,10 @@ class SoundcloudIE(InfoExtractor): # We have to retrieve the url streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?' 
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) - stream_json = self._download_webpage( + format_dict = self._download_json( streams_url, track_id, 'Downloading track url') - format_dict = json.loads(stream_json) for key, stream_url in format_dict.items(): if key.startswith('http'): formats.append({ @@ -198,7 +193,7 @@ class SoundcloudIE(InfoExtractor): full_title = track_id elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - return self.url_result(query['url'][0], ie='Soundcloud') + return self.url_result(query['url'][0]) else: # extract uploader (which is in the url) uploader = mobj.group('uploader') @@ -213,11 +208,11 @@ class SoundcloudIE(InfoExtractor): url = 'http://soundcloud.com/%s' % resolve_title info_json_url = self._resolv_url(url) - info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON') + info = self._download_json(info_json_url, full_title, 'Downloading info JSON') - info = json.loads(info_json) return self._extract_info_dict(info, full_title, secret_token=token) + class SoundcloudSetIE(SoundcloudIE): _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' IE_NAME = 'soundcloud:set' @@ -232,16 +227,15 @@ class SoundcloudSetIE(SoundcloudIE): # extract uploader (which is in the url) uploader = mobj.group(1) # extract simple title (uploader + slug of song title) - slug_title = mobj.group(2) + slug_title = mobj.group(2) full_title = '%s/sets/%s' % (uploader, slug_title) self.report_resolve(full_title) url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) resolv_url = self._resolv_url(url) - info_json = self._download_webpage(resolv_url, full_title) + info = self._download_json(resolv_url, full_title) - info = json.loads(info_json) if 'errors' in info: for err in info['errors']: self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message'])) @@ -268,26 +262,55 @@ 
class SoundcloudUserIE(SoundcloudIE): url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) - user_json = self._download_webpage(resolv_url, uploader, - 'Downloading user info') - user = json.loads(user_json) + user = self._download_json( + resolv_url, uploader, 'Downloading user info') + base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader - tracks = [] + entries = [] for i in itertools.count(): - data = compat_urllib_parse.urlencode({'offset': i*50, - 'client_id': self._CLIENT_ID, - }) - tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data - response = self._download_webpage(tracks_url, uploader, - 'Downloading tracks page %s' % (i+1)) - new_tracks = json.loads(response) - tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) - if len(new_tracks) < 50: + data = compat_urllib_parse.urlencode({ + 'offset': i * 50, + 'client_id': self._CLIENT_ID, + }) + new_entries = self._download_json( + base_url + data, uploader, 'Downloading track page %s' % (i + 1)) + entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) + if len(new_entries) < 50: break return { '_type': 'playlist', 'id': compat_str(user['id']), 'title': user['username'], - 'entries': tracks, + 'entries': entries, + } + + +class SoundcloudPlaylistIE(SoundcloudIE): + _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)' + IE_NAME = 'soundcloud:playlist' + + # it's in tests/test_playlists.py + _TESTS = [] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + base_url = '%s//api.soundcloud.com/playlists/%s.json?' 
% (self.http_scheme(), playlist_id) + + data = compat_urllib_parse.urlencode({ + 'client_id': self._CLIENT_ID, + }) + data = self._download_json( + base_url + data, playlist_id, 'Downloading playlist') + + entries = [ + self._extract_info_dict(t, quiet=True) for t in data['tracks']] + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': data.get('title'), + 'description': data.get('description'), + 'entries': entries, }