ExfmIE: extract Soundcloud songs using SoundcloudIE
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 24 Jul 2013 12:39:21 +0000 (14:39 +0200)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 24 Jul 2013 12:39:21 +0000 (14:39 +0200)
Now SoundcloudIE accepts API URLs.

youtube_dl/extractor/exfm.py
youtube_dl/extractor/soundcloud.py

index fe1582d1a60d6a906d90be4e5d4bae0334b1fe10..3443f19c5f9bb8e2853c95b4ca5e153b395a701f 100644 (file)
@@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):
     IE_NAME = u'exfm'
     IE_DESC = u'ex.fm'
     _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
-    _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
-    _TEST = {
-        u'url': u'http://ex.fm/song/1bgtzg',
-        u'file': u'1bgtzg.mp3',
-        u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
-        u'info_dict': {
-            u"title": u"We Can't Stop",
-            u"uploader": u"Miley Cyrus",
-            u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
-        }
-    }
+    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
+    _TESTS = [
+        {
+            u'url': u'http://ex.fm/song/1bgtzg',
+            u'file': u'95223130.mp3',
+            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
+            u'info_dict': {
+                u"title": u"We Can't Stop - Miley Cyrus",
+                u"uploader": u"Miley Cyrus",
+                u'upload_date': u'20130603',
+                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
+            },
+            u'note': u'Soundcloud song',
+        },
+        {
+            u'url': u'http://ex.fm/song/wddt8',
+            u'file': u'wddt8.mp3',
+            u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
+            u'info_dict': {
+                u'title': u'Safe and Sound',
+                u'uploader': u'Capital Cities',
+            },
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):
         info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
         webpage = self._download_webpage(info_url, song_id)
         info = json.loads(webpage)
-        song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
-        if song_url is not None:
-               song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
-        else:
-               song_url = info['song']['url']
+        song_url = info['song']['url']
+        if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
+            self.to_screen('Soundcloud song detected')
+            return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
         return [{
             'id':          song_id,
             'url':         song_url,
index 54ff8db12e338d2217d53e4cbb42c84ed2962d4e..7c9f1c6b65998d57515b65dea5e9120772e0b019 100644 (file)
@@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor):
        of the stream token and uid
      """
 
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
+    _VALID_URL = r'''^(?:https?://)?
+                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
+                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
+                    )
+                    '''
     IE_NAME = u'soundcloud'
     _TEST = {
         u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
@@ -35,6 +39,10 @@ class SoundcloudIE(InfoExtractor):
 
     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
+
     def report_resolve(self, video_id):
         """Report information extraction."""
         self.to_screen(u'%s: Resolving id' % video_id)
@@ -63,21 +71,26 @@ class SoundcloudIE(InfoExtractor):
         }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
-        # extract uploader (which is in the url)
-        uploader = mobj.group(1)
-        # extract simple title (uploader + slug of song title)
-        slug_title =  mobj.group(2)
-        full_title = '%s/%s' % (uploader, slug_title)
-
-        self.report_resolve(full_title)
-
-        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
-        resolv_url = self._resolv_url(url)
-        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')
+        track_id = mobj.group('track_id')
+        if track_id is not None:
+            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
+            full_title = track_id
+        else:
+            # extract uploader (which is in the url)
+            uploader = mobj.group(1)
+            # extract simple title (uploader + slug of song title)
+            slug_title =  mobj.group(2)
+            full_title = '%s/%s' % (uploader, slug_title)
+    
+            self.report_resolve(full_title)
+    
+            url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
+            info_json_url = self._resolv_url(url)
+        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
 
         info = json.loads(info_json)
         return self._extract_info_dict(info, full_title)