youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 57c7411f4615cda595f06fa461719b4336d808f4
parent d0a122348e56c68dad1f34d67103ab3a353f602a
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue, 13 May 2014 09:42:38 +0200

[mixcloud] Shed API dependency (#2904)

Diffstat:
M youtube_dl/extractor/common.py | 10 ++++++++++
M youtube_dl/extractor/mixcloud.py | 52 +++++++++++++++++++++++++++++++++++++++-------------
2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -556,6 +556,16 @@ class InfoExtractor(object):
             if self._downloader.params.get('prefer_insecure', False)
             else 'https:')
 
+    def _proto_relative_url(self, url, scheme=None):
+        if url is None:
+            return url
+        if url.startswith('//'):
+            if scheme is None:
+                scheme = self.http_scheme()
+            return scheme + url
+        else:
+            return url
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
@@ -4,9 +4,10 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
     compat_urllib_parse,
     ExtractorError,
+    int_or_none,
+    parse_iso8601,
 )
 
 
@@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
             'uploader': 'Daniel Holbach',
             'uploader_id': 'dholbach',
             'upload_date': '20111115',
+            'timestamp': 1321359578,
+            'thumbnail': 're:https?://.*\.jpg',
+            'view_count': int,
+            'like_count': int,
         },
     }
 
@@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):
 
         webpage = self._download_webpage(url, track_id)
 
-        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
-        info = self._download_json(
-            api_url, track_id, 'Downloading cloudcast info')
-
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
@@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):
             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
             final_song_url = self._get_url(template_url)
         if final_song_url is None:
-            raise ExtractorError(u'Unable to extract track url')
+            raise ExtractorError('Unable to extract track url')
+
+        PREFIX = (
+            r'<div class="cloudcast-play-button-container"'
+            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
+        title = self._html_search_regex(
+            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
+        thumbnail = self._proto_relative_url(self._html_search_regex(
+            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
+            fatal=False))
+        uploader = self._html_search_regex(
+            PREFIX + r'm-owner-name="([^"]+)"',
+            webpage, 'uploader', fatal=False)
+        uploader_id = self._search_regex(
+            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
+        description = self._og_search_description(webpage)
+        like_count = int_or_none(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+            webpage, 'like count', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+            webpage, 'play count', fatal=False))
+        timestamp = parse_iso8601(self._search_regex(
+            r'<time itemprop="dateCreated" datetime="([^"]+)">',
+            webpage, 'upload date'))
 
         return {
             'id': track_id,
-            'title': info['name'],
+            'title': title,
             'url': final_song_url,
-            'description': info.get('description'),
-            'thumbnail': info['pictures'].get('extra_large'),
-            'uploader': info['user']['name'],
-            'uploader_id': info['user']['username'],
-            'upload_date': unified_strdate(info['created_time']),
-            'view_count': info['play_count'],
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'like_count': like_count,
         }