youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit b55ee18ff3a9642fe25a977e1152472877294493
parent e5763a7a7e630cfd94b8993a9592cb3243890e0c
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu, 22 Jan 2015 12:04:07 +0100

[hearthisat] Add support for more high-quality download links

Diffstat:
M youtube_dl/extractor/hearthisat.py | 49 +++++++++++++++++++++++++++++++++++++------------
M youtube_dl/utils.py | 8 ++++++++
2 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py @@ -4,10 +4,15 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_request +from ..compat import ( + compat_urllib_request, + compat_urlparse, +) from ..utils import ( + HEADRequest, str_to_int, urlencode_postdata, + urlhandle_detect_ext, ) @@ -16,10 +21,10 @@ class HearThisAtIE(InfoExtractor): _PLAYLIST_URL = 'https://hearthis.at/playlist.php' _TEST = { 'url': 'https://hearthis.at/moofi/dr-kreep', - 'md5': 'd594c573227a89f4256f0b03e68c80cc', + 'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 'info_dict': { 'id': '150939', - 'ext': 'mp3', + 'ext': 'wav', 'title': 'Moofi - Dr. Kreep', 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1421564134, @@ -67,18 +72,38 @@ class HearThisAtIE(InfoExtractor): timestamp = str_to_int(self._search_regex( r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False)) - track_url = self._search_regex( - r'<a[^>]+data-mp3="([^"]+)"', webpage, 'track URL') - - formats = [{ - 'format_id': 'mp3', - 'url': track_url, - 'vcodec': 'none', - }] + formats = [] + mp3_url = self._search_regex( + r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"', + webpage, 'title', fatal=False) + if mp3_url: + formats.append({ + 'format_id': 'mp3', + 'vcodec': 'none', + 'acodec': 'mp3', + 'url': mp3_url, + }) + download_path = self._search_regex( + r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"', + webpage, 'download URL', default=None) + if download_path: + download_url = compat_urlparse.urljoin(url, download_path) + ext_req = HEADRequest(download_url) + ext_handle = self._request_webpage( + ext_req, display_id, note='Determining extension') + ext = urlhandle_detect_ext(ext_handle) + formats.append({ + 'format_id': 'download', + 'vcodec': 'none', + 'ext': ext, + 'url': download_url, + 'preference': 2, # Usually better quality + }) + 
self._sort_formats(formats) return { 'id': track_id, - 'display-id': display_id, + 'display_id': display_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -1612,6 +1612,14 @@ def urlhandle_detect_ext(url_handle): except AttributeError: # Python < 3 getheader = url_handle.info().getheader + cd = getheader('Content-Disposition') + if cd: + m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd) + if m: + e = determine_ext(m.group('filename'), default_ext=None) + if e: + return e + return getheader('Content-Type').split("/")[1]