youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 30e6161799dfdf9f53d3c8eaa9e10afe615bc5dd
parent dc24a7d4a2210d6bf282e234e49193d204c41124
Author: Sergey M․ <dstftw@gmail.com>
Date:   Thu, 26 Oct 2017 23:16:16 +0700

[soundgasm] Improve extraction (closes #14588)

Diffstat:
M youtube_dl/extractor/soundgasm.py | 35 ++++++++++++++++++++++++-----------
1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py
@@ -8,36 +8,49 @@ from .common import InfoExtractor
 
 class SoundgasmIE(InfoExtractor):
     IE_NAME = 'soundgasm'
-    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
+    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
     _TEST = {
         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
         'md5': '010082a2c802c5275bb00030743e75ad',
         'info_dict': {
             'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
             'ext': 'm4a',
-            'title': 'ytdl_Piano-sample',
-            'description': 'Royalty Free Sample Music'
+            'title': 'Piano sample',
+            'description': 'Royalty Free Sample Music',
+            'uploader': 'ytdl',
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('title')
-        audio_title = mobj.group('user') + '_' + mobj.group('title')
+        display_id = mobj.group('display_id')
+
         webpage = self._download_webpage(url, display_id)
+
         audio_url = self._html_search_regex(
-            r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
-        audio_id = re.split(r'\/|\.', audio_url)[-2]
+            r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'audio URL', group='url')
+
+        title = self._search_regex(
+            r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
+            webpage, 'title', default=display_id)
+
         description = self._html_search_regex(
-            r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
-            fatal=False)
+            (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
+             r'(?s)<li>Description:\s(.*?)<\/li>'),
+            webpage, 'description', fatal=False)
+
+        audio_id = self._search_regex(
+            r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
 
         return {
             'id': audio_id,
             'display_id': display_id,
             'url': audio_url,
-            'title': audio_title,
-            'description': description
+            'vcodec': 'none',
+            'title': title,
+            'description': description,
+            'uploader': mobj.group('user'),
         }