soundgasm.py (2414B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 8 9 class SoundgasmIE(InfoExtractor): 10 IE_NAME = 'soundgasm' 11 _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' 12 _TEST = { 13 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', 14 'md5': '010082a2c802c5275bb00030743e75ad', 15 'info_dict': { 16 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', 17 'ext': 'm4a', 18 'title': 'Piano sample', 19 'description': 'Royalty Free Sample Music', 20 'uploader': 'ytdl', 21 } 22 } 23 24 def _real_extract(self, url): 25 mobj = re.match(self._VALID_URL, url) 26 display_id = mobj.group('display_id') 27 28 webpage = self._download_webpage(url, display_id) 29 30 audio_url = self._html_search_regex( 31 r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 32 'audio URL', group='url') 33 34 title = self._search_regex( 35 r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)', 36 webpage, 'title', default=display_id) 37 38 description = self._html_search_regex( 39 (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>', 40 r'(?s)<li>Description:\s(.*?)<\/li>'), 41 webpage, 'description', fatal=False) 42 43 audio_id = self._search_regex( 44 r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id) 45 46 return { 47 'id': audio_id, 48 'display_id': display_id, 49 'url': audio_url, 50 'vcodec': 'none', 51 'title': title, 52 'description': description, 53 'uploader': mobj.group('user'), 54 } 55 56 57 class SoundgasmProfileIE(InfoExtractor): 58 IE_NAME = 'soundgasm:profile' 59 _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' 60 _TEST = { 61 'url': 'http://soundgasm.net/u/ytdl', 62 'info_dict': { 63 'id': 'ytdl', 64 }, 65 'playlist_count': 1, 66 } 67 68 def _real_extract(self, url): 69 profile_id = self._match_id(url) 70 71 webpage = self._download_webpage(url, profile_id) 72 73 entries = [ 74 self.url_result(audio_url, 'Soundgasm') 75 for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] 76 77 return self.playlist_result(entries, profile_id)