audioboom.py (2659B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    clean_html,
    float_or_none,
)


class AudioBoomIE(InfoExtractor):
    """Extractor for audioboom.com ``/boos/<id>`` and ``/posts/<id>`` pages.

    Metadata is taken from the JSON embedded in the page's
    ``data-new-clip-store`` attribute when it is present, and from the
    page's meta tags otherwise.
    """

    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
        'md5': '7b00192e593ff227e6a315486979a42d',
        'info_dict': {
            'id': '7398103',
            'ext': 'mp3',
            'title': 'Asim Chaudhry',
            'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
            'duration': 4000.99,
            'uploader': 'Sue Perkins: An hour or so with...',
            'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
        }
    }, {
        'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the post page for *url* and return its info dict.

        Each field is read from the first clip of the embedded clip store
        first; when the clip does not supply a truthy value, the matching
        meta tag lookup on the page is used instead.
        """
        audio_id = self._match_id(url)
        page = self._download_webpage(url, audio_id)

        # The clip store is optional: a missing attribute yields '{}' and
        # unparsable JSON yields a falsy result instead of raising.
        raw_store = self._html_search_regex(
            r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
            page, 'clip store', default='{}', group='json')
        store = self._parse_json(raw_store, audio_id, fatal=False)

        # Use the first entry of "clips" only if it really is a dict;
        # otherwise an empty dict makes every lookup below return None.
        clip = {}
        if store:
            candidates = store.get('clips')
            if (candidates and isinstance(candidates, list)
                    and isinstance(candidates[0], dict)):
                clip = candidates[0]

        audio_url = clip.get('clipURLPriorToLoading')
        if not audio_url:
            audio_url = self._og_search_property('audio', page, 'audio url')

        title = clip.get('title')
        if not title:
            title = self._html_search_meta(
                ['og:title', 'og:audio:title', 'audio_title'], page)

        # Plain description first, then the HTML-stripped formatted one,
        # then the Open Graph description.
        description = clip.get('description')
        if not description:
            description = clean_html(clip.get('formattedDescription'))
        if not description:
            description = self._og_search_description(page)

        raw_duration = clip.get('duration')
        if not raw_duration:
            raw_duration = self._html_search_meta(
                'weibo:audio:duration', page)
        duration = float_or_none(raw_duration)

        uploader = clip.get('author')
        if not uploader:
            uploader = self._html_search_meta(
                ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'],
                page, 'uploader')

        uploader_url = clip.get('author_url')
        if not uploader_url:
            uploader_url = self._html_search_meta(
                'audioboo:channel', page, 'uploader url')

        return {
            'id': audio_id,
            'url': audio_url,
            'title': title,
            'description': description,
            'duration': duration,
            'uploader': uploader,
            'uploader_url': uploader_url,
        }