spreaker.py (6030B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import itertools 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 float_or_none, 10 int_or_none, 11 str_or_none, 12 try_get, 13 unified_timestamp, 14 url_or_none, 15 ) 16 17 18 def _extract_episode(data, episode_id=None): 19 title = data['title'] 20 download_url = data['download_url'] 21 22 series = try_get(data, lambda x: x['show']['title'], compat_str) 23 uploader = try_get(data, lambda x: x['author']['fullname'], compat_str) 24 25 thumbnails = [] 26 for image in ('image_original', 'image_medium', 'image'): 27 image_url = url_or_none(data.get('%s_url' % image)) 28 if image_url: 29 thumbnails.append({'url': image_url}) 30 31 def stats(key): 32 return int_or_none(try_get( 33 data, 34 (lambda x: x['%ss_count' % key], 35 lambda x: x['stats']['%ss' % key]))) 36 37 def duration(key): 38 return float_or_none(data.get(key), scale=1000) 39 40 return { 41 'id': compat_str(episode_id or data['episode_id']), 42 'url': download_url, 43 'display_id': data.get('permalink'), 44 'title': title, 45 'description': data.get('description'), 46 'timestamp': unified_timestamp(data.get('published_at')), 47 'uploader': uploader, 48 'uploader_id': str_or_none(data.get('author_id')), 49 'creator': uploader, 50 'duration': duration('duration') or duration('length'), 51 'view_count': stats('play'), 52 'like_count': stats('like'), 53 'comment_count': stats('message'), 54 'format': 'MPEG Layer 3', 55 'format_id': 'mp3', 56 'container': 'mp3', 57 'ext': 'mp3', 58 'thumbnails': thumbnails, 59 'series': series, 60 'extractor_key': SpreakerIE.ie_key(), 61 } 62 63 64 class SpreakerIE(InfoExtractor): 65 _VALID_URL = r'''(?x) 66 https?:// 67 api\.spreaker\.com/ 68 (?: 69 (?:download/)?episode| 70 v2/episodes 71 )/ 72 (?P<id>\d+) 73 ''' 74 _TESTS = [{ 75 'url': 'https://api.spreaker.com/episode/12534508', 76 'info_dict': { 77 'id': '12534508', 78 'display_id': 'swm-ep15-how-to-market-your-music-part-2', 79 'ext': 'mp3', 80 'title': 'EP:15 | Music Marketing (Likes) - Part 2', 81 'description': 'md5:0588c43e27be46423e183076fa071177', 82 'timestamp': 1502250336, 83 'upload_date': '20170809', 84 'uploader': 'SWM', 85 'uploader_id': '9780658', 86 'duration': 1063.42, 87 'view_count': int, 88 'like_count': int, 89 'comment_count': int, 90 'series': 'Success With Music (SWM)', 91 }, 92 }, { 93 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', 94 'only_matching': True, 95 }, { 96 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', 97 'only_matching': True, 98 }] 99 100 def _real_extract(self, url): 101 episode_id = self._match_id(url) 102 data = self._download_json( 103 'https://api.spreaker.com/v2/episodes/%s' % episode_id, 104 episode_id)['response']['episode'] 105 return _extract_episode(data, episode_id) 106 107 108 class SpreakerPageIE(InfoExtractor): 109 _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)' 110 _TESTS = [{ 111 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', 112 'only_matching': True, 113 }] 114 115 def _real_extract(self, url): 116 display_id = self._match_id(url) 117 webpage = self._download_webpage(url, display_id) 118 episode_id = self._search_regex( 119 (r'data-episode_id=["\'](?P<id>\d+)', 120 r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id') 121 return self.url_result( 122 'https://api.spreaker.com/episode/%s' % episode_id, 123 ie=SpreakerIE.ie_key(), video_id=episode_id) 124 125 126 class SpreakerShowIE(InfoExtractor): 127 _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)' 128 _TESTS = [{ 129 'url': 'https://api.spreaker.com/show/4652058', 130 'info_dict': { 131 'id': '4652058', 132 }, 133 'playlist_mincount': 118, 134 }] 135 136 def _entries(self, show_id): 137 for page_num in itertools.count(1): 138 episodes = self._download_json( 139 'https://api.spreaker.com/show/%s/episodes' % show_id, 140 show_id, note='Downloading JSON page %d' % page_num, query={ 141 'page': page_num, 142 'max_per_page': 100, 143 }) 144 pager = try_get(episodes, lambda x: x['response']['pager'], dict) 145 if not pager: 146 break 147 results = pager.get('results') 148 if not results or not isinstance(results, list): 149 break 150 for result in results: 151 if not isinstance(result, dict): 152 continue 153 yield _extract_episode(result) 154 if page_num == pager.get('last_page'): 155 break 156 157 def _real_extract(self, url): 158 show_id = self._match_id(url) 159 return self.playlist_result(self._entries(show_id), playlist_id=show_id) 160 161 162 class SpreakerShowPageIE(InfoExtractor): 163 _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)' 164 _TESTS = [{ 165 'url': 'https://www.spreaker.com/show/success-with-music', 166 'only_matching': True, 167 }] 168 169 def _real_extract(self, url): 170 display_id = self._match_id(url) 171 webpage = self._download_webpage(url, display_id) 172 show_id = self._search_regex( 173 r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id') 174 return self.url_result( 175 'https://api.spreaker.com/show/%s' % show_id, 176 ie=SpreakerShowIE.ie_key(), video_id=show_id)