mixcloud.py (11661B)
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_chr,
    compat_ord,
    compat_str,
    compat_urllib_parse_unquote,
    compat_zip
)
from ..utils import (
    int_or_none,
    parse_iso8601,
    strip_or_none,
    try_get,
)


class MixcloudBaseIE(InfoExtractor):
    """Common base for the Mixcloud extractors: wraps the GraphQL endpoint."""

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Run a ``<object_type>Lookup`` GraphQL query and return its payload.

        The lookup is keyed on ``username`` and, when given, ``slug``;
        ``object_fields`` is the GraphQL selection set requested for the
        object.  ``display_id`` is only forwarded to ``_download_json``.

        Returns ``response['data'][<object_type>Lookup]``.

        NOTE: ``username`` and ``slug`` are interpolated into the query text
        verbatim (no escaping is applied here).
        """
        lookup_key = object_type + 'Lookup'
        return self._download_json(
            'https://www.mixcloud.com/graphql', display_id, query={
                'query': '''{
  %s(lookup: {username: "%s"%s}) {
    %s
  }
}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
            })['data'][lookup_key]


class MixcloudIE(MixcloudBaseIE):
    """Extractor for a single cloudcast URL (``/<username>/<slug>``)."""

    _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
    IE_NAME = 'mixcloud'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
        'info_dict': {
            'id': 'dholbach_cryptkeeper',
            'ext': 'm4a',
            'title': 'Cryptkeeper',
            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            'uploader': 'Daniel Holbach',
            'uploader_id': 'dholbach',
            'thumbnail': r're:https?://.*\.jpg',
            'view_count': int,
            'timestamp': 1321359578,
            'upload_date': '20111115',
        },
    }, {
        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
        'info_dict': {
            'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
            'ext': 'mp3',
            'title': 'Caribou 7 inch Vinyl Mix & Chat',
            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
            'uploader': 'Gilles Peterson Worldwide',
            'uploader_id': 'gillespeterson',
            'thumbnail': 're:https?://.*',
            'view_count': int,
            'timestamp': 1422987057,
            'upload_date': '20150203',
        },
    }, {
        'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
        'only_matching': True,
    }]
    _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'

    @staticmethod
    def _decrypt_xor_cipher(key, ciphertext):
        """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.

        ``key`` is repeated cyclically to cover the whole ``ciphertext``; the
        result is returned as a text string.
        """
        return ''.join([
            compat_chr(compat_ord(ch) ^ compat_ord(k))
            for ch, k in compat_zip(ciphertext, itertools.cycle(key))])

    def _real_extract(self, url):
        """Build the info dict for one cloudcast.

        Fetches the cloudcast via GraphQL, decodes its stream URLs into
        formats and collects comments, tags and counters.  Calls
        ``raise_login_required`` when no format is available and the
        cloudcast is flagged ``isExclusive``.
        """
        username, slug = re.match(self._VALID_URL, url).groups()
        username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
        track_id = '%s_%s' % (username, slug)

        cloudcast = self._call_api('cloudcast', '''audioLength
    comments(first: 100) {
      edges {
        node {
          comment
          created
          user {
            displayName
            username
          }
        }
      }
      totalCount
    }
    description
    favorites {
      totalCount
    }
    featuringArtistList
    isExclusive
    name
    owner {
      displayName
      url
      username
    }
    picture(width: 1024, height: 1024) {
      url
    }
    plays
    publishDate
    reposts {
      totalCount
    }
    streamInfo {
      dashUrl
      hlsUrl
      url
    }
    tags {
      tag {
        name
      }
    }''', track_id, username, slug)

        title = cloudcast['name']

        stream_info = cloudcast['streamInfo']
        formats = []

        for url_key in ('url', 'hlsUrl', 'dashUrl'):
            format_url = stream_info.get(url_key)
            if not format_url:
                continue
            # Stream URLs arrive base64-encoded and XOR-ed with _DECRYPTION_KEY.
            decrypted = self._decrypt_xor_cipher(
                self._DECRYPTION_KEY, compat_b64decode(format_url))
            if url_key == 'hlsUrl':
                formats.extend(self._extract_m3u8_formats(
                    decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif url_key == 'dashUrl':
                formats.extend(self._extract_mpd_formats(
                    decrypted, track_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'format_id': 'http',
                    'url': decrypted,
                    'downloader_options': {
                        # Mixcloud starts throttling at >~5M
                        'http_chunk_size': 5242880,
                    },
                })

        if not formats and cloudcast.get('isExclusive'):
            self.raise_login_required()

        self._sort_formats(formats)

        comments = []
        for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
            node = edge.get('node') or {}
            text = strip_or_none(node.get('comment'))
            if not text:
                # Skip comments without any text.
                continue
            user = node.get('user') or {}
            comments.append({
                'author': user.get('displayName'),
                'author_id': user.get('username'),
                'text': text,
                'timestamp': parse_iso8601(node.get('created')),
            })

        tags = []
        # `tags` may be absent or null in the response; treat that as no tags.
        for t in cloudcast.get('tags') or []:
            tag = try_get(t, lambda x: x['tag']['name'], compat_str)
            # Keep only entries that actually carry a tag name.
            if tag:
                tags.append(tag)

        get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))

        owner = cloudcast.get('owner') or {}

        return {
            'id': track_id,
            'title': title,
            'formats': formats,
            'description': cloudcast.get('description'),
            'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
            'uploader': owner.get('displayName'),
            'timestamp': parse_iso8601(cloudcast.get('publishDate')),
            'uploader_id': owner.get('username'),
            'uploader_url': owner.get('url'),
            'duration': int_or_none(cloudcast.get('audioLength')),
            'view_count': int_or_none(cloudcast.get('plays')),
            'like_count': get_count('favorites'),
            'repost_count': get_count('reposts'),
            'comment_count': get_count('comments'),
            'comments': comments,
            'tags': tags,
            'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
        }


class MixcloudPlaylistBaseIE(MixcloudBaseIE):
    """Shared paging logic for list-like Mixcloud pages.

    Subclasses supply ``_VALID_URL`` (two groups: username and slug/type),
    ``_ROOT_TYPE``, ``_TITLE_KEY``, ``_DESCRIPTION_KEY`` and
    ``_NODE_TEMPLATE``, and may override the two hooks below.
    """

    def _get_cloudcast(self, node):
        """Return the cloudcast dict held by an edge node (the node itself here)."""
        return node

    def _get_playlist_title(self, title, slug):
        """Return the playlist title; the default ignores ``slug``."""
        return title

    def _real_extract(self, url):
        """Collect every cloudcast of the listing, 100 per request.

        Follows ``pageInfo.endCursor`` until ``hasNextPage`` is false and
        returns a playlist result whose entries point at ``MixcloudIE``.
        """
        username, slug = re.match(self._VALID_URL, url).groups()
        username = compat_urllib_parse_unquote(username)
        if not slug:
            # No listing type in the URL: fall back to the uploads listing.
            slug = 'uploads'
        else:
            slug = compat_urllib_parse_unquote(slug)
        playlist_id = '%s_%s' % (username, slug)

        is_playlist_type = self._ROOT_TYPE == 'playlist'
        playlist_type = 'items' if is_playlist_type else slug
        list_filter = ''

        has_next_page = True
        entries = []
        while has_next_page:
            playlist = self._call_api(
                self._ROOT_TYPE, '''%s
    %s
    %s(first: 100%s) {
      edges {
        node {
          %s
        }
      }
      pageInfo {
        endCursor
        hasNextPage
      }
    }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
                playlist_id, username, slug if is_playlist_type else None)

            items = playlist.get(playlist_type) or {}
            for edge in items.get('edges', []):
                cloudcast = self._get_cloudcast(edge.get('node') or {})
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                # Use a separate name for the entry's slug: `slug` is still
                # needed for the next page request and for the playlist title.
                item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
                video_id = '%s_%s' % (owner_username, item_slug) if item_slug and owner_username else None
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), video_id))

            page_info = items['pageInfo']
            has_next_page = page_info['hasNextPage']
            # Resume after the last returned item on the next request.
            list_filter = ', after: "%s"' % page_info['endCursor']

        return self.playlist_result(
            entries, playlist_id,
            self._get_playlist_title(playlist[self._TITLE_KEY], slug),
            playlist.get(self._DESCRIPTION_KEY))


class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extractor for a user's uploads, favorites, listens or stream listing."""

    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 396,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
        },
        # 'params': {
        #     'playlist_items': '1-100',
        # },
        'playlist_mincount': 1623,
        'skip': 'Large list',
    }, {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar_stream',
            'title': 'First Ear (stream)',
            'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
        },
        'playlist_mincount': 271,
    }]

    _TITLE_KEY = 'displayName'
    _DESCRIPTION_KEY = 'biog'
    _ROOT_TYPE = 'user'
    _NODE_TEMPLATE = '''slug
          url
          owner { username }'''

    def _get_playlist_title(self, title, slug):
        """Return ``"<title> (<slug>)"``, e.g. the display name plus listing type."""
        return '%s (%s)' % (title, slug)


class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extractor for a user playlist (``/<user>/playlists/<playlist>``)."""

    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [{
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Ness Radio sessions',
        },
        'playlist_mincount': 59,
    }]
    _TITLE_KEY = 'name'
    _DESCRIPTION_KEY = 'description'
    _ROOT_TYPE = 'playlist'
    _NODE_TEMPLATE = '''cloudcast {
            slug
            url
            owner { username }
          }'''

    def _get_cloudcast(self, node):
        """Return the nested ``cloudcast`` dict of a playlist item (or ``{}``)."""
        return node.get('cloudcast') or {}