curiositystream.py (6829B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    urlencode_postdata,
    compat_str,
    ExtractorError,
)


class CuriosityStreamBaseIE(InfoExtractor):
    """Shared API access and login handling for the CuriosityStream extractors."""

    _NETRC_MACHINE = 'curiositystream'
    # Set by `_real_initialize` after a successful login; None means the API
    # is called without an `X-Auth-Token` header.
    _auth_token = None
    _API_BASE_URL = 'https://api.curiositystream.com/v1/'

    def _handle_errors(self, result):
        """Raise an expected ExtractorError if `result` carries an error message.

        `result` is a decoded API response (dict). The message is read from
        `result['error']['message']`; when it is a dict its values are joined
        with ', '.
        """
        # `or {}` also covers a response where the `error` key is present but
        # null, which `.get('error', {})` would not.
        error = (result.get('error') or {}).get('message')
        if error:
            if isinstance(error, dict):
                error = ', '.join(error.values())
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)

    def _call_api(self, path, video_id, query=None):
        """Fetch `_API_BASE_URL + path` as JSON and return its `data` member.

        The auth token, when one has been obtained, is sent in the
        `X-Auth-Token` header. API-reported errors are raised through
        `_handle_errors`.
        """
        headers = {}
        if self._auth_token:
            headers['X-Auth-Token'] = self._auth_token
        result = self._download_json(
            self._API_BASE_URL + path, video_id, headers=headers, query=query)
        self._handle_errors(result)
        return result['data']

    def _real_initialize(self):
        """Log in when credentials are available and remember the auth token.

        Does nothing when `_get_login_info()` yields no email. Otherwise the
        credentials are sent as URL-encoded form data to the `login` endpoint
        and the returned token is stored in `self._auth_token`.
        """
        email, password = self._get_login_info()
        if email is None:
            return
        result = self._download_json(
            self._API_BASE_URL + 'login', None, data=urlencode_postdata({
                'email': email,
                'password': password,
            }))
        self._handle_errors(result)
        self._auth_token = result['message']['auth_token']


class CuriosityStreamIE(CuriosityStreamBaseIE):
    """Extractor for single CuriosityStream videos (`/video/<id>`)."""

    IE_NAME = 'curiositystream'
    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/video/2',
        'info_dict': {
            'id': '2',
            'ext': 'mp4',
            'title': 'How Did You Develop The Internet?',
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
        },
        'params': {
            'format': 'bestvideo',
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _encoding_direct_formats(encoding):
        """Build format dicts for the `url` / `file_url` members of one encoding.

        Returns an empty list when the encoding has neither URL. RTMP URLs
        are split into base URL, app and play path; anything else is treated
        as a plain HTTP format.
        """
        encoding_url = encoding.get('url')
        file_url = encoding.get('file_url')
        if not encoding_url and not file_url:
            return []

        # Metadata shared by every format derived from this encoding.
        common = {
            'width': int_or_none(encoding.get('width')),
            'height': int_or_none(encoding.get('height')),
            'vbr': int_or_none(encoding.get('video_bitrate')),
            'abr': int_or_none(encoding.get('audio_bitrate')),
            'filesize': int_or_none(encoding.get('size_in_bytes')),
            'vcodec': encoding.get('video_codec'),
            'acodec': encoding.get('audio_codec'),
            'container': encoding.get('container_type'),
        }

        formats = []
        for f_url in (encoding_url, file_url):
            if not f_url:
                continue
            fmt = common.copy()
            rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
            if rtmp:
                fmt.update({
                    'url': rtmp.group('url'),
                    'play_path': rtmp.group('playpath'),
                    'app': rtmp.group('app'),
                    'ext': 'flv',
                    'format_id': 'rtmp',
                })
            else:
                fmt.update({
                    'url': f_url,
                    'format_id': 'http',
                })
            formats.append(fmt)
        return formats

    @staticmethod
    def _caption_subtitles(media):
        """Map the `closed_captions` of `media` to a language -> [{'url': ...}] dict.

        Captions without a `file` are skipped. The language key is the
        caption's `code`, else its `language`, else 'en'.
        """
        subtitles = {}
        for closed_caption in media.get('closed_captions') or []:
            sub_url = closed_caption.get('file')
            if not sub_url:
                continue
            lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
            subtitles.setdefault(lang, []).append({
                'url': sub_url,
            })
        return subtitles

    def _real_extract(self, url):
        """Return the info dict for the video addressed by `url`.

        The media endpoint is queried once per manifest type ('m3u8', then
        'mpd'); formats from both responses are merged. Metadata is taken
        from the last response.
        """
        video_id = self._match_id(url)

        formats = []
        for encoding_format in ('m3u8', 'mpd'):
            media = self._call_api('media/' + video_id, video_id, query={
                'encodingsNew': 'true',
                'encodingsFormat': encoding_format,
            })
            # `or []` tolerates a null `encodings` member as well as a
            # missing one.
            for encoding in media.get('encodings') or []:
                playlist_url = encoding.get('master_playlist_url')
                # Only hand a real URL to the manifest helpers; an encoding
                # may carry direct URLs only, and passing None on would fail
                # inside the download machinery rather than be skipped.
                if playlist_url:
                    if encoding_format == 'm3u8':
                        # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
                        formats.extend(self._extract_m3u8_formats(
                            playlist_url, video_id, 'mp4',
                            m3u8_id='hls', fatal=False))
                    elif encoding_format == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            playlist_url, video_id, mpd_id='dash', fatal=False))
                formats.extend(self._encoding_direct_formats(encoding))
        self._sort_formats(formats)

        title = media['title']

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': media.get('description'),
            'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
            'duration': int_or_none(media.get('duration')),
            'tags': media.get('tags'),
            'subtitles': self._caption_subtitles(media),
        }


class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
    """Extractor for CuriosityStream collections and series (playlists)."""

    IE_NAME = 'curiositystream:collection'
    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://app.curiositystream.com/collection/2',
        'info_dict': {
            'id': '2',
            'title': 'Curious Minds: The Internet',
            'description': 'How is the internet shaping our lives in the 21st Century?',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://curiositystream.com/series/2',
        'only_matching': True,
    }, {
        'url': 'https://curiositystream.com/collections/36',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one `url_result` per media item of the collection."""
        collection_id = self._match_id(url)
        collection = self._call_api(
            'collections/' + collection_id, collection_id)
        entries = []
        for media in collection.get('media') or []:
            raw_id = media.get('id')
            # Without an id there is no video URL to build; stringifying None
            # would yield the bogus entry `.../video/None`.
            if raw_id is None:
                continue
            media_id = compat_str(raw_id)
            entries.append(self.url_result(
                'https://curiositystream.com/video/' + media_id,
                CuriosityStreamIE.ie_key(), media_id))
        return self.playlist_result(
            entries, collection_id,
            collection.get('title'), collection.get('description'))