gaia.py (4700B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urllib_parse_unquote,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    str_or_none,
    strip_or_none,
    try_get,
    urlencode_postdata,
)


class GaiaIE(InfoExtractor):
    """Extractor for gaia.com video pages.

    Only URLs carrying a ``fullplayer=feature`` or ``fullplayer=preview``
    query parameter are matched; that parameter selects which of the two
    media entries attached to the page's node is extracted.
    """

    _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
    _TESTS = [{
        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
        'info_dict': {
            'id': '89356',
            'ext': 'mp4',
            'title': 'Connecting with Universal Consciousness',
            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
            'upload_date': '20151116',
            'timestamp': 1447707266,
            'duration': 936,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
        'info_dict': {
            'id': '89351',
            'ext': 'mp4',
            'title': 'Connecting with Universal Consciousness',
            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
            'upload_date': '20151116',
            'timestamp': 1447707266,
            'duration': 53,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]
    _NETRC_MACHINE = 'gaia'
    # JWT taken from the "auth" cookie or from the login response; stays
    # None when neither a cookie nor credentials are available.
    _jwt = None

    def _real_initialize(self):
        """Obtain a JWT from the ``auth`` cookie or by logging in.

        Returns without setting ``_jwt`` when there is no usable cookie and
        no credentials were supplied.

        Raises:
            ExtractorError: when the login response reports
                ``success: false``; the message is built from the
                response's ``messages`` list.
        """
        auth = self._get_cookies('https://www.gaia.com/').get('auth')
        if auth:
            # The cookie value is URL-encoded JSON; a value that cannot be
            # parsed is not fatal and falls through to the login below.
            auth = self._parse_json(
                compat_urllib_parse_unquote(auth.value),
                None, fatal=False)
        if not auth:
            username, password = self._get_login_info()
            if username is None:
                return
            auth = self._download_json(
                'https://auth.gaia.com/v1/login',
                None, data=urlencode_postdata({
                    'username': username,
                    'password': password
                }))
            if auth.get('success') is False:
                raise ExtractorError(', '.join(auth['messages']), expected=True)
        if auth:
            self._jwt = auth.get('jwt')

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        The page path is resolved to a node id, the node is fetched, and
        the media entry named by the URL's ``fullplayer`` value ('feature'
        or 'preview') supplies the media id whose HLS manifest is
        extracted.
        """
        display_id, vtype = re.search(self._VALID_URL, url).groups()
        node_id = self._download_json(
            'https://brooklyn.gaia.com/pathinfo', display_id, query={
                'path': 'video/' + display_id,
            })['id']
        # '%s' rather than '%d': yields the same URL for an integer id and
        # does not raise TypeError if the id arrives as a numeric string.
        node = self._download_json(
            'https://brooklyn.gaia.com/node/%s' % node_id, node_id)
        vdata = node[vtype]
        media_id = compat_str(vdata['nid'])
        title = node['title']

        # The media request carries the JWT only when one was obtained
        # during initialization.
        headers = None
        if self._jwt:
            headers = {'Authorization': 'Bearer ' + self._jwt}
        media = self._download_json(
            'https://brooklyn.gaia.com/media/' + media_id,
            media_id, headers=headers)
        formats = self._extract_m3u8_formats(
            media['mediaUrls']['bcHLS'], media_id, 'mp4')
        self._sort_formats(formats)

        subtitles = {}
        # `or {}` instead of a .get() default: the default only applies to
        # a missing key, so an explicit JSON null would otherwise raise
        # AttributeError on the calls below.
        text_tracks = media.get('textTracks') or {}
        for key in ('captions', 'subtitles'):
            for lang, sub_url in (text_tracks.get(key) or {}).items():
                if not sub_url:
                    # An entry without a URL cannot be downloaded.
                    continue
                subtitles.setdefault(lang, []).append({
                    'url': sub_url,
                })

        fivestar = node.get('fivestar', {})
        fields = node.get('fields', {})

        def get_field_value(key, value_key='value'):
            # Each field is read as a list whose first item is a dict;
            # try_get yields None when that shape is not present.
            return try_get(fields, lambda x: x[key][0][value_key])

        return {
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
            'timestamp': int_or_none(node.get('created')),
            'subtitles': subtitles,
            'duration': int_or_none(vdata.get('duration')),
            'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
            'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
            'comment_count': int_or_none(node.get('comment_count')),
            'series': try_get(node, lambda x: x['series']['title'], compat_str),
            'season_number': int_or_none(get_field_value('season')),
            'season_id': str_or_none(get_field_value('series_nid', 'nid')),
            'episode_number': int_or_none(get_field_value('episode')),
        }