discoverygo.py (6079B)
1 from __future__ import unicode_literals 2 3 import re 4 5 from .common import InfoExtractor 6 from ..utils import ( 7 determine_ext, 8 extract_attributes, 9 ExtractorError, 10 int_or_none, 11 parse_age_limit, 12 remove_end, 13 unescapeHTML, 14 url_or_none, 15 ) 16 17 18 class DiscoveryGoBaseIE(InfoExtractor): 19 _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: 20 discovery| 21 investigationdiscovery| 22 discoverylife| 23 animalplanet| 24 ahctv| 25 destinationamerica| 26 sciencechannel| 27 tlc| 28 velocitychannel 29 )go\.com/%s(?P<id>[^/?#&]+)''' 30 31 def _extract_video_info(self, video, stream, display_id): 32 title = video['name'] 33 34 if not stream: 35 if video.get('authenticated') is True: 36 raise ExtractorError( 37 'This video is only available via cable service provider subscription that' 38 ' is not currently supported. You may want to use --cookies.', expected=True) 39 else: 40 raise ExtractorError('Unable to find stream') 41 STREAM_URL_SUFFIX = 'streamUrl' 42 formats = [] 43 for stream_kind in ('', 'hds'): 44 suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX 45 stream_url = stream.get('%s%s' % (stream_kind, suffix)) 46 if not stream_url: 47 continue 48 if stream_kind == '': 49 formats.extend(self._extract_m3u8_formats( 50 stream_url, display_id, 'mp4', entry_protocol='m3u8_native', 51 m3u8_id='hls', fatal=False)) 52 elif stream_kind == 'hds': 53 formats.extend(self._extract_f4m_formats( 54 stream_url, display_id, f4m_id=stream_kind, fatal=False)) 55 self._sort_formats(formats) 56 57 video_id = video.get('id') or display_id 58 description = video.get('description', {}).get('detailed') 59 duration = int_or_none(video.get('duration')) 60 61 series = video.get('show', {}).get('name') 62 season_number = int_or_none(video.get('season', {}).get('number')) 63 episode_number = int_or_none(video.get('episodeNumber')) 64 65 tags = video.get('tags') 66 age_limit = parse_age_limit(video.get('parental', {}).get('rating')) 67 68 subtitles = {} 69 captions = stream.get('captions') 70 if isinstance(captions, list): 71 for caption in captions: 72 subtitle_url = url_or_none(caption.get('fileUrl')) 73 if not subtitle_url or not subtitle_url.startswith('http'): 74 continue 75 lang = caption.get('fileLang', 'en') 76 ext = determine_ext(subtitle_url) 77 subtitles.setdefault(lang, []).append({ 78 'url': subtitle_url, 79 'ext': 'ttml' if ext == 'xml' else ext, 80 }) 81 82 return { 83 'id': video_id, 84 'display_id': display_id, 85 'title': title, 86 'description': description, 87 'duration': duration, 88 'series': series, 89 'season_number': season_number, 90 'episode_number': episode_number, 91 'tags': tags, 92 'age_limit': age_limit, 93 'formats': formats, 94 'subtitles': subtitles, 95 } 96 97 98 class DiscoveryGoIE(DiscoveryGoBaseIE): 99 _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' 100 _GEO_COUNTRIES = ['US'] 101 _TEST = { 102 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', 103 'info_dict': { 104 'id': '58c167d86b66d12f2addeb01', 105 'ext': 'mp4', 106 'title': 'Reaper Madness', 107 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', 108 'duration': 2519, 109 'series': 'Bering Sea Gold', 110 'season_number': 8, 111 'episode_number': 6, 112 'age_limit': 14, 113 }, 114 } 115 116 def _real_extract(self, url): 117 display_id = self._match_id(url) 118 119 webpage = self._download_webpage(url, display_id) 120 121 container = extract_attributes( 122 self._search_regex( 123 r'(<div[^>]+class=["\']video-player-container[^>]+>)', 124 webpage, 'video container')) 125 126 video = self._parse_json( 127 container.get('data-video') or container.get('data-json'), 128 display_id) 129 130 stream = video.get('stream') 131 132 return self._extract_video_info(video, stream, display_id) 133 134 135 class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): 136 _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' 137 _TEST = { 138 'url': 'https://www.discoverygo.com/bering-sea-gold/', 139 'info_dict': { 140 'id': 'bering-sea-gold', 141 'title': 'Bering Sea Gold', 142 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', 143 }, 144 'playlist_mincount': 6, 145 } 146 147 @classmethod 148 def suitable(cls, url): 149 return False if DiscoveryGoIE.suitable(url) else super( 150 DiscoveryGoPlaylistIE, cls).suitable(url) 151 152 def _real_extract(self, url): 153 display_id = self._match_id(url) 154 155 webpage = self._download_webpage(url, display_id) 156 157 entries = [] 158 for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage): 159 data = self._parse_json( 160 mobj.group('json'), display_id, 161 transform_source=unescapeHTML, fatal=False) 162 if not isinstance(data, dict) or data.get('type') != 'episode': 163 continue 164 episode_url = data.get('socialUrl') 165 if not episode_url: 166 continue 167 entries.append(self.url_result( 168 episode_url, ie=DiscoveryGoIE.ie_key(), 169 video_id=data.get('id'))) 170 171 return self.playlist_result( 172 entries, display_id, 173 remove_end(self._og_search_title( 174 webpage, fatal=False), ' | Discovery GO'), 175 self._og_search_description(webpage))