kinja.py (8568B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import ( 8 compat_str, 9 compat_urllib_parse_unquote, 10 ) 11 from ..utils import ( 12 int_or_none, 13 parse_iso8601, 14 strip_or_none, 15 try_get, 16 unescapeHTML, 17 urljoin, 18 ) 19 20 21 class KinjaEmbedIE(InfoExtractor): 22 IENAME = 'kinja:embed' 23 _DOMAIN_REGEX = r'''(?:[^.]+\.)? 24 (?: 25 avclub| 26 clickhole| 27 deadspin| 28 gizmodo| 29 jalopnik| 30 jezebel| 31 kinja| 32 kotaku| 33 lifehacker| 34 splinternews| 35 the(?:inventory|onion|root|takeout) 36 )\.com''' 37 _COMMON_REGEX = r'''/ 38 (?: 39 ajax/inset| 40 embed/video 41 )/iframe\?.*?\bid=''' 42 _VALID_URL = r'''(?x)https?://%s%s 43 (?P<type> 44 fb| 45 imgur| 46 instagram| 47 jwp(?:layer)?-video| 48 kinjavideo| 49 mcp| 50 megaphone| 51 ooyala| 52 soundcloud(?:-playlist)?| 53 tumblr-post| 54 twitch-stream| 55 twitter| 56 ustream-channel| 57 vimeo| 58 vine| 59 youtube-(?:list|video) 60 )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX) 61 _TESTS = [{ 62 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621', 63 'only_matching': True, 64 }, { 65 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313', 66 'only_matching': True, 67 }, { 68 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075', 69 'only_matching': True, 70 }, { 71 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5', 72 'only_matching': True, 73 }, { 74 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047', 75 'only_matching': True, 76 }, { 77 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750', 78 'only_matching': True, 79 }, { 80 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight', 81 'only_matching': True, 82 }, { 83 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra', 84 'only_matching': True, 85 }, { 86 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422', 87 'only_matching': True, 88 }, { 89 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700', 90 'only_matching': True, 91 }, { 92 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502', 93 'only_matching': True, 94 }, { 95 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD', 96 'only_matching': True, 97 }, { 98 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E', 99 'only_matching': True, 100 }, { 101 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE', 102 'only_matching': True, 103 }] 104 _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform') 105 _PROVIDER_MAP = { 106 'fb': ('facebook.com/video.php?v=', 'Facebook'), 107 'imgur': ('imgur.com/', 'Imgur'), 108 'instagram': ('instagram.com/p/', 'Instagram'), 109 'jwplayer-video': _JWPLATFORM_PROVIDER, 110 'jwp-video': _JWPLATFORM_PROVIDER, 111 'megaphone': ('player.megaphone.fm/', 'Generic'), 112 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'), 113 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'), 114 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'), 115 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'), 116 'twitch-stream': ('twitch.tv/', 'TwitchStream'), 117 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'), 118 'ustream-channel': ('ustream.tv/embed/', 'Ustream'), 119 'vimeo': ('vimeo.com/', 'Vimeo'), 120 'vine': ('vine.co/v/', 'Vine'), 121 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'), 122 'youtube-video': ('youtube.com/embed/', 'Youtube'), 123 } 124 125 @staticmethod 126 def _extract_urls(webpage, url): 127 return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer( 128 r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX), 129 webpage)] 130 131 def _real_extract(self, url): 132 video_type, video_id = re.match(self._VALID_URL, url).groups() 133 134 provider = self._PROVIDER_MAP.get(video_type) 135 if provider: 136 video_id = compat_urllib_parse_unquote(video_id) 137 if video_type == 'tumblr-post': 138 video_id, blog = video_id.split('-', 1) 139 result_url = provider[0] % (blog, video_id) 140 elif video_type == 'youtube-list': 141 video_id, playlist_id = video_id.split('/') 142 result_url = provider[0] % (video_id, playlist_id) 143 else: 144 if video_type == 'ooyala': 145 video_id = video_id.split('/')[0] 146 result_url = provider[0] + video_id 147 return self.url_result('http://' + result_url, provider[1]) 148 149 if video_type == 'kinjavideo': 150 data = self._download_json( 151 'https://kinja.com/api/core/video/views/videoById', 152 video_id, query={'videoId': video_id})['data'] 153 title = data['title'] 154 155 formats = [] 156 for k in ('signedPlaylist', 'streaming'): 157 m3u8_url = data.get(k + 'Url') 158 if m3u8_url: 159 formats.extend(self._extract_m3u8_formats( 160 m3u8_url, video_id, 'mp4', 'm3u8_native', 161 m3u8_id='hls', fatal=False)) 162 self._sort_formats(formats) 163 164 thumbnail = None 165 poster = data.get('poster') or {} 166 poster_id = poster.get('id') 167 if poster_id: 168 thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg') 169 170 return { 171 'id': video_id, 172 'title': title, 173 'description': strip_or_none(data.get('description')), 174 'formats': formats, 175 'tags': data.get('tags'), 176 'timestamp': int_or_none(try_get( 177 data, lambda x: x['postInfo']['publishTimeMillis']), 1000), 178 'thumbnail': thumbnail, 179 'uploader': data.get('network'), 180 } 181 else: 182 video_data = self._download_json( 183 'https://api.vmh.univision.com/metadata/v1/content/' + video_id, 184 video_id)['videoMetadata'] 185 iptc = video_data['photoVideoMetadataIPTC'] 186 title = iptc['title']['en'] 187 fmg = video_data.get('photoVideoMetadata_fmg') or {} 188 tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' 189 data = self._download_json( 190 tvss_domain + '/api/v3/video-auth/url-signature-tokens', 191 video_id, query={'mcpids': video_id})['data'][0] 192 formats = [] 193 194 rendition_url = data.get('renditionUrl') 195 if rendition_url: 196 formats = self._extract_m3u8_formats( 197 rendition_url, video_id, 'mp4', 198 'm3u8_native', m3u8_id='hls', fatal=False) 199 200 fallback_rendition_url = data.get('fallbackRenditionUrl') 201 if fallback_rendition_url: 202 formats.append({ 203 'format_id': 'fallback', 204 'tbr': int_or_none(self._search_regex( 205 r'_(\d+)\.mp4', fallback_rendition_url, 206 'bitrate', default=None)), 207 'url': fallback_rendition_url, 208 }) 209 210 self._sort_formats(formats) 211 212 return { 213 'id': video_id, 214 'title': title, 215 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), 216 'uploader': fmg.get('network'), 217 'duration': int_or_none(iptc.get('fileDuration')), 218 'formats': formats, 219 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), 220 'timestamp': parse_iso8601(iptc.get('dateReleased')), 221 }