vice.py (12414B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import functools 5 import hashlib 6 import json 7 import random 8 import re 9 import time 10 11 from .adobepass import AdobePassIE 12 from .common import InfoExtractor 13 from .youtube import YoutubeIE 14 from ..compat import ( 15 compat_HTTPError, 16 compat_str, 17 ) 18 from ..utils import ( 19 clean_html, 20 ExtractorError, 21 int_or_none, 22 OnDemandPagedList, 23 parse_age_limit, 24 str_or_none, 25 try_get, 26 ) 27 28 29 class ViceBaseIE(InfoExtractor): 30 def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''): 31 return self._download_json( 32 'https://video.vice.com/api/v1/graphql', resource_id, query={ 33 'query': '''{ 34 %s(locale: "%s", %s: "%s"%s) { 35 %s 36 } 37 }''' % (resource, locale, resource_key, resource_id, args, fields), 38 })['data'][resource] 39 40 41 class ViceIE(ViceBaseIE, AdobePassIE): 42 IE_NAME = 'vice' 43 _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})' 44 _TESTS = [{ 45 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7', 46 'info_dict': { 47 'id': '58c69e38a55424f1227dc3f7', 48 'ext': 'mp4', 49 'title': '10 Questions You Always Wanted To Ask: Pet Cremator', 50 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5', 51 'uploader': 'vice', 52 'uploader_id': '57a204088cb727dec794c67b', 53 'timestamp': 1489664942, 54 'upload_date': '20170316', 55 'age_limit': 14, 56 }, 57 'params': { 58 # m3u8 download 59 'skip_download': True, 60 }, 61 }, { 62 # geo restricted to US 63 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', 64 'info_dict': { 65 'id': '5816510690b70e6c5fd39a56', 66 'ext': 'mp4', 67 'uploader': 'vice', 68 'title': 'The Signal From Tölva', 69 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', 70 'uploader_id': '57a204088cb727dec794c67b', 71 'timestamp': 1477941983, 72 'upload_date': '20161031', 73 }, 74 'params': { 75 # m3u8 download 76 'skip_download': True, 77 }, 78 }, { 79 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', 80 'info_dict': { 81 'id': '581b12b60a0e1f4c0fb6ea2f', 82 'ext': 'mp4', 83 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', 84 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.', 85 'uploader': 'vice', 86 'uploader_id': '57a204088cb727dec794c67b', 87 'timestamp': 1485368119, 88 'upload_date': '20170125', 89 'age_limit': 14, 90 }, 91 'params': { 92 # AES-encrypted m3u8 93 'skip_download': True, 94 }, 95 }, { 96 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 97 'only_matching': True, 98 }, { 99 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060', 100 'only_matching': True, 101 }, { 102 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7', 103 'only_matching': True, 104 }, { 105 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1', 106 'only_matching': True, 107 }] 108 109 @staticmethod 110 def _extract_urls(webpage): 111 return re.findall( 112 r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})', 113 webpage) 114 115 @staticmethod 116 def _extract_url(webpage): 117 urls = ViceIE._extract_urls(webpage) 118 return urls[0] if urls else None 119 120 def _real_extract(self, url): 121 locale, video_id = re.match(self._VALID_URL, url).groups() 122 123 video = self._call_api('videos', 'id', video_id, locale, '''body 124 locked 125 rating 126 thumbnail_url 127 title''')[0] 128 title = video['title'].strip() 129 rating = video.get('rating') 130 131 query = {} 132 if video.get('locked'): 133 resource = self._get_mvpd_resource( 134 'VICELAND', title, video_id, rating) 135 query['tvetoken'] = self._extract_mvpd_auth( 136 url, video_id, 'VICELAND', resource) 137 138 # signature generation algorithm is reverse engineered from signatureGenerator in 139 # webpack:///../shared/~/vice-player/dist/js/vice-player.js in 140 # https://www.viceland.com/assets/common/js/web.vendor.bundle.js 141 # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js 142 exp = int(time.time()) + 1440 143 144 query.update({ 145 'exp': exp, 146 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), 147 'skipadstitching': 1, 148 'platform': 'desktop', 149 'rn': random.randint(10000, 100000), 150 }) 151 152 try: 153 preplay = self._download_json( 154 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id), 155 video_id, query=query) 156 except ExtractorError as e: 157 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401): 158 error = json.loads(e.cause.read().decode()) 159 error_message = error.get('error_description') or error['details'] 160 raise ExtractorError('%s said: %s' % ( 161 self.IE_NAME, error_message), expected=True) 162 raise 163 164 video_data = preplay['video'] 165 formats = self._extract_m3u8_formats( 166 preplay['playURL'], video_id, 'mp4', 'm3u8_native') 167 self._sort_formats(formats) 168 episode = video_data.get('episode') or {} 169 channel = video_data.get('channel') or {} 170 season = video_data.get('season') or {} 171 172 subtitles = {} 173 for subtitle in preplay.get('subtitleURLs', []): 174 cc_url = subtitle.get('url') 175 if not cc_url: 176 continue 177 language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en' 178 subtitles.setdefault(language_code, []).append({ 179 'url': cc_url, 180 }) 181 182 return { 183 'formats': formats, 184 'id': video_id, 185 'title': title, 186 'description': clean_html(video.get('body')), 187 'thumbnail': video.get('thumbnail_url'), 188 'duration': int_or_none(video_data.get('video_duration')), 189 'timestamp': int_or_none(video_data.get('created_at'), 1000), 190 'age_limit': parse_age_limit(video_data.get('video_rating') or rating), 191 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str), 192 'episode_number': int_or_none(episode.get('episode_number')), 193 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), 194 'season_number': int_or_none(season.get('season_number')), 195 'season_id': str_or_none(season.get('id') or video_data.get('season_id')), 196 'uploader': channel.get('name'), 197 'uploader_id': str_or_none(channel.get('id')), 198 'subtitles': subtitles, 199 } 200 201 202 class ViceShowIE(ViceBaseIE): 203 IE_NAME = 'vice:show' 204 _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)' 205 _PAGE_SIZE = 25 206 _TESTS = [{ 207 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious', 208 'info_dict': { 209 'id': '57a2040c8cb727dec794c901', 210 'title': 'F*ck, That’s Delicious', 211 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.', 212 }, 213 'playlist_mincount': 64, 214 }, { 215 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious', 216 'only_matching': True, 217 }] 218 219 def _fetch_page(self, locale, show_id, page): 220 videos = self._call_api('videos', 'show_id', show_id, locale, '''body 221 id 222 url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE)) 223 for video in videos: 224 yield self.url_result( 225 video['url'], ViceIE.ie_key(), video.get('id')) 226 227 def _real_extract(self, url): 228 locale, display_id = re.match(self._VALID_URL, url).groups() 229 show = self._call_api('shows', 'slug', display_id, locale, '''dek 230 id 231 title''')[0] 232 show_id = show['id'] 233 234 entries = OnDemandPagedList( 235 functools.partial(self._fetch_page, locale, show_id), 236 self._PAGE_SIZE) 237 238 return self.playlist_result( 239 entries, show_id, show.get('title'), show.get('dek')) 240 241 242 class ViceArticleIE(ViceBaseIE): 243 IE_NAME = 'vice:article' 244 _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)' 245 246 _TESTS = [{ 247 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', 248 'info_dict': { 249 'id': '58dc0a3dee202d2a0ccfcbd8', 250 'ext': 'mp4', 251 'title': 'Mormon War on Porn', 252 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf', 253 'uploader': 'vice', 254 'uploader_id': '57a204088cb727dec794c67b', 255 'timestamp': 1491883129, 256 'upload_date': '20170411', 257 'age_limit': 17, 258 }, 259 'params': { 260 # AES-encrypted m3u8 261 'skip_download': True, 262 }, 263 'add_ie': [ViceIE.ie_key()], 264 }, { 265 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', 266 'md5': '13010ee0bc694ea87ec40724397c2349', 267 'info_dict': { 268 'id': '3jstaBeXgAs', 269 'ext': 'mp4', 270 'title': 'How to Hack a Car: Phreaked Out (Episode 2)', 271 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30', 272 'uploader': 'Motherboard', 273 'uploader_id': 'MotherboardTV', 274 'upload_date': '20140529', 275 }, 276 'add_ie': [YoutubeIE.ie_key()], 277 }, { 278 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded', 279 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 280 'info_dict': { 281 'id': '57f41d3556a0a80f54726060', 282 'ext': 'mp4', 283 'title': "Making The World's First Male Sex Doll", 284 'description': 'md5:19b00b215b99961cf869c40fbe9df755', 285 'uploader': 'vice', 286 'uploader_id': '57a204088cb727dec794c67b', 287 'timestamp': 1476919911, 288 'upload_date': '20161019', 289 'age_limit': 17, 290 }, 291 'params': { 292 'skip_download': True, 293 'format': 'bestvideo', 294 }, 295 'add_ie': [ViceIE.ie_key()], 296 }, { 297 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1', 298 'only_matching': True, 299 }, { 300 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229', 301 'only_matching': True, 302 }] 303 304 def _real_extract(self, url): 305 locale, display_id = re.match(self._VALID_URL, url).groups() 306 307 article = self._call_api('articles', 'slug', display_id, locale, '''body 308 embed_code''')[0] 309 body = article['body'] 310 311 def _url_res(video_url, ie_key): 312 return { 313 '_type': 'url_transparent', 314 'url': video_url, 315 'display_id': display_id, 316 'ie_key': ie_key, 317 } 318 319 vice_url = ViceIE._extract_url(body) 320 if vice_url: 321 return _url_res(vice_url, ViceIE.ie_key()) 322 323 embed_code = self._search_regex( 324 r'embedCode=([^&\'"]+)', body, 325 'ooyala embed code', default=None) 326 if embed_code: 327 return _url_res('ooyala:%s' % embed_code, 'Ooyala') 328 329 youtube_url = YoutubeIE._extract_url(body) 330 if youtube_url: 331 return _url_res(youtube_url, YoutubeIE.ie_key()) 332 333 video_url = self._html_search_regex( 334 r'data-video-url="([^"]+)"', 335 article['embed_code'], 'video URL') 336 337 return _url_res(video_url, ViceIE.ie_key())