tmz.py (4382B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from .kaltura import KalturaIE
from ..utils import (
    int_or_none,
    unified_timestamp,
)


class TMZIE(InfoExtractor):
    """Extractor for tmz.com ``/videos/<id>`` pages.

    Metadata is read from the site's ``/_/video/<uuid>`` JSON endpoint when
    it is reachable; the actual media is delegated to the JWPlatform or
    Kaltura extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.tmz.com/videos/0-cegprt2p/',
        'md5': '31f9223e20eef55954973359afa61a20',
        'info_dict': {
            'id': 'P6YjLBLk',
            'ext': 'mp4',
            'title': "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
            'description': 'md5:b714359fc18607715ebccbd2da8ff488',
            'timestamp': 1467831837,
            'upload_date': '20160706',
        },
        'add_ie': [JWPlatformIE.ie_key()],
    }, {
        'url': 'http://www.tmz.com/videos/0_okj015ty/',
        'only_matching': True,
    }, {
        'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/',
        'only_matching': True,
    }, {
        'url': 'https://www.tmz.com/videos/2021-02-19-021921-floyd-mayweather-1043872/',
        'only_matching': True,
    }]

    # Partner id used whenever the API response does not provide one.
    _DEFAULT_KALTURA_PARTNER_ID = '591531'

    def _real_extract(self, url):
        """Resolve a TMZ video page to a JWPlatform or Kaltura result.

        Returns a ``url_transparent`` info dict carrying the metadata from
        the TMZ video API when that API could be queried successfully, and
        otherwise a plain URL result pointing at the Kaltura entry derived
        from the URL slug.
        """
        # The slug with hyphens turned into underscores doubles as the
        # Kaltura entry id in the fallback paths below.
        video_id = self._match_id(url).replace('-', '_')

        message = self._fetch_video_message(url, video_id)
        if message is None:
            return self.url_result(
                'kaltura:%s:%s' % (self._DEFAULT_KALTURA_PARTNER_ID, video_id),
                KalturaIE.ie_key(), video_id)

        info = {
            '_type': 'url_transparent',
            'title': message.get('title'),
            'description': message.get('description'),
            'timestamp': unified_timestamp(message.get('published_at')),
            'duration': int_or_none(message.get('duration')),
        }

        jwplatform_id = message.get('jwplayer_media_id')
        if jwplatform_id:
            info.update({
                'url': 'jwplatform:%s' % jwplatform_id,
                'ie_key': JWPlatformIE.ie_key(),
            })
        else:
            kaltura_entry_id = message.get('kaltura_entry_id') or video_id
            kaltura_partner_id = (
                message.get('kaltura_partner_id')
                or self._DEFAULT_KALTURA_PARTNER_ID)
            info.update({
                'url': 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id),
                'ie_key': KalturaIE.ie_key(),
            })
        return info

    def _fetch_video_message(self, url, video_id):
        """Return the ``message`` dict of the TMZ video API, or None.

        Every step is best effort: a failed page download, a page without
        a ``nodeRef`` UUID, a failed API request or an unexpectedly shaped
        payload all yield None so the caller can fall back to Kaltura.
        """
        webpage = self._download_webpage(url, video_id, fatal=False)
        if not webpage:
            return None

        tmz_video_id = self._search_regex(
            r'nodeRef\s*:\s*["\']tmz:video:([\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12})',
            webpage, 'video id', default=None)
        if not tmz_video_id:
            # Without the UUID the API URL would end in the literal text
            # "None"; skip the pointless request.
            return None

        video = self._download_json(
            'https://www.tmz.com/_/video/%s' % tmz_video_id, video_id,
            fatal=False)
        if not isinstance(video, dict):
            return None

        message = video.get('message')
        return message if isinstance(message, dict) else None


class TMZArticleIE(InfoExtractor):
    """Extractor for dated tmz.com article pages (``/YYYY/MM/DD/<slug>``).

    The article itself holds no media; it is resolved to the TMZ video page
    it links to or embeds, and extraction is handed over to ``TMZIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
        'info_dict': {
            'id': 'PAKZa97W',
            'ext': 'mp4',
            'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
            'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
            'timestamp': 1429466400,
            'upload_date': '20150419',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [JWPlatformIE.ie_key()],
    }

    def _real_extract(self, url):
        """Locate the video referenced by the article and defer to TMZIE."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Preferred source: a clickLink(...) call carrying a full TMZ video
        # URL.
        tmz_url = self._search_regex(
            r'clickLink\s*\(\s*["\'](?P<url>%s)' % TMZIE._VALID_URL, webpage,
            'video id', default=None, group='url')
        if tmz_url:
            return self.url_result(tmz_url, ie=TMZIE.ie_key())

        # Otherwise read the JSON argument of tmzVideoEmbed(...); this
        # search is fatal, so a page with neither marker raises.
        embedded_video_info = self._parse_json(self._html_search_regex(
            r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
            video_id)
        return self.url_result(
            'http://www.tmz.com/videos/%s/' % embedded_video_info['id'],
            ie=TMZIE.ie_key())