sendtonews.py (3833B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    parse_iso8601,
    update_url_query,
    int_or_none,
    determine_protocol,
    unescapeHTML,
)


class SendtoNewsIE(InfoExtractor):
    """Extract the playlist behind a SendtoNews ``player2`` embed URL."""

    # The value of the ``SC`` query parameter is captured as the playlist id.
    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588'
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Protocol-relative player URL; ``%s`` receives the ``SC`` code.
    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_url(cls, webpage):
        """Find a SendtoNews ``responsiveembed.php`` script tag in *webpage*.

        Returns the ``_URL_TEMPLATE`` URL built from the tag's ``SC`` code,
        or ``None`` when no such ``<script>`` tag is present.
        """
        # Verbose-mode pattern: the whitespace and line breaks inside the
        # literal are layout only and are ignored by the regex engine.
        embed = re.search(
            r'''(?x)<script[^>]+src=([\'"])
                (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                    .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
                \1>''',
            webpage)
        if embed is None:
            return None
        return cls._URL_TEMPLATE % embed.group('SC')

    def _real_extract(self, url):
        """Download the playlist data for *url* and return a playlist result.

        Each item of ``playlistData[0]`` in the downloaded JSON becomes one
        entry: its ``jwconfiguration`` is parsed for formats, then title,
        description, thumbnails, duration and timestamp are filled in from
        the item's own fields.
        """
        playlist_id = self._match_id(url)

        # Same URL as the player, but pointed at data_read.php and with
        # cmd=loadInitial passed to update_url_query.
        data_endpoint = url.replace('embedplayer.php', 'data_read.php')
        playlist_data = self._download_json(
            update_url_query(data_endpoint, {'cmd': 'loadInitial'}),
            playlist_id)

        # (thumbnail id, key in the video item) pairs, in output order.
        thumbnail_sources = (
            ('normal', 'thumbnailUrl'),
            ('small', 'smThumbnailUrl'),
        )

        entries = []
        for video in playlist_data['playlistData'][0]:
            entry = self._parse_jwplayer_data(
                video['jwconfiguration'], require_title=False,
                m3u8_id='hls', rtmp_params={'no_resume': True})

            for fmt in entry['formats']:
                # Only formats that lack a bitrate are touched; the bitrate is
                # then read from a "/<digits>k/" segment of the format URL.
                if not fmt.get('tbr'):
                    bitrate = int_or_none(self._search_regex(
                        r'/(\d+)k/', fmt['url'], 'bitrate', default=None))
                    if bitrate:
                        fmt.update({
                            'format_id': '%s-%d' % (determine_protocol(fmt), bitrate),
                            'tbr': bitrate,
                        })
            self._sort_formats(
                entry['formats'], ('tbr', 'height', 'width', 'format_id'))

            entry.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                # Thumbnails whose URL is missing or empty are left out.
                'thumbnails': [
                    {'id': thumb_id, 'url': video[key]}
                    for thumb_id, key in thumbnail_sources
                    if video.get(key)
                ],
                'duration': float_or_none(video.get('SM_length')),
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
            })
            entries.append(entry)

        return self.playlist_result(entries, playlist_id)