sky.py (5418B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 extract_attributes, 9 smuggle_url, 10 strip_or_none, 11 urljoin, 12 ) 13 14 15 class SkyBaseIE(InfoExtractor): 16 BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' 17 _SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)' 18 19 def _process_ooyala_element(self, webpage, sdc_el, url): 20 sdc = extract_attributes(sdc_el) 21 provider = sdc.get('data-provider') 22 if provider == 'ooyala': 23 video_id = sdc['data-sdc-video-id'] 24 video_url = 'ooyala:%s' % video_id 25 ie_key = 'Ooyala' 26 ooyala_el = self._search_regex( 27 r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id, 28 webpage, 'video data', fatal=False) 29 if ooyala_el: 30 ooyala_attrs = extract_attributes(ooyala_el) or {} 31 if ooyala_attrs.get('data-token-required') == 'true': 32 token_fetch_url = (self._parse_json(ooyala_attrs.get( 33 'data-token-fetch-options', '{}'), 34 video_id, fatal=False) or {}).get('url') 35 if token_fetch_url: 36 embed_token = self._download_json(urljoin( 37 url, token_fetch_url), video_id, fatal=False) 38 if embed_token: 39 video_url = smuggle_url( 40 video_url, {'embed_token': embed_token}) 41 elif provider == 'brightcove': 42 video_id = sdc['data-video-id'] 43 account_id = sdc.get('data-account-id') or '6058004172001' 44 player_id = sdc.get('data-player-id') or 'RC9PQUaJ6' 45 video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id) 46 ie_key = 'BrightcoveNew' 47 48 return { 49 '_type': 'url_transparent', 50 'id': video_id, 51 'url': video_url, 52 'ie_key': ie_key, 53 } 54 55 def _real_extract(self, url): 56 video_id = self._match_id(url) 57 webpage = self._download_webpage(url, video_id) 58 info = self._process_ooyala_element(webpage, self._search_regex( 59 self._SDC_EL_REGEX, webpage, 'sdc element'), url) 60 info.update({ 61 'title': self._og_search_title(webpage), 62 'description': strip_or_none(self._og_search_description(webpage)), 63 }) 64 return info 65 66 67 class SkySportsIE(SkyBaseIE): 68 IE_NAME = 'sky:sports' 69 _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)' 70 _TESTS = [{ 71 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', 72 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', 73 'info_dict': { 74 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ', 75 'ext': 'mp4', 76 'title': 'Bale: It\'s our time to shine', 77 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', 78 }, 79 'add_ie': ['Ooyala'], 80 }, { 81 'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook', 82 'only_matching': True, 83 }, { 84 'url': 'https://www.skysports.com/watch/video/tv-shows/12118508/rainford-brent-how-ace-programme-helps', 85 'only_matching': True, 86 }] 87 88 89 class SkyNewsIE(SkyBaseIE): 90 IE_NAME = 'sky:news' 91 _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)' 92 _TEST = { 93 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962', 94 'md5': '411e8893fd216c75eaf7e4c65d364115', 95 'info_dict': { 96 'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', 97 'ext': 'mp4', 98 'title': 'Russian plane inspected after deadly fire', 99 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.', 100 'uploader_id': '6058004172001', 101 'timestamp': 1567112345, 102 'upload_date': '20190829', 103 }, 104 'add_ie': ['BrightcoveNew'], 105 } 106 107 108 class SkySportsNewsIE(SkyBaseIE): 109 IE_NAME = 'sky:sports:news' 110 _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)' 111 _TEST = { 112 'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass', 113 'info_dict': { 114 'id': '10871916', 115 'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass', 116 'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.', 117 }, 118 'playlist_count': 2, 119 } 120 121 def _real_extract(self, url): 122 article_id = self._match_id(url) 123 webpage = self._download_webpage(url, article_id) 124 125 entries = [] 126 for sdc_el in re.findall(self._SDC_EL_REGEX, webpage): 127 entries.append(self._process_ooyala_element(webpage, sdc_el, url)) 128 129 return self.playlist_result( 130 entries, article_id, self._og_search_title(webpage), 131 self._html_search_meta(['og:description', 'description'], webpage))