youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

skyit.py (8937B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_parse_qs,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    dict_get,
    ExtractorError,
    int_or_none,
    parse_duration,
    unified_timestamp,
)
     16 
     17 
     18 class SkyItPlayerIE(InfoExtractor):
     19     IE_NAME = 'player.sky.it'
     20     _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
     21     _GEO_BYPASS = False
     22     _DOMAIN = 'sky'
     23     _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
     24     # http://static.sky.it/static/skyplayer/conf.json
     25     _TOKEN_MAP = {
     26         'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
     27         'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
     28         'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
     29         'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
     30         'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
     31         'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
     32         'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
     33         'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
     34         'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
     35     }
     36 
     37     def _player_url_result(self, video_id):
     38         return self.url_result(
     39             self._PLAYER_TMPL % (video_id, self._DOMAIN),
     40             SkyItPlayerIE.ie_key(), video_id)
     41 
     42     def _parse_video(self, video, video_id):
     43         title = video['title']
     44         is_live = video.get('type') == 'live'
     45         hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
     46         if not hls_url and video.get('geoblock' if is_live else 'geob'):
     47             self.raise_geo_restricted(countries=['IT'])
     48 
     49         if is_live:
     50             formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
     51         else:
     52             formats = self._extract_akamai_formats(
     53                 hls_url, video_id, {'http': 'videoplatform.sky.it'})
     54         self._sort_formats(formats)
     55 
     56         return {
     57             'id': video_id,
     58             'title': self._live_title(title) if is_live else title,
     59             'formats': formats,
     60             'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
     61             'description': video.get('short_desc') or None,
     62             'timestamp': unified_timestamp(video.get('create_date')),
     63             'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
     64             'is_live': is_live,
     65         }
     66 
     67     def _real_extract(self, url):
     68         video_id = self._match_id(url)
     69         domain = compat_parse_qs(compat_urllib_parse_urlparse(
     70             url).query).get('domain', [None])[0]
     71         token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
     72         video = self._download_json(
     73             'https://apid.sky.it/vdp/v1/getVideoData',
     74             video_id, query={
     75                 'caller': 'sky',
     76                 'id': video_id,
     77                 'token': token
     78             }, headers=self.geo_verification_headers())
     79         return self._parse_video(video, video_id)
     80 
     81 
     82 class SkyItVideoIE(SkyItPlayerIE):
     83     IE_NAME = 'video.sky.it'
     84     _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
     85     _TESTS = [{
     86         'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
     87         'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
     88         'info_dict': {
     89             'id': '631227',
     90             'ext': 'mp4',
     91             'title': 'Uomo ucciso da uno squalo in Australia',
     92             'timestamp': 1606036192,
     93             'upload_date': '20201122',
     94         }
     95     }, {
     96         'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
     97         'only_matching': True,
     98     }, {
     99         'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
    100         'only_matching': True,
    101     }]
    102 
    103     def _real_extract(self, url):
    104         video_id = self._match_id(url)
    105         return self._player_url_result(video_id)
    106 
    107 
    108 class SkyItVideoLiveIE(SkyItPlayerIE):
    109     IE_NAME = 'video.sky.it:live'
    110     _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    111     _TEST = {
    112         'url': 'https://video.sky.it/diretta/tg24',
    113         'info_dict': {
    114             'id': '1',
    115             'ext': 'mp4',
    116             'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
    117             'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
    118         },
    119         'params': {
    120             # m3u8 download
    121             'skip_download': True,
    122         },
    123     }
    124 
    125     def _real_extract(self, url):
    126         display_id = self._match_id(url)
    127         webpage = self._download_webpage(url, display_id)
    128         asset_id = compat_str(self._parse_json(self._search_regex(
    129             r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
    130             webpage, 'next data'), display_id)['props']['initialState']['livePage']['content']['asset_id'])
    131         livestream = self._download_json(
    132             'https://apid.sky.it/vdp/v1/getLivestream',
    133             asset_id, query={'id': asset_id})
    134         return self._parse_video(livestream, asset_id)
    135 
    136 
    137 class SkyItIE(SkyItPlayerIE):
    138     IE_NAME = 'sky.it'
    139     _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    140     _TESTS = [{
    141         'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
    142         'info_dict': {
    143             'id': '631201',
    144             'ext': 'mp4',
    145             'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
    146             'upload_date': '20201121',
    147             'timestamp': 1605995753,
    148         },
    149         'expected_warnings': ['Unable to download f4m manifest'],
    150     }, {
    151         'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
    152         'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
    153         'info_dict': {
    154             'id': '631227',
    155             'ext': 'mp4',
    156             'title': 'Uomo ucciso da uno squalo in Australia',
    157             'timestamp': 1606036192,
    158             'upload_date': '20201122',
    159         },
    160     }]
    161     _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
    162 
    163     def _real_extract(self, url):
    164         display_id = self._match_id(url)
    165         webpage = self._download_webpage(url, display_id)
    166         video_id = self._search_regex(
    167             self._VIDEO_ID_REGEX, webpage, 'video id')
    168         return self._player_url_result(video_id)
    169 
    170 
    171 class SkyItAcademyIE(SkyItIE):
    172     IE_NAME = 'skyacademy.it'
    173     _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    174     _TESTS = [{
    175         'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
    176         'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
    177         'info_dict': {
    178             'id': '523458',
    179             'ext': 'mp4',
    180             'title': 'Sky Academy "The Best CineCamp 2019"',
    181             'timestamp': 1562843784,
    182             'upload_date': '20190711',
    183         }
    184     }]
    185     _DOMAIN = 'skyacademy'
    186     _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
    187 
    188 
    189 class SkyItArteIE(SkyItIE):
    190     IE_NAME = 'arte.sky.it'
    191     _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    192     _TESTS = [{
    193         'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
    194         'md5': '515aee97b87d7a018b6c80727d3e7e17',
    195         'info_dict': {
    196             'id': '627926',
    197             'ext': 'mp4',
    198             'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
    199             'upload_date': '20201106',
    200             'timestamp': 1604664493,
    201         }
    202     }]
    203     _DOMAIN = 'skyarte'
    204     _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
    205 
    206 
    207 class CieloTVItIE(SkyItIE):
    208     IE_NAME = 'cielotv.it'
    209     _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    210     _TESTS = [{
    211         'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
    212         'md5': 'c4deed77552ba901c2a0d9258320304b',
    213         'info_dict': {
    214             'id': '499240',
    215             'ext': 'mp4',
    216             'title': 'Il lunedì è sempre un dramma',
    217             'upload_date': '20190329',
    218             'timestamp': 1553862178,
    219         }
    220     }]
    221     _DOMAIN = 'cielo'
    222     _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
    223 
    224 
    225 class TV8ItIE(SkyItVideoIE):
    226     IE_NAME = 'tv8.it'
    227     _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    228     _TESTS = [{
    229         'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
    230         'md5': '9ab906a3f75ea342ed928442f9dabd21',
    231         'info_dict': {
    232             'id': '630529',
    233             'ext': 'mp4',
    234             'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
    235             'timestamp': 1605721374,
    236             'upload_date': '20201118',
    237         }
    238     }]
    239     _DOMAIN = 'mtv8'