youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

internazionale.py (3328B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import unified_timestamp
      6 
      7 
      8 class InternazionaleIE(InfoExtractor):
      9     _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     10     _TESTS = [{
     11         'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
     12         'md5': '3e39d32b66882c1218e305acbf8348ca',
     13         'info_dict': {
     14             'id': '265968',
     15             'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
     16             'ext': 'mp4',
     17             'title': 'Richard Linklater racconta una scena di Boyhood',
     18             'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
     19             'timestamp': 1424354635,
     20             'upload_date': '20150219',
     21             'thumbnail': r're:^https?://.*\.jpg$',
     22         },
     23         'params': {
     24             'format': 'bestvideo',
     25         },
     26     }, {
     27         'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
     28         'md5': '9db8663704cab73eb972d1cee0082c79',
     29         'info_dict': {
     30             'id': '761344',
     31             'display_id': 'telefono-stare-con-noi-stessi',
     32             'ext': 'mp4',
     33             'title': 'Usiamo il telefono per evitare di stare con noi stessi',
     34             'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
     35             'timestamp': 1535528954,
     36             'upload_date': '20180829',
     37             'thumbnail': r're:^https?://.*\.jpg$',
     38         },
     39         'params': {
     40             'format': 'bestvideo',
     41         },
     42     }]
     43 
     44     def _real_extract(self, url):
     45         display_id = self._match_id(url)
     46 
     47         webpage = self._download_webpage(url, display_id)
     48 
     49         DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
     50 
     51         title = self._search_regex(
     52             DATA_RE % 'video-title', webpage, 'title', default=None,
     53             group='value') or self._og_search_title(webpage)
     54 
     55         video_id = self._search_regex(
     56             DATA_RE % 'job-id', webpage, 'video id', group='value')
     57         video_path = self._search_regex(
     58             DATA_RE % 'video-path', webpage, 'video path', group='value')
     59         video_available_abroad = self._search_regex(
     60             DATA_RE % 'video-available_abroad', webpage,
     61             'video available aboard', default='1', group='value')
     62         video_available_abroad = video_available_abroad == '1'
     63 
     64         video_base = 'https://video%s.internazionale.it/%s/%s.' % \
     65             ('' if video_available_abroad else '-ita', video_path, video_id)
     66 
     67         formats = self._extract_m3u8_formats(
     68             video_base + 'm3u8', display_id, 'mp4',
     69             entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
     70         formats.extend(self._extract_mpd_formats(
     71             video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
     72         self._sort_formats(formats)
     73 
     74         timestamp = unified_timestamp(self._html_search_meta(
     75             'article:published_time', webpage, 'timestamp'))
     76 
     77         return {
     78             'id': video_id,
     79             'display_id': display_id,
     80             'title': title,
     81             'thumbnail': self._og_search_thumbnail(webpage),
     82             'description': self._og_search_description(webpage),
     83             'timestamp': timestamp,
     84             'formats': formats,
     85         }