youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

wat.py (4111B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..compat import compat_str
      6 from ..utils import (
      7     ExtractorError,
      8     int_or_none,
      9     try_get,
     10     unified_strdate,
     11 )
     12 
     13 
     14 class WatIE(InfoExtractor):
     15     _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
     16     IE_NAME = 'wat.tv'
     17     _TESTS = [
     18         {
     19             'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
     20             'info_dict': {
     21                 'id': '11713067',
     22                 'ext': 'mp4',
     23                 'title': 'Soupe de figues à l\'orange et aux épices',
     24                 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
     25                 'upload_date': '20140819',
     26                 'duration': 120,
     27             },
     28             'params': {
     29                 # m3u8 download
     30                 'skip_download': True,
     31             },
     32             'expected_warnings': ['HTTP Error 404'],
     33             'skip': 'This content is no longer available',
     34         },
     35         {
     36             'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
     37             'md5': 'b16574df2c3cd1a36ca0098f2a791925',
     38             'info_dict': {
     39                 'id': '11713075',
     40                 'ext': 'mp4',
     41                 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
     42                 'upload_date': '20140816',
     43             },
     44             'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
     45             'skip': 'This content is no longer available',
     46         },
     47     ]
     48     _GEO_BYPASS = False
     49 
     50     def _real_extract(self, url):
     51         video_id = self._match_id(url)
     52         video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
     53 
     54         # 'contentv4' is used in the website, but it also returns the related
     55         # videos, we don't need them
     56         # video_data = self._download_json(
     57         #     'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
     58         video_data = self._download_json(
     59             'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
     60             video_id, query={'context': 'MYTF1'})
     61         video_info = video_data['media']
     62 
     63         error_desc = video_info.get('error_desc')
     64         if error_desc:
     65             if video_info.get('error_code') == 'GEOBLOCKED':
     66                 self.raise_geo_restricted(error_desc, video_info.get('geoList'))
     67             raise ExtractorError(error_desc, expected=True)
     68 
     69         title = video_info['title']
     70 
     71         formats = []
     72 
     73         def extract_formats(manifest_urls):
     74             for f, f_url in manifest_urls.items():
     75                 if not f_url:
     76                     continue
     77                 if f in ('dash', 'mpd'):
     78                     formats.extend(self._extract_mpd_formats(
     79                         f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
     80                         video_id, mpd_id='dash', fatal=False))
     81                 elif f == 'hls':
     82                     formats.extend(self._extract_m3u8_formats(
     83                         f_url, video_id, 'mp4',
     84                         'm3u8_native', m3u8_id='hls', fatal=False))
     85 
     86         delivery = video_data.get('delivery') or {}
     87         extract_formats({delivery.get('format'): delivery.get('url')})
     88         if not formats:
     89             if delivery.get('drm'):
     90                 raise ExtractorError('This video is DRM protected.', expected=True)
     91             manifest_urls = self._download_json(
     92                 'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
     93             if manifest_urls:
     94                 extract_formats(manifest_urls)
     95 
     96         self._sort_formats(formats)
     97 
     98         return {
     99             'id': video_id,
    100             'title': title,
    101             'thumbnail': video_info.get('preview'),
    102             'upload_date': unified_strdate(try_get(
    103                 video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])),
    104             'duration': int_or_none(video_info.get('duration')),
    105             'formats': formats,
    106         }