youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

digiteka.py (3509B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import int_or_none
      8 
      9 
     10 class DigitekaIE(InfoExtractor):
     11     _VALID_URL = r'''(?x)
     12         https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
     13         (?:
     14             deliver/
     15             (?P<embed_type>
     16                 generic|
     17                 musique
     18             )
     19             (?:/[^/]+)*/
     20             (?:
     21                 src|
     22                 article
     23             )|
     24             default/index/video
     25             (?P<site_type>
     26                 generic|
     27                 music
     28             )
     29             /id
     30         )/(?P<id>[\d+a-z]+)'''
     31     _TESTS = [{
     32         # news
     33         'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
     34         'md5': '276a0e49de58c7e85d32b057837952a2',
     35         'info_dict': {
     36             'id': 's8uk0r',
     37             'ext': 'mp4',
     38             'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
     39             'thumbnail': r're:^https?://.*\.jpg',
     40             'duration': 74,
     41             'upload_date': '20150317',
     42             'timestamp': 1426604939,
     43             'uploader_id': '3fszv',
     44         },
     45     }, {
     46         # music
     47         'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
     48         'md5': '2ea3513813cf230605c7e2ffe7eca61c',
     49         'info_dict': {
     50             'id': 'xvpfp8',
     51             'ext': 'mp4',
     52             'title': 'Two - C\'est La Vie (clip)',
     53             'thumbnail': r're:^https?://.*\.jpg',
     54             'duration': 233,
     55             'upload_date': '20150224',
     56             'timestamp': 1424760500,
     57             'uploader_id': '3rfzk',
     58         },
     59     }, {
     60         'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
     61         'only_matching': True,
     62     }]
     63 
     64     @staticmethod
     65     def _extract_url(webpage):
     66         mobj = re.search(
     67             r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
     68             webpage)
     69         if mobj:
     70             return mobj.group('url')
     71 
     72     def _real_extract(self, url):
     73         mobj = re.match(self._VALID_URL, url)
     74         video_id = mobj.group('id')
     75         video_type = mobj.group('embed_type') or mobj.group('site_type')
     76         if video_type == 'music':
     77             video_type = 'musique'
     78 
     79         deliver_info = self._download_json(
     80             'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
     81             video_id)
     82 
     83         yt_id = deliver_info.get('yt_id')
     84         if yt_id:
     85             return self.url_result(yt_id, 'Youtube')
     86 
     87         jwconf = deliver_info['jwconf']
     88 
     89         formats = []
     90         for source in jwconf['playlist'][0]['sources']:
     91             formats.append({
     92                 'url': source['file'],
     93                 'format_id': source.get('label'),
     94             })
     95 
     96         self._sort_formats(formats)
     97 
     98         title = deliver_info['title']
     99         thumbnail = jwconf.get('image')
    100         duration = int_or_none(deliver_info.get('duration'))
    101         timestamp = int_or_none(deliver_info.get('release_time'))
    102         uploader_id = deliver_info.get('owner_id')
    103 
    104         return {
    105             'id': video_id,
    106             'title': title,
    107             'thumbnail': thumbnail,
    108             'duration': duration,
    109             'timestamp': timestamp,
    110             'uploader_id': uploader_id,
    111             'formats': formats,
    112         }