youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

allocine.py (4962B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..compat import compat_str
      6 from ..utils import (
      7     int_or_none,
      8     qualities,
      9     remove_end,
     10     try_get,
     11     unified_timestamp,
     12     url_basename,
     13 )
     14 
     15 
     16 class AllocineIE(InfoExtractor):
     17     _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
     18 
     19     _TESTS = [{
     20         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
     21         'md5': '0c9fcf59a841f65635fa300ac43d8269',
     22         'info_dict': {
     23             'id': '19546517',
     24             'display_id': '18635087',
     25             'ext': 'mp4',
     26             'title': 'Astérix - Le Domaine des Dieux Teaser VF',
     27             'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
     28             'thumbnail': r're:http://.*\.jpg',
     29             'duration': 39,
     30             'timestamp': 1404273600,
     31             'upload_date': '20140702',
     32             'view_count': int,
     33         },
     34     }, {
     35         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
     36         'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
     37         'info_dict': {
     38             'id': '19540403',
     39             'display_id': '19540403',
     40             'ext': 'mp4',
     41             'title': 'Planes 2 Bande-annonce VF',
     42             'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
     43             'thumbnail': r're:http://.*\.jpg',
     44             'duration': 69,
     45             'timestamp': 1385659800,
     46             'upload_date': '20131128',
     47             'view_count': int,
     48         },
     49     }, {
     50         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
     51         'md5': '101250fb127ef9ca3d73186ff22a47ce',
     52         'info_dict': {
     53             'id': '19544709',
     54             'display_id': '19544709',
     55             'ext': 'mp4',
     56             'title': 'Dragons 2 - Bande annonce finale VF',
     57             'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
     58             'thumbnail': r're:http://.*\.jpg',
     59             'duration': 144,
     60             'timestamp': 1397589900,
     61             'upload_date': '20140415',
     62             'view_count': int,
     63         },
     64     }, {
     65         'url': 'http://www.allocine.fr/video/video-19550147/',
     66         'md5': '3566c0668c0235e2d224fd8edb389f67',
     67         'info_dict': {
     68             'id': '19550147',
     69             'ext': 'mp4',
     70             'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
     71             'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
     72             'thumbnail': r're:http://.*\.jpg',
     73         },
     74     }]
     75 
     76     def _real_extract(self, url):
     77         display_id = self._match_id(url)
     78 
     79         webpage = self._download_webpage(url, display_id)
     80 
     81         formats = []
     82         quality = qualities(['ld', 'md', 'hd'])
     83 
     84         model = self._html_search_regex(
     85             r'data-model="([^"]+)"', webpage, 'data model', default=None)
     86         if model:
     87             model_data = self._parse_json(model, display_id)
     88             video = model_data['videos'][0]
     89             title = video['title']
     90             for video_url in video['sources'].values():
     91                 video_id, format_id = url_basename(video_url).split('_')[:2]
     92                 formats.append({
     93                     'format_id': format_id,
     94                     'quality': quality(format_id),
     95                     'url': video_url,
     96                 })
     97             duration = int_or_none(video.get('duration'))
     98             view_count = int_or_none(video.get('view_count'))
     99             timestamp = unified_timestamp(try_get(
    100                 video, lambda x: x['added_at']['date'], compat_str))
    101         else:
    102             video_id = display_id
    103             media_data = self._download_json(
    104                 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
    105             title = remove_end(
    106                 self._html_search_regex(
    107                     r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
    108                 ' - AlloCiné')
    109             for key, value in media_data['video'].items():
    110                 if not key.endswith('Path'):
    111                     continue
    112                 format_id = key[:-len('Path')]
    113                 formats.append({
    114                     'format_id': format_id,
    115                     'quality': quality(format_id),
    116                     'url': value,
    117                 })
    118             duration, view_count, timestamp = [None] * 3
    119 
    120         self._sort_formats(formats)
    121 
    122         return {
    123             'id': video_id,
    124             'display_id': display_id,
    125             'title': title,
    126             'description': self._og_search_description(webpage),
    127             'thumbnail': self._og_search_thumbnail(webpage),
    128             'duration': duration,
    129             'timestamp': timestamp,
    130             'view_count': view_count,
    131             'formats': formats,
    132         }