allocine.py (4962B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    int_or_none,
    qualities,
    remove_end,
    try_get,
    unified_timestamp,
    url_basename,
)


class AllocineIE(InfoExtractor):
    """Extractor for videos hosted on allocine.fr.

    Matches article, video player and film pages (see ``_VALID_URL``); the
    numeric identifier found in the URL is used as the display id.
    """

    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'

    _TESTS = [{
        'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
        'md5': '0c9fcf59a841f65635fa300ac43d8269',
        'info_dict': {
            'id': '19546517',
            'display_id': '18635087',
            'ext': 'mp4',
            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 39,
            'timestamp': 1404273600,
            'upload_date': '20140702',
            'view_count': int,
        },
    }, {
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
        'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
        'info_dict': {
            'id': '19540403',
            'display_id': '19540403',
            'ext': 'mp4',
            'title': 'Planes 2 Bande-annonce VF',
            'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 69,
            'timestamp': 1385659800,
            'upload_date': '20131128',
            'view_count': int,
        },
    }, {
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
        'md5': '101250fb127ef9ca3d73186ff22a47ce',
        'info_dict': {
            'id': '19544709',
            'display_id': '19544709',
            'ext': 'mp4',
            'title': 'Dragons 2 - Bande annonce finale VF',
            'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 144,
            'timestamp': 1397589900,
            'upload_date': '20140415',
            'view_count': int,
        },
    }, {
        'url': 'http://www.allocine.fr/video/video-19550147/',
        'md5': '3566c0668c0235e2d224fd8edb389f67',
        'info_dict': {
            'id': '19550147',
            'ext': 'mp4',
            'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
            'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the allocine.fr page at ``url``.

        Two sources of media data are supported:

        * a JSON blob in the page's ``data-model`` attribute, whose first
          ``videos`` entry supplies title, sources, duration, view count
          and the ``added_at`` date;
        * otherwise the ``AcVisiondataV5.ashx`` web service, queried with
          the id from the URL, whose ``video`` object exposes one
          ``<format>Path`` key per available rendition.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``timestamp``,
        ``view_count`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        formats = []
        # Maps a format id to a numeric preference based on its position in
        # this list (presumably lowest to highest quality).
        quality = qualities(['ld', 'md', 'hd'])

        model = self._html_search_regex(
            r'data-model="([^"]+)"', webpage, 'data model', default=None)
        if model:
            model_data = self._parse_json(model, display_id)
            video = model_data['videos'][0]
            title = video['title']
            video_id = None
            for video_url in video['sources'].values():
                if not video_url:
                    # A rendition without a URL cannot be downloaded.
                    continue
                # Source file names look like '<media id>_<format id>_...';
                # the media id and format id are taken from the first two
                # underscore-separated parts.
                name_parts = url_basename(video_url).split('_')
                if len(name_parts) >= 2:
                    video_id, format_id = name_parts[:2]
                else:
                    # Unexpected file name: keep the format but do not let
                    # a single odd URL abort the whole extraction.
                    format_id = None
                formats.append({
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': video_url,
                })
            if video_id is None:
                # No source name yielded a media id; fall back to the id
                # from the page URL, as the web-service branch does.
                video_id = display_id
            duration = int_or_none(video.get('duration'))
            view_count = int_or_none(video.get('view_count'))
            timestamp = unified_timestamp(try_get(
                video, lambda x: x['added_at']['date'], compat_str))
        else:
            # No embedded model: the id in the URL is the media id itself.
            video_id = display_id
            media_data = self._download_json(
                'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
            title = remove_end(
                self._html_search_regex(
                    r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
                ' - AlloCiné')
            for key, value in media_data['video'].items():
                # Only '<format>Path' keys carry media URLs.
                if not key.endswith('Path') or not value:
                    continue
                format_id = key[:-len('Path')]
                formats.append({
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': value,
                })
            # The web service response is not used for these fields.
            duration, view_count, timestamp = [None] * 3

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'formats': formats,
        }