jeuxvideo.py (2041B)
# coding: utf-8

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class JeuxVideoIE(InfoExtractor):
    """Extractor for video pages hosted on jeuxvideo.com."""

    # The single capture group is the last path component of the page URL
    # (without the ``.htm`` suffix); it is only used as a display name while
    # the page is being downloaded.
    _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'

    _TESTS = [{
        'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
        'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
        'info_dict': {
            'id': '114765',
            'ext': 'mp4',
            'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
        },
    }, {
        'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        The page is downloaded, the path of the player's JSON config is
        pulled out of a ``data-src``/``data-srcset-video`` attribute, and the
        numeric video id is read from the ``id=`` parameter of that config
        URL.  One format entry is produced per item of the config's
        ``sources`` list.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``thumbnail``.
        """
        # Until the page has been fetched, the URL slug is the only name
        # available to identify this download.
        url_slug = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, url_slug)

        # The 'name' meta lookup is tried first; the Open Graph title is only
        # consulted when that lookup yields nothing truthy.
        page_title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)

        # The page only carries a site-relative path to the config.
        config_path = self._html_search_regex(
            r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
            webpage, 'config URL')
        config_url = 'http://www.jeuxvideo.com' + config_path

        video_id = self._search_regex(r'id=(\d+)', config_url, 'video ID')

        config = self._download_json(
            config_url, page_title, 'Downloading JSON config')

        # Formats are emitted in the reverse of the order used by the config
        # (presumably so that they end up ordered worst-to-best -- confirm
        # against the site's JSON).
        formats = []
        for source in reversed(config['sources']):
            media_url = source['file']
            label = source['label']
            formats.append({
                'url': media_url,
                'format_id': label,
                'resolution': label,
            })

        description = self._og_search_description(webpage)
        thumbnail = config.get('image')

        return {
            'id': video_id,
            'title': page_title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
        }