telemb.py (2966B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import remove_start 8 9 10 class TeleMBIE(InfoExtractor): 11 _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html' 12 _TESTS = [ 13 { 14 'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html', 15 'md5': 'f45ea69878516ba039835794e0f8f783', 16 'info_dict': { 17 'id': '13466', 18 'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-', 19 'ext': 'mp4', 20 'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages', 21 'description': 'md5:bc5225f47b17c309761c856ad4776265', 22 'thumbnail': r're:^http://.*\.(?:jpg|png)$', 23 } 24 }, 25 { 26 # non-ASCII characters in download URL 27 'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html', 28 'md5': '6e9682736e5ccd4eab7f21e855350733', 29 'info_dict': { 30 'id': '13514', 31 'display_id': 'les-reportages-havre-incendie-mortel', 32 'ext': 'mp4', 33 'title': 'Havré - Incendie mortel - Les reportages', 34 'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a', 35 'thumbnail': r're:^http://.*\.(?:jpg|png)$', 36 } 37 }, 38 ] 39 40 def _real_extract(self, url): 41 mobj = re.match(self._VALID_URL, url) 42 video_id = mobj.group('id') 43 display_id = mobj.group('display_id') 44 45 webpage = self._download_webpage(url, display_id) 46 47 formats = [] 48 for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage): 49 fmt = { 50 'url': video_url, 51 'format_id': video_url.split(':')[0] 52 } 53 rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) 54 if rtmp: 55 fmt.update({ 56 'play_path': rtmp.group('playpath'), 57 'app': rtmp.group('app'), 58 'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf', 59 'page_url': 'http://www.telemb.be', 60 'preference': -1, 61 }) 62 formats.append(fmt) 63 self._sort_formats(formats) 64 65 title = remove_start(self._og_search_title(webpage), 'TéléMB : ') 66 description = self._html_search_regex( 67 r'<meta property="og:description" content="(.+?)" />', 68 webpage, 'description', fatal=False) 69 thumbnail = self._og_search_thumbnail(webpage) 70 71 return { 72 'id': video_id, 73 'display_id': display_id, 74 'title': title, 75 'description': description, 76 'thumbnail': thumbnail, 77 'formats': formats, 78 }