movingimage.py (1774B)
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    unescapeHTML,
    parse_duration,
)


class MovingImageIE(InfoExtractor):
    """Extractor for film pages at movingimage.nls.uk/film/<numeric id>."""

    _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
    _TEST = {
        'url': 'http://movingimage.nls.uk/film/3561',
        'md5': '4caa05c2b38453e6f862197571a7be2f',
        'info_dict': {
            'id': '3561',
            'ext': 'mp4',
            'title': 'SHETLAND WOOL',
            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
            'duration': 900,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single film page.

        Downloads the page at ``url``, reads the m3u8 manifest URL from the
        ``file: "..."`` entry found in the page, and scrapes the title,
        description, running time and thumbnail from the page markup.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``duration`` and ``thumbnail``.  Only the manifest
        URL and the title are looked up as mandatory; the other fields are
        looked up with ``fatal=False``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        manifest_url = self._html_search_regex(
            r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL')
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')

        def search_field(field_name, fatal=False):
            """Return the text of the ``field_content`` span that follows the
            ``field_title`` span labelled ``<field_name>:``.

            ``fatal`` is forwarded unchanged to ``_search_regex``.
            """
            return self._search_regex(
                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
                # Label the lookup after the field actually being searched
                # (previously every field was reported as 'title').
                webpage, field_name.lower(), fatal=fatal)

        # Leading/trailing parentheses and square brackets are removed from
        # the title.
        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
        description = unescapeHTML(search_field('Description'))
        duration = parse_duration(search_field('Running time'))
        thumbnail = self._search_regex(
            r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }