discoveryvr.py (2129B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_duration,
)


class DiscoveryVRIE(InfoExtractor):
    """Extractor for ``discoveryvr.com/watch/<slug>`` video pages."""

    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video identified by the URL slug.

        The watch page embeds a JSON document in the JavaScript string
        ``root.DVR.bootstrapData``; its ``videos`` member is itself a
        JSON-encoded string holding an ``allVideos`` list.  The entry whose
        ``slug`` equals the slug taken from ``url`` supplies the metadata
        and the format URLs.

        Raises ExtractorError when no entry in ``allVideos`` carries the
        requested slug.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        bootstrap_data = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            webpage, 'bootstrap data')
        # The captured text is the body of a JS string literal, so its
        # backslash escapes must be undone before it parses as JSON.
        # NOTE(review): the unicode_escape codec decodes non-escaped bytes as
        # Latin-1, so raw (unescaped) non-ASCII characters in the page would
        # come out garbled -- confirm the site always escapes them.
        bootstrap_data = self._parse_json(
            bootstrap_data.encode('utf-8').decode('unicode_escape'),
            display_id)
        # 'videos' is a JSON document serialized inside the outer JSON.
        videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']

        # Use a default so a missing slug yields a meaningful extractor error
        # instead of a bare StopIteration escaping from next().
        video_data = next(
            (video for video in videos if video.get('slug') == display_id),
            None)
        if video_data is None:
            raise ExtractorError(
                'Unable to find video data for %s' % display_id)

        series = video_data.get('showTitle')
        title = episode = video_data.get('title') or series
        # Prefix the show name unless the title already is the show name.
        if series and series != title:
            title = '%s - %s' % (series, title)

        formats = []
        for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
            f_url = video_data.get(f)
            if not f_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': f_url,
            })

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }