nobelprize.py (2123B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    js_to_json,
    mimetype2ext,
    determine_ext,
    update_url_query,
    get_element_by_attribute,
    int_or_none,
)


class NobelPrizeIE(InfoExtractor):
    """Extractor for nobelprize.org media player pages (``mediaplayer...id=<digits>``)."""

    _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
        'md5': '04c81e5714bb36cc4e2232fee1d8157f',
        'info_dict': {
            'id': '2636',
            'ext': 'mp4',
            'title': 'Announcement of the 2016 Nobel Prize in Physics',
            'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the media player page at ``url``.

        The page is downloaded and the JavaScript object assigned to
        ``var config`` is parsed (via ``js_to_json``); its ``media`` entry
        supplies the title, the duration and the list of sources.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``duration`` and ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Pull the inline player configuration out of the page, then parse it.
        config_js = self._search_regex(
            r'(?s)var\s*config\s*=\s*({.+?});', webpage, 'config')
        config = self._parse_json(config_js, video_id, js_to_json)
        media = config['media']
        title = media['title']

        formats = []
        for entry in media.get('source', []):
            src_url = entry.get('src')
            if src_url:
                # Prefer the declared MIME type; fall back to the URL's
                # extension when the type yields nothing.
                kind = mimetype2ext(entry.get('type')) or determine_ext(src_url)
                if kind == 'm3u8':
                    hls_formats = self._extract_m3u8_formats(
                        src_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False)
                    formats.extend(hls_formats)
                elif kind == 'f4m':
                    # The manifest URL is requested with an added
                    # ``hdcore`` query parameter.
                    manifest_url = update_url_query(
                        src_url, {'hdcore': '3.7.0'})
                    hds_formats = self._extract_f4m_formats(
                        manifest_url, video_id, f4m_id='hds', fatal=False)
                    formats.extend(hds_formats)
                else:
                    # Anything else is recorded as a plain URL format.
                    formats.append({
                        'url': src_url,
                    })
        self._sort_formats(formats)

        description = get_element_by_attribute(
            'itemprop', 'description', webpage)
        duration = int_or_none(media.get('duration'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }