konserthusetplay.py (4505B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    float_or_none,
    int_or_none,
    url_or_none,
)


class KonserthusetPlayIE(InfoExtractor):
    """Extractor for videos on konserthusetplay.se and rspoplay.se.

    The page embeds a csp.picsearch.com REST URL; the player configuration
    returned by that endpoint supplies the formats, metadata and subtitles.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
        'md5': 'e3fd47bf44e864bd23c08e487abe1967',
        'info_dict': {
            'id': 'CKDDnlCY-dhWAAqiMERd-A',
            'ext': 'mp4',
            'title': 'Orkesterns instrument: Valthornen',
            'description': 'md5:f10e1f0030202020396a4d712d2fa827',
            'thumbnail': 're:^https?://.*$',
            'duration': 398.76,
        },
    }, {
        'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Collects HLS formats from the source's ``url``, RTMP formats from
        its ``bitrates`` (when a ``netConnectionUrl`` is configured) and an
        HTTP format from ``fallbackUrl``; returns id, title, description,
        thumbnail, duration, formats and subtitles.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The 'e' query parameter of the embedded picsearch REST URL
        # identifies the media for the follow-up API request.
        e = self._search_regex(
            r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')

        # Only the text between the first '{' and the last '}' is parsed;
        # presumably the response is wrapped (e.g. JSONP) -- TODO confirm.
        rest = self._download_json(
            'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
            video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

        media = rest['media']
        player_config = media['playerconfig']
        playlist = player_config['playlist']

        # First playlist entry that carries either bitrates or a provider.
        source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))

        FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'

        formats = []

        m3u8_url = source.get('url')
        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        fallback_url = source.get('fallbackUrl')
        fallback_format_id = None
        if fallback_url:
            fallback_format_id = self._search_regex(
                FORMAT_ID_REGEX, fallback_url, 'format id', default=None)

        connection_url = (player_config.get('rtmp', {}).get(
            'netConnectionUrl') or player_config.get(
            'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
        if connection_url:
            # ``source`` may have been selected because of 'provider' alone,
            # so 'bitrates' can be absent; treat that as "no RTMP formats"
            # instead of failing the whole extraction with a KeyError.
            for bitrate in source.get('bitrates') or []:
                video_url = bitrate.get('url')
                if not video_url:
                    continue
                format_id = self._search_regex(
                    FORMAT_ID_REGEX, video_url, 'format id', default=None)
                f_common = {
                    'vbr': int_or_none(bitrate.get('bitrate')),
                    'width': int_or_none(bitrate.get('width')),
                    'height': int_or_none(bitrate.get('height')),
                }
                rtmp_format = f_common.copy()
                rtmp_format.update({
                    'url': connection_url,
                    'play_path': video_url,
                    'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
                    'ext': 'flv',
                })
                formats.append(rtmp_format)
                # The fallback URL is the HTTP variant of the bitrate whose
                # format id it shares, so it inherits that bitrate's
                # vbr/width/height.
                if format_id and format_id == fallback_format_id:
                    http_format = f_common.copy()
                    http_format.update({
                        'url': fallback_url,
                        'format_id': 'http-%s' % format_id,
                    })
                    formats.append(http_format)

        # Last resort: a bare fallback URL with no further metadata.
        if not formats and fallback_url:
            formats.append({
                'url': fallback_url,
            })

        self._sort_formats(formats)

        title = player_config.get('title') or media['title']
        description = player_config.get('mediaInfo', {}).get('description')
        thumbnail = media.get('image')
        # Divided by 1000; presumably the API reports milliseconds.
        duration = float_or_none(media.get('duration'), 1000)

        subtitles = {}
        captions = source.get('captionsAvailableLanguages')
        if isinstance(captions, dict):
            for lang, subtitle_url in captions.items():
                subtitle_url = url_or_none(subtitle_url)
                # Entries keyed 'none' are skipped along with invalid URLs.
                if lang != 'none' and subtitle_url:
                    subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }