ruutu.py (9346B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
    determine_ext,
    ExtractorError,
    find_xpath_attr,
    int_or_none,
    unified_strdate,
    url_or_none,
    xpath_attr,
    xpath_text,
)


class RuutuIE(InfoExtractor):
    """Extractor for ruutu.fi / supla.fi media and the nelonenmedia embed player.

    Metadata and stream URLs are read from the XML document served by
    ``_API_BASE``/media-xml-cache for the numeric id matched by ``_VALID_URL``.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
                            static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [
        {
            'url': 'http://www.ruutu.fi/video/2058907',
            'md5': 'ab2093f39be1ca8581963451b3c0234f',
            'info_dict': {
                'id': '2058907',
                'ext': 'mp4',
                'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
                'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 114,
                'age_limit': 0,
            },
        },
        {
            'url': 'http://www.ruutu.fi/video/2057306',
            'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
            'info_dict': {
                'id': '2057306',
                'ext': 'mp4',
                'title': 'Superpesis: katso koko kausi Ruudussa',
                'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 40,
                'age_limit': 0,
            },
        },
        {
            'url': 'http://www.supla.fi/supla/2231370',
            'md5': 'df14e782d49a2c0df03d3be2a54ef949',
            'info_dict': {
                'id': '2231370',
                'ext': 'mp4',
                'title': 'Osa 1: Mikael Jungner',
                'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
                'thumbnail': r're:^https?://.*\.jpg$',
                'age_limit': 0,
            },
        },
        # Episode where <SourceFile> is "NOT-USED", but has other
        # downloadable sources available.
        {
            'url': 'http://www.ruutu.fi/video/3193728',
            'only_matching': True,
        },
        {
            # audio podcast
            'url': 'https://www.supla.fi/supla/3382410',
            'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
            'info_dict': {
                'id': '3382410',
                'ext': 'mp3',
                'title': 'Mikä ihmeen poltergeist?',
                'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
                'thumbnail': r're:^https?://.*\.jpg$',
                'age_limit': 0,
            },
            'expected_warnings': [
                'HTTP Error 502: Bad Gateway',
                'Failed to download m3u8 information',
            ],
        },
        {
            'url': 'http://www.supla.fi/audio/2231370',
            'only_matching': True,
        },
        {
            'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
            'only_matching': True,
        },
        {
            # episode
            'url': 'https://www.ruutu.fi/video/3401964',
            'info_dict': {
                'id': '3401964',
                'ext': 'mp4',
                'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
                'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 2582,
                'age_limit': 12,
                'upload_date': '20190508',
                'series': 'Temptation Island Suomi',
                'season_number': 5,
                'episode_number': 17,
                'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # premium
            'url': 'https://www.ruutu.fi/video/3618715',
            'only_matching': True,
        },
    ]
    _API_BASE = 'https://gatling.nelonenmedia.fi'

    def _real_extract(self, url):
        """Build the info dict for the media id embedded in *url*.

        Downloads the media XML for the id, walks the ``<Clip>`` element
        collecting every usable stream into ``formats`` and reads the
        remaining metadata from the same document.

        Raises ExtractorError (expected) when no formats were found and the
        clip is DRM protected or is not marked as ``free``.
        """
        video_id = self._match_id(url)

        video_xml = self._download_xml(
            '%s/media-xml-cache' % self._API_BASE, video_id,
            query={'id': video_id})

        formats = []
        # Only used for membership tests, so a set is sufficient.
        processed_urls = set()

        def extract_formats(node):
            """Recursively collect formats from *Files / *File elements under node."""
            for child in node:
                if child.tag.endswith('Files'):
                    extract_formats(child)
                    continue
                if not child.tag.endswith('File'):
                    continue

                video_url = child.text
                if (not video_url or video_url in processed_urls
                        or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
                    continue
                processed_urls.add(video_url)
                ext = determine_ext(video_url)
                if ext == 'mpd':
                    # DASH is intentionally skipped: video-only and
                    # audio-only streams are of different duration
                    # resulting in out of sync issue. Bail out before
                    # requesting an authenticated URL that would be
                    # discarded anyway.
                    continue
                auth_video_url = url_or_none(self._download_webpage(
                    '%s/auth/access/v2' % self._API_BASE, video_id,
                    note='Downloading authenticated %s stream URL' % ext,
                    fatal=False, query={'stream': video_url}))
                if auth_video_url:
                    processed_urls.add(auth_video_url)
                    video_url = auth_video_url
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls',
                        fatal=False))
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id='hds', fatal=False))
                elif ext == 'mp3' or child.tag == 'AudioMediaFile':
                    formats.append({
                        'format_id': 'audio',
                        'url': video_url,
                        'vcodec': 'none',
                    })
                else:
                    proto = compat_urllib_parse_urlparse(video_url).scheme
                    # Only HTTP* elements and rtmp URLs are treated as
                    # direct streams.
                    if not child.tag.startswith('HTTP') and proto != 'rtmp':
                        continue
                    preference = -1 if proto == 'rtmp' else 1
                    label = child.get('label')
                    tbr = int_or_none(child.get('bitrate'))
                    format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
                    if not self._is_valid_url(video_url, video_id, format_id):
                        continue
                    # Pad the split result so that a missing or malformed
                    # resolution attribute (no "x" separator) yields None
                    # instead of failing to unpack.
                    dimensions = (child.get('resolution') or '').split('x')
                    width, height = [
                        int_or_none(x) for x in (dimensions + [None, None])[:2]]
                    formats.append({
                        'format_id': format_id,
                        'url': video_url,
                        'width': width,
                        'height': height,
                        'tbr': tbr,
                        'preference': preference,
                    })

        clip = video_xml.find('./Clip')
        # find() returns None when the element is absent; iterating None
        # would raise TypeError instead of a meaningful extractor error.
        if clip is not None:
            extract_formats(clip)

        def pv(name):
            """Return the value of the named passthrough variable, or None."""
            node = find_xpath_attr(
                video_xml, './Clip/PassthroughVariables/variable', 'name', name)
            if node is not None:
                return node.get('value')

        if not formats:
            drm = xpath_text(video_xml, './Clip/DRM', default=None)
            if drm:
                raise ExtractorError('This video is DRM protected.', expected=True)
            ns_st_cds = pv('ns_st_cds')
            if ns_st_cds != 'free':
                # The variable may be missing altogether; avoid reporting
                # "This video is None." in that case.
                raise ExtractorError(
                    'This video is %s.' % (ns_st_cds or 'not available'),
                    expected=True)

        self._sort_formats(formats)

        themes = pv('themes')

        return {
            'id': video_id,
            'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
            'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
            'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
            'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
            'upload_date': unified_strdate(pv('date_start')),
            'series': pv('series_name'),
            'season_number': int_or_none(pv('season_number')),
            'episode_number': int_or_none(pv('episode_number')),
            'categories': themes.split(',') if themes else [],
            'formats': formats,
        }