pladform.py (4244B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_urlparse 8 from ..utils import ( 9 determine_ext, 10 ExtractorError, 11 int_or_none, 12 xpath_text, 13 qualities, 14 ) 15 16 17 class PladformIE(InfoExtractor): 18 _VALID_URL = r'''(?x) 19 https?:// 20 (?: 21 (?: 22 out\.pladform\.ru/player| 23 static\.pladform\.ru/player\.swf 24 ) 25 \?.*\bvideoid=| 26 video\.pladform\.ru/catalog/video/videoid/ 27 ) 28 (?P<id>\d+) 29 ''' 30 _TESTS = [{ 31 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0', 32 'md5': '53362fac3a27352da20fa2803cc5cd6f', 33 'info_dict': { 34 'id': '3777899', 35 'ext': 'mp4', 36 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко', 37 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95', 38 'thumbnail': r're:^https?://.*\.jpg$', 39 'duration': 3190, 40 }, 41 }, { 42 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', 43 'only_matching': True, 44 }, { 45 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', 46 'only_matching': True, 47 }] 48 49 @staticmethod 50 def _extract_url(webpage): 51 mobj = re.search( 52 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) 53 if mobj: 54 return mobj.group('url') 55 56 def _real_extract(self, url): 57 video_id = self._match_id(url) 58 59 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 60 pl = qs.get('pl', ['1'])[0] 61 62 video = self._download_xml( 63 'http://out.pladform.ru/getVideo', video_id, query={ 64 'pl': pl, 65 'videoid': video_id, 66 }) 67 68 def fail(text): 69 raise ExtractorError( 70 '%s returned error: %s' % (self.IE_NAME, text), 71 expected=True) 72 73 if video.tag == 'error': 74 fail(video.text) 75 76 quality = qualities(('ld', 'sd', 'hd')) 77 78 formats = [] 79 for src in video.findall('./src'): 80 if src is None: 81 continue 82 format_url = src.text 83 if not format_url: 84 continue 85 if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8': 86 formats.extend(self._extract_m3u8_formats( 87 format_url, video_id, 'mp4', entry_protocol='m3u8_native', 88 m3u8_id='hls', fatal=False)) 89 else: 90 formats.append({ 91 'url': src.text, 92 'format_id': src.get('quality'), 93 'quality': quality(src.get('quality')), 94 }) 95 96 if not formats: 97 error = xpath_text(video, './cap', 'error', default=None) 98 if error: 99 fail(error) 100 101 self._sort_formats(formats) 102 103 webpage = self._download_webpage( 104 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, 105 video_id) 106 107 title = self._og_search_title(webpage, fatal=False) or xpath_text( 108 video, './/title', 'title', fatal=True) 109 description = self._search_regex( 110 r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False) 111 thumbnail = self._og_search_thumbnail(webpage) or xpath_text( 112 video, './/cover', 'cover') 113 114 duration = int_or_none(xpath_text(video, './/time', 'duration')) 115 age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) 116 117 return { 118 'id': video_id, 119 'title': title, 120 'description': description, 121 'thumbnail': thumbnail, 122 'duration': duration, 123 'age_limit': age_limit, 124 'formats': formats, 125 }