miomio.py (5068B)
# coding: utf-8
from __future__ import unicode_literals

import random

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    xpath_text,
    int_or_none,
    ExtractorError,
    sanitized_Request,
)


class MioMioIE(InfoExtractor):
    """Extractor for miomio.tv watch pages (flash 'mioplayer' and 'h5' player)."""

    IE_NAME = 'miomio.tv'
    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
    _TESTS = [{
        # "type=video" in flashvars
        'url': 'http://www.miomio.tv/watch/cc88912/',
        'info_dict': {
            'id': '88912',
            'ext': 'flv',
            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
            'duration': 5923,
        },
        'skip': 'Unable to load videos',
    }, {
        'url': 'http://www.miomio.tv/watch/cc184024/',
        'info_dict': {
            'id': '43729',
            'title': '《动漫同人插画绘制》',
        },
        'playlist_mincount': 86,
        'skip': 'Unable to load videos',
    }, {
        'url': 'http://www.miomio.tv/watch/cc173113/',
        'info_dict': {
            'id': '173113',
            'title': 'The New Macbook 2015 上手试玩与简评'
        },
        'playlist_mincount': 2,
        'skip': 'Unable to load videos',
    }, {
        # new 'h5' player
        'url': 'http://www.miomio.tv/watch/cc273997/',
        'md5': '0b27a4b4495055d826813f8c3a6b2070',
        'info_dict': {
            'id': '273997',
            'ext': 'mp4',
            'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
        },
        'skip': 'Unable to load videos',
    }]

    def _extract_mioplayer(self, webpage, video_id, title, http_headers):
        """Build the list of segment entries for the flash-based mioplayer.

        webpage      -- HTML of the watch page; searched for the flashvars
                        query string that identifies the video config.
        video_id     -- id used for progress notes and as the base of each
                        segment id.
        title        -- base title; ' part <order>' is appended per segment
                        when the config provides an order.
        http_headers -- headers sent with the config request and attached to
                        every returned entry.

        Returns a list of dicts with 'id', 'url', 'title', 'duration' and
        'http_headers' keys, one per <durl> element that carries a url.

        Raises ExtractorError (expected) when the config has no usable
        'timelength' value.
        """
        xml_config = self._search_regex(
            r'flashvars="type=(?:sina|video)&(.+?)&',
            webpage, 'xml config')

        # skipping the following page causes lags and eventually connection drop-outs
        # NOTE: the id parameter must be the video id; the builtin `id`
        # function was previously interpolated here by mistake.
        self._request_webpage(
            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (video_id, random.randint(100, 999)),
            video_id)

        vid_config_request = sanitized_Request(
            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
            headers=http_headers)

        # the following xml contains the actual configuration information on the video file(s)
        vid_config = self._download_xml(vid_config_request, video_id)

        # A missing, non-numeric or zero timelength is treated as "no video".
        if not int_or_none(xpath_text(vid_config, 'timelength')):
            raise ExtractorError('Unable to load videos!', expected=True)

        entries = []
        for f in vid_config.findall('./durl'):
            segment_url = xpath_text(f, 'url', 'video url')
            if not segment_url:
                continue
            order = xpath_text(f, 'order', 'order')
            segment_id = video_id
            segment_title = title
            if order:
                segment_id += '-%s' % order
                segment_title += ' part %s' % order
            entries.append({
                'id': segment_id,
                'url': segment_url,
                'title': segment_title,
                # presumably <length> is in milliseconds, hence the 1000
                # scale -- confirm against int_or_none's signature
                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
                'http_headers': http_headers,
            })

        return entries

    def _download_chinese_webpage(self, *args, **kwargs):
        """Call _download_webpage with a Chinese Accept-Language header.

        All positional and keyword arguments are forwarded unchanged, except
        that 'Accept-Language' is forced in the 'headers' keyword argument.
        The caller's headers dict is copied, never modified in place.
        """
        # Requests with English locales return garbage
        headers = dict(kwargs.get('headers') or {})
        headers['Accept-Language'] = 'zh-TW,en-US;q=0.7,en;q=0.3'
        kwargs['headers'] = headers
        return self._download_webpage(*args, **kwargs)

    def _real_extract(self, url):
        """Extract a single video or a 'multi_video' result for a watch URL."""
        video_id = self._match_id(url)
        webpage = self._download_chinese_webpage(
            url, video_id)

        # The page's "description" meta tag is what is used as the title.
        title = self._html_search_meta(
            'description', webpage, 'title', fatal=True)

        mioplayer_path = self._search_regex(
            r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')

        if '_h5' in mioplayer_path:
            # HTML5 player: media entries are parsed from the player page.
            player_url = compat_urlparse.urljoin(url, mioplayer_path)
            player_webpage = self._download_chinese_webpage(
                player_url, video_id,
                note='Downloading player webpage', headers={'Referer': url})
            entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
            http_headers = {'Referer': player_url}
        else:
            # Flash player: entries come from the XML configuration.
            http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
            entries = self._extract_mioplayer(webpage, video_id, title, http_headers)

        if len(entries) == 1:
            # A lone segment is returned as a plain video under the page's
            # own id and title rather than as a one-item multi_video.
            segment = entries[0]
            segment['id'] = video_id
            segment['title'] = title
            segment['http_headers'] = http_headers
            return segment

        return {
            '_type': 'multi_video',
            'id': video_id,
            'entries': entries,
            'title': title,
            'http_headers': http_headers,
        }