youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

miomio.py (5068B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import random
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_urlparse
      8 from ..utils import (
      9     xpath_text,
     10     int_or_none,
     11     ExtractorError,
     12     sanitized_Request,
     13 )
     14 
     15 
     16 class MioMioIE(InfoExtractor):
     17     IE_NAME = 'miomio.tv'
     18     _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
     19     _TESTS = [{
     20         # "type=video" in flashvars
     21         'url': 'http://www.miomio.tv/watch/cc88912/',
     22         'info_dict': {
     23             'id': '88912',
     24             'ext': 'flv',
     25             'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
     26             'duration': 5923,
     27         },
     28         'skip': 'Unable to load videos',
     29     }, {
     30         'url': 'http://www.miomio.tv/watch/cc184024/',
     31         'info_dict': {
     32             'id': '43729',
     33             'title': '《动漫同人插画绘制》',
     34         },
     35         'playlist_mincount': 86,
     36         'skip': 'Unable to load videos',
     37     }, {
     38         'url': 'http://www.miomio.tv/watch/cc173113/',
     39         'info_dict': {
     40             'id': '173113',
     41             'title': 'The New Macbook 2015 上手试玩与简评'
     42         },
     43         'playlist_mincount': 2,
     44         'skip': 'Unable to load videos',
     45     }, {
     46         # new 'h5' player
     47         'url': 'http://www.miomio.tv/watch/cc273997/',
     48         'md5': '0b27a4b4495055d826813f8c3a6b2070',
     49         'info_dict': {
     50             'id': '273997',
     51             'ext': 'mp4',
     52             'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
     53         },
     54         'skip': 'Unable to load videos',
     55     }]
     56 
     57     def _extract_mioplayer(self, webpage, video_id, title, http_headers):
     58         xml_config = self._search_regex(
     59             r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
     60             webpage, 'xml config')
     61 
     62         # skipping the following page causes lags and eventually connection drop-outs
     63         self._request_webpage(
     64             'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
     65             video_id)
     66 
     67         vid_config_request = sanitized_Request(
     68             'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
     69             headers=http_headers)
     70 
     71         # the following xml contains the actual configuration information on the video file(s)
     72         vid_config = self._download_xml(vid_config_request, video_id)
     73 
     74         if not int_or_none(xpath_text(vid_config, 'timelength')):
     75             raise ExtractorError('Unable to load videos!', expected=True)
     76 
     77         entries = []
     78         for f in vid_config.findall('./durl'):
     79             segment_url = xpath_text(f, 'url', 'video url')
     80             if not segment_url:
     81                 continue
     82             order = xpath_text(f, 'order', 'order')
     83             segment_id = video_id
     84             segment_title = title
     85             if order:
     86                 segment_id += '-%s' % order
     87                 segment_title += ' part %s' % order
     88             entries.append({
     89                 'id': segment_id,
     90                 'url': segment_url,
     91                 'title': segment_title,
     92                 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
     93                 'http_headers': http_headers,
     94             })
     95 
     96         return entries
     97 
     98     def _download_chinese_webpage(self, *args, **kwargs):
     99         # Requests with English locales return garbage
    100         headers = {
    101             'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3',
    102         }
    103         kwargs.setdefault('headers', {}).update(headers)
    104         return self._download_webpage(*args, **kwargs)
    105 
    106     def _real_extract(self, url):
    107         video_id = self._match_id(url)
    108         webpage = self._download_chinese_webpage(
    109             url, video_id)
    110 
    111         title = self._html_search_meta(
    112             'description', webpage, 'title', fatal=True)
    113 
    114         mioplayer_path = self._search_regex(
    115             r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
    116 
    117         if '_h5' in mioplayer_path:
    118             player_url = compat_urlparse.urljoin(url, mioplayer_path)
    119             player_webpage = self._download_chinese_webpage(
    120                 player_url, video_id,
    121                 note='Downloading player webpage', headers={'Referer': url})
    122             entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
    123             http_headers = {'Referer': player_url}
    124         else:
    125             http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
    126             entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
    127 
    128         if len(entries) == 1:
    129             segment = entries[0]
    130             segment['id'] = video_id
    131             segment['title'] = title
    132             segment['http_headers'] = http_headers
    133             return segment
    134 
    135         return {
    136             '_type': 'multi_video',
    137             'id': video_id,
    138             'entries': entries,
    139             'title': title,
    140             'http_headers': http_headers,
    141         }