youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

yapfiles.py (3212B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     ExtractorError,
      9     int_or_none,
     10     qualities,
     11     unescapeHTML,
     12     url_or_none,
     13 )
     14 
     15 
     16 class YapFilesIE(InfoExtractor):
     17     _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
     18     _VALID_URL = r'https?:%s' % _YAPFILES_URL
     19     _TESTS = [{
     20         # with hd
     21         'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
     22         'md5': '2db19e2bfa2450568868548a1aa1956c',
     23         'info_dict': {
     24             'id': 'vMDE1NjcyNDUt0413',
     25             'ext': 'mp4',
     26             'title': 'Самый худший пароль WIFI',
     27             'thumbnail': r're:^https?://.*\.jpg$',
     28             'duration': 72,
     29         },
     30     }, {
     31         # without hd
     32         'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
     33         'only_matching': True,
     34     }]
     35 
     36     @staticmethod
     37     def _extract_urls(webpage):
     38         return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
     39             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
     40             % YapFilesIE._YAPFILES_URL, webpage)]
     41 
     42     def _real_extract(self, url):
     43         video_id = self._match_id(url)
     44 
     45         webpage = self._download_webpage(url, video_id, fatal=False)
     46 
     47         player_url = None
     48         query = {}
     49         if webpage:
     50             player_url = self._search_regex(
     51                 r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
     52                 'player url', default=None, group='url')
     53 
     54         if not player_url:
     55             player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
     56             query = {
     57                 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
     58                 'type': 'json',
     59                 'ref': url,
     60             }
     61 
     62         player = self._download_json(
     63             player_url, video_id, query=query)['player']
     64 
     65         playlist_url = player['playlist']
     66         title = player['title']
     67         thumbnail = player.get('poster')
     68 
     69         if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
     70             raise ExtractorError(
     71                 'Video %s has been removed' % video_id, expected=True)
     72 
     73         playlist = self._download_json(
     74             playlist_url, video_id)['player']['main']
     75 
     76         hd_height = int_or_none(player.get('hd'))
     77 
     78         QUALITIES = ('sd', 'hd')
     79         quality_key = qualities(QUALITIES)
     80         formats = []
     81         for format_id in QUALITIES:
     82             is_hd = format_id == 'hd'
     83             format_url = url_or_none(playlist.get(
     84                 'file%s' % ('_hd' if is_hd else '')))
     85             if not format_url:
     86                 continue
     87             formats.append({
     88                 'url': format_url,
     89                 'format_id': format_id,
     90                 'quality': quality_key(format_id),
     91                 'height': hd_height if is_hd else None,
     92             })
     93         self._sort_formats(formats)
     94 
     95         return {
     96             'id': video_id,
     97             'title': title,
     98             'thumbnail': thumbnail,
     99             'duration': int_or_none(player.get('length')),
    100             'formats': formats,
    101         }