youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

keezmovies.py (4716B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..aes import aes_decrypt_text
      7 from ..compat import compat_urllib_parse_unquote
      8 from ..utils import (
      9     determine_ext,
     10     ExtractorError,
     11     int_or_none,
     12     str_to_int,
     13     strip_or_none,
     14     url_or_none,
     15 )
     16 
     17 
     18 class KeezMoviesIE(InfoExtractor):
     19     _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
     20     _TESTS = [{
     21         'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
     22         'md5': '2ac69cdb882055f71d82db4311732a1a',
     23         'info_dict': {
     24             'id': '18070681',
     25             'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
     26             'ext': 'mp4',
     27             'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
     28             'thumbnail': None,
     29             'view_count': int,
     30             'age_limit': 18,
     31         }
     32     }, {
     33         'url': 'http://www.keezmovies.com/video/18070681',
     34         'only_matching': True,
     35     }]
     36 
     37     def _extract_info(self, url, fatal=True):
     38         mobj = re.match(self._VALID_URL, url)
     39         video_id = mobj.group('id')
     40         display_id = (mobj.group('display_id')
     41                       if 'display_id' in mobj.groupdict()
     42                       else None) or mobj.group('id')
     43 
     44         webpage = self._download_webpage(
     45             url, display_id, headers={'Cookie': 'age_verified=1'})
     46 
     47         formats = []
     48         format_urls = set()
     49 
     50         title = None
     51         thumbnail = None
     52         duration = None
     53         encrypted = False
     54 
     55         def extract_format(format_url, height=None):
     56             format_url = url_or_none(format_url)
     57             if not format_url or not format_url.startswith(('http', '//')):
     58                 return
     59             if format_url in format_urls:
     60                 return
     61             format_urls.add(format_url)
     62             tbr = int_or_none(self._search_regex(
     63                 r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
     64             if not height:
     65                 height = int_or_none(self._search_regex(
     66                     r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
     67             if encrypted:
     68                 format_url = aes_decrypt_text(
     69                     video_url, title, 32).decode('utf-8')
     70             formats.append({
     71                 'url': format_url,
     72                 'format_id': '%dp' % height if height else None,
     73                 'height': height,
     74                 'tbr': tbr,
     75             })
     76 
     77         flashvars = self._parse_json(
     78             self._search_regex(
     79                 r'flashvars\s*=\s*({.+?});', webpage,
     80                 'flashvars', default='{}'),
     81             display_id, fatal=False)
     82 
     83         if flashvars:
     84             title = flashvars.get('video_title')
     85             thumbnail = flashvars.get('image_url')
     86             duration = int_or_none(flashvars.get('video_duration'))
     87             encrypted = flashvars.get('encrypted') is True
     88             for key, value in flashvars.items():
     89                 mobj = re.search(r'quality_(\d+)[pP]', key)
     90                 if mobj:
     91                     extract_format(value, int(mobj.group(1)))
     92             video_url = flashvars.get('video_url')
     93             if video_url and determine_ext(video_url, None):
     94                 extract_format(video_url)
     95 
     96         video_url = self._html_search_regex(
     97             r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
     98             webpage, 'video url', default=None, group='url')
     99         if video_url:
    100             extract_format(compat_urllib_parse_unquote(video_url))
    101 
    102         if not formats:
    103             if 'title="This video is no longer available"' in webpage:
    104                 raise ExtractorError(
    105                     'Video %s is no longer available' % video_id, expected=True)
    106 
    107         try:
    108             self._sort_formats(formats)
    109         except ExtractorError:
    110             if fatal:
    111                 raise
    112 
    113         if not title:
    114             title = self._html_search_regex(
    115                 r'<h1[^>]*>([^<]+)', webpage, 'title')
    116 
    117         return webpage, {
    118             'id': video_id,
    119             'display_id': display_id,
    120             'title': strip_or_none(title),
    121             'thumbnail': thumbnail,
    122             'duration': duration,
    123             'age_limit': 18,
    124             'formats': formats,
    125         }
    126 
    127     def _real_extract(self, url):
    128         webpage, info = self._extract_info(url, fatal=False)
    129         if not info['formats']:
    130             return self.url_result(url, 'Generic')
    131         info['view_count'] = str_to_int(self._search_regex(
    132             r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
    133         return info