youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

cliphunter.py (2533B)


      1 from __future__ import unicode_literals
      2 
      3 from .common import InfoExtractor
      4 from ..utils import (
      5     int_or_none,
      6     url_or_none,
      7 )
      8 
      9 
     10 class CliphunterIE(InfoExtractor):
     11     IE_NAME = 'cliphunter'
     12 
     13     _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
     14         (?P<id>[0-9]+)/
     15         (?P<seo>.+?)(?:$|[#\?])
     16     '''
     17     _TESTS = [{
     18         'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
     19         'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
     20         'info_dict': {
     21             'id': '1012420',
     22             'ext': 'flv',
     23             'title': 'Fun Jynx Maze solo',
     24             'thumbnail': r're:^https?://.*\.jpg$',
     25             'age_limit': 18,
     26         },
     27         'skip': 'Video gone',
     28     }, {
     29         'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
     30         'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
     31         'info_dict': {
     32             'id': '2019449',
     33             'ext': 'mp4',
     34             'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
     35             'thumbnail': r're:^https?://.*\.jpg$',
     36             'age_limit': 18,
     37         },
     38     }]
     39 
     40     def _real_extract(self, url):
     41         video_id = self._match_id(url)
     42         webpage = self._download_webpage(url, video_id)
     43 
     44         video_title = self._search_regex(
     45             r'mediaTitle = "([^"]+)"', webpage, 'title')
     46 
     47         gexo_files = self._parse_json(
     48             self._search_regex(
     49                 r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'),
     50             video_id)
     51 
     52         formats = []
     53         for format_id, f in gexo_files.items():
     54             video_url = url_or_none(f.get('url'))
     55             if not video_url:
     56                 continue
     57             fmt = f.get('fmt')
     58             height = f.get('h')
     59             format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
     60             formats.append({
     61                 'url': video_url,
     62                 'format_id': format_id,
     63                 'width': int_or_none(f.get('w')),
     64                 'height': int_or_none(height),
     65                 'tbr': int_or_none(f.get('br')),
     66             })
     67         self._sort_formats(formats)
     68 
     69         thumbnail = self._search_regex(
     70             r"var\s+mov_thumb\s*=\s*'([^']+)';",
     71             webpage, 'thumbnail', fatal=False)
     72 
     73         return {
     74             'id': video_id,
     75             'title': video_title,
     76             'formats': formats,
     77             'age_limit': self._rta_search(webpage),
     78             'thumbnail': thumbnail,
     79         }