youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

eporner.py (4670B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     encode_base_n,
      9     ExtractorError,
     10     int_or_none,
     11     merge_dicts,
     12     parse_duration,
     13     str_to_int,
     14     url_or_none,
     15 )
     16 
     17 
     18 class EpornerIE(InfoExtractor):
     19     _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
     20     _TESTS = [{
     21         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
     22         'md5': '39d486f046212d8e1b911c52ab4691f8',
     23         'info_dict': {
     24             'id': 'qlDUmNsj6VS',
     25             'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
     26             'ext': 'mp4',
     27             'title': 'Infamous Tiffany Teen Strip Tease Video',
     28             'description': 'md5:764f39abf932daafa37485eb46efa152',
     29             'timestamp': 1232520922,
     30             'upload_date': '20090121',
     31             'duration': 1838,
     32             'view_count': int,
     33             'age_limit': 18,
     34         },
     35         'params': {
     36             'proxy': '127.0.0.1:8118'
     37         }
     38     }, {
     39         # New (May 2016) URL layout
     40         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
     41         'only_matching': True,
     42     }, {
     43         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
     44         'only_matching': True,
     45     }, {
     46         'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
     47         'only_matching': True,
     48     }, {
     49         'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
     50         'only_matching': True,
     51     }]
     52 
     53     def _real_extract(self, url):
     54         mobj = re.match(self._VALID_URL, url)
     55         video_id = mobj.group('id')
     56         display_id = mobj.group('display_id') or video_id
     57 
     58         webpage, urlh = self._download_webpage_handle(url, display_id)
     59 
     60         video_id = self._match_id(urlh.geturl())
     61 
     62         hash = self._search_regex(
     63             r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
     64 
     65         title = self._og_search_title(webpage, default=None) or self._html_search_regex(
     66             r'<title>(.+?) - EPORNER', webpage, 'title')
     67 
     68         # Reverse engineered from vjs.js
     69         def calc_hash(s):
     70             return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
     71 
     72         video = self._download_json(
     73             'http://www.eporner.com/xhr/video/%s' % video_id,
     74             display_id, note='Downloading video JSON',
     75             query={
     76                 'hash': calc_hash(hash),
     77                 'device': 'generic',
     78                 'domain': 'www.eporner.com',
     79                 'fallback': 'false',
     80             })
     81 
     82         if video.get('available') is False:
     83             raise ExtractorError(
     84                 '%s said: %s' % (self.IE_NAME, video['message']), expected=True)
     85 
     86         sources = video['sources']
     87 
     88         formats = []
     89         for kind, formats_dict in sources.items():
     90             if not isinstance(formats_dict, dict):
     91                 continue
     92             for format_id, format_dict in formats_dict.items():
     93                 if not isinstance(format_dict, dict):
     94                     continue
     95                 src = url_or_none(format_dict.get('src'))
     96                 if not src or not src.startswith('http'):
     97                     continue
     98                 if kind == 'hls':
     99                     formats.extend(self._extract_m3u8_formats(
    100                         src, display_id, 'mp4', entry_protocol='m3u8_native',
    101                         m3u8_id=kind, fatal=False))
    102                 else:
    103                     height = int_or_none(self._search_regex(
    104                         r'(\d+)[pP]', format_id, 'height', default=None))
    105                     fps = int_or_none(self._search_regex(
    106                         r'(\d+)fps', format_id, 'fps', default=None))
    107 
    108                     formats.append({
    109                         'url': src,
    110                         'format_id': format_id,
    111                         'height': height,
    112                         'fps': fps,
    113                     })
    114         self._sort_formats(formats)
    115 
    116         json_ld = self._search_json_ld(webpage, display_id, default={})
    117 
    118         duration = parse_duration(self._html_search_meta(
    119             'duration', webpage, default=None))
    120         view_count = str_to_int(self._search_regex(
    121             r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
    122             webpage, 'view count', default=None))
    123 
    124         return merge_dicts(json_ld, {
    125             'id': video_id,
    126             'display_id': display_id,
    127             'title': title,
    128             'duration': duration,
    129             'view_count': view_count,
    130             'formats': formats,
    131             'age_limit': 18,
    132         })