youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

sexu.py (2000B)


      1 from __future__ import unicode_literals
      2 
      3 from .common import InfoExtractor
      4 
      5 
      6 class SexuIE(InfoExtractor):
      7     _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
      8     _TEST = {
      9         'url': 'http://sexu.com/961791/',
     10         'md5': 'ff615aca9691053c94f8f10d96cd7884',
     11         'info_dict': {
     12             'id': '961791',
     13             'ext': 'mp4',
     14             'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
     15             'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
     16             'categories': list,  # NSFW
     17             'thumbnail': r're:https?://.*\.jpg$',
     18             'age_limit': 18,
     19         }
     20     }
     21 
     22     def _real_extract(self, url):
     23         video_id = self._match_id(url)
     24         webpage = self._download_webpage(url, video_id)
     25 
     26         jwvideo = self._parse_json(
     27             self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
     28             video_id)
     29 
     30         sources = jwvideo['sources']
     31 
     32         formats = [{
     33             'url': source['file'].replace('\\', ''),
     34             'format_id': source.get('label'),
     35             'height': int(self._search_regex(
     36                 r'^(\d+)[pP]', source.get('label', ''), 'height',
     37                 default=None)),
     38         } for source in sources if source.get('file')]
     39         self._sort_formats(formats)
     40 
     41         title = self._html_search_regex(
     42             r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
     43 
     44         description = self._html_search_meta(
     45             'description', webpage, 'description')
     46 
     47         thumbnail = jwvideo.get('image')
     48 
     49         categories_str = self._html_search_meta(
     50             'keywords', webpage, 'categories')
     51         categories = (
     52             None if categories_str is None
     53             else categories_str.split(','))
     54 
     55         return {
     56             'id': video_id,
     57             'title': title,
     58             'description': description,
     59             'thumbnail': thumbnail,
     60             'categories': categories,
     61             'formats': formats,
     62             'age_limit': 18,
     63         }