youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

watchindianporn.py (2297B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import parse_duration
      8 
      9 
     10 class WatchIndianPornIE(InfoExtractor):
     11     IE_DESC = 'Watch Indian Porn'
     12     _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
     13     _TEST = {
     14         'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
     15         'md5': '249589a164dde236ec65832bfce17440',
     16         'info_dict': {
     17             'id': 'RZa2avywNPa',
     18             'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
     19             'ext': 'mp4',
     20             'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
     21             'thumbnail': r're:^https?://.*\.jpg$',
     22             'duration': 226,
     23             'view_count': int,
     24             'categories': list,
     25             'age_limit': 18,
     26         }
     27     }
     28 
     29     def _real_extract(self, url):
     30         mobj = re.match(self._VALID_URL, url)
     31         video_id = mobj.group('id')
     32         display_id = mobj.group('display_id')
     33 
     34         webpage = self._download_webpage(url, display_id)
     35 
     36         info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
     37 
     38         title = self._html_search_regex((
     39             r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
     40             r'<h4>(.+?)</h4>'
     41         ), webpage, 'title')
     42 
     43         duration = parse_duration(self._search_regex(
     44             r'Time:\s*<strong>\s*(.+?)\s*</strong>',
     45             webpage, 'duration', fatal=False))
     46 
     47         view_count = int(self._search_regex(
     48             r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
     49             webpage, 'view count', fatal=False))
     50 
     51         categories = re.findall(
     52             r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
     53             webpage)
     54 
     55         info_dict.update({
     56             'id': video_id,
     57             'display_id': display_id,
     58             'http_headers': {
     59                 'Referer': url,
     60             },
     61             'title': title,
     62             'duration': duration,
     63             'view_count': view_count,
     64             'categories': categories,
     65             'age_limit': 18,
     66         })
     67 
     68         return info_dict