class SexyKarmaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+ IE_DESC = 'Sexy Karma and Watch Indian Porn'
+ _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
_TESTS = [{
'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
'display_id': 'taking-a-quick-pee',
'ext': 'mp4',
'title': 'Taking a quick pee.',
- 'description': '',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'wildginger7',
'upload_date': '20141007',
- 'duration': 81,
+ 'duration': 22,
'view_count': int,
'comment_count': int,
'categories': list,
'display_id': 'pot-pixie-tribute',
'ext': 'mp4',
'title': 'pot_pixie tribute',
- 'description': 'tribute',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'banffite',
'upload_date': '20141013',
'comment_count': int,
'categories': list,
}
+ }, {
+ 'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
+ 'md5': '9afb80675550406ed9a63ac2819ef69d',
+ 'info_dict': {
+ 'id': 'dW2mtctxJfs',
+ 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
+ 'ext': 'mp4',
+ 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'Don',
+ 'upload_date': '20140213',
+ 'duration': 83,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ }
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)
video_url = self._html_search_regex(
- r'<p>Save this video to your computer: </p><p><a href="([^"]+)"',
- webpage, 'url')
+ r"url: escape\('([^']+)'\)", webpage, 'url')
title = self._html_search_regex(
r'<h2 class="he2"><span>(.*?)</span>',
webpage, 'title')
- description = self._html_search_meta(
- 'description', webpage, 'description', fatal=False, default='')
thumbnail = self._html_search_regex(
r'<span id="container"><img\s+src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
webpage, 'comment count', fatal=False))
- categories = self._html_search_meta(
- 'keywords', webpage, 'categories',
- fatal=False, default='').split(',')
+ categories = re.findall(
+ r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
+ webpage)
return {
'id': video_id,
'display_id': display_id,
'url': video_url,
'title': title,
- 'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,