camwithher.py (3229B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    unified_strdate,
)


class CamWithHerIE(InfoExtractor):
    """Extractor for camwithher.tv ``view_video.php`` pages.

    The ``viewkey`` query parameter is only used to fetch the page; the
    reported ``id`` is the numeric id scraped from the page's download link.
    """

    # ``viewkey`` may appear anywhere in the query string.
    _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'

    _TESTS = [
        {
            'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
            'info_dict': {
                'id': '5644',
                'ext': 'flv',
                'title': 'Periscope Tease',
                'description': 'In the clouds teasing on periscope to my favorite song',
                'duration': 240,
                'view_count': int,
                'comment_count': int,
                'uploader': 'MileenaK',
                'upload_date': '20160322',
                'age_limit': 18,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
            'only_matching': True,
        },
        {
            'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
            'only_matching': True,
        },
        {
            'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the video page for *url* and return its info dict.

        The numeric id and the title are looked up without a default; every
        other scraped field is looked up with ``default=None``.
        """
        view_key = self._match_id(url)
        page = self._download_webpage(url, view_key)

        clip_id = self._html_search_regex(
            r'<a[^>]+href=["\']/download/\?v=(\d+)', page, 'video id')

        # Video URL construction algorithm is reverse-engineered from
        # cwhplayer.swf: ids above 2010 use an 'mp4:<id>.mp4' play path,
        # all others use the bare id.
        if int(clip_id) > 2010:
            play_path = 'mp4:%s.mp4' % clip_id
        else:
            play_path = clip_id
        stream_url = 'rtmp://camwithher.tv/clipshare/%s' % play_path

        clip_title = self._html_search_regex(
            r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>',
            page, 'title')
        clip_description = self._html_search_regex(
            r'>Description:</span>(.+?)</div>',
            page, 'description', default=None)

        raw_runtime = self._search_regex(
            r'Runtime\s*:\s*(.+?) \|', page, 'duration', default=None)
        # Strip whitespace and dashes from the runtime text before parsing;
        # a missing runtime is handed to parse_duration unchanged.
        clip_duration = parse_duration(
            re.sub(r'[\s-]', '', raw_runtime) if raw_runtime else raw_runtime)

        views_text = self._search_regex(
            r'Views\s*:\s*(\d+)', page, 'view count', default=None)
        comments_text = self._search_regex(
            r'Comments\s*:\s*(\d+)', page, 'comment count', default=None)

        added_by = self._search_regex(
            r'Added by\s*:\s*<a[^>]+>([^<]+)</a>',
            page, 'uploader', default=None)
        added_on = self._search_regex(
            r'Added on\s*:\s*([\d-]+)', page, 'upload date', default=None)

        info = {
            'id': clip_id,
            'url': stream_url,
            'ext': 'flv',
            'no_resume': True,
            'title': clip_title,
            'description': clip_description,
            'duration': clip_duration,
            'view_count': int_or_none(views_text),
            'comment_count': int_or_none(comments_text),
            'uploader': added_by,
            'upload_date': unified_strdate(added_on),
            'age_limit': 18,
        }
        return info