# chaturbate.py (3911B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    lowercase_escape,
    url_or_none,
)


class ChaturbateIE(InfoExtractor):
    """Extractor for live room streams on chaturbate.com."""

    _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.chaturbate.com/siswet19/',
        'info_dict': {
            'id': 'siswet19',
            'ext': 'mp4',
            'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'age_limit': 18,
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Room is offline',
    }, {
        'url': 'https://chaturbate.com/fullvideo/?b=caylin',
        'only_matching': True,
    }, {
        'url': 'https://en.chaturbate.com/siswet19/',
        'only_matching': True,
    }]

    # Fallback error text, also used as one of the page markers that
    # identify an offline room.
    _ROOM_OFFLINE = 'Room is currently offline'

    def _real_extract(self, url):
        """Build the info dict for the live stream of the room in ``url``.

        The room page is fetched from the canonical ``chaturbate.com`` host
        and searched for HLS playlist URLs, first in the embedded
        ``initialRoomDossier`` JSON and then with two regex fallbacks.

        Raises ExtractorError when no playlist URL is found: an expected
        error if the page carries an error message or an offline marker,
        an unexpected one otherwise.
        """
        video_id = self._match_id(url)

        room_page_url = 'https://chaturbate.com/%s/' % video_id
        webpage = self._download_webpage(
            room_page_url, video_id,
            headers=self.geo_verification_headers())

        # Preferred source: the room dossier JSON embedded in the page as a
        # quoted, escaped string.
        dossier_source = self._search_regex(
            r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'data', default='{}', group='value')
        dossier = self._parse_json(
            dossier_source, video_id,
            transform_source=lowercase_escape, fatal=False)

        candidates = []
        hls_source = url_or_none(dossier.get('hls_source')) if dossier else None
        if hls_source:
            candidates.append(hls_source)

        # First fallback: playlist URLs delimited by escaped quote sequences.
        if not candidates:
            candidates.extend(
                lowercase_escape(mobj.group('url'))
                for mobj in re.finditer(
                    r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage))

        # Second fallback: playlist URLs inside plain quotes.
        if not candidates:
            candidates.extend(
                mobj.group('url')
                for mobj in re.finditer(
                    r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage))

        # Try each URL both as found and with '_fast' stripped, keeping
        # first-seen order and dropping duplicates.
        stream_urls = []
        for candidate in candidates:
            for variant in (candidate, candidate.replace('_fast', '')):
                if variant in stream_urls:
                    continue
                stream_urls.append(variant)

        if not stream_urls:
            message = self._search_regex(
                [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
                 r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
                webpage, 'error', group='error', default=None)
            offline_markers = (
                self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')
            if not message and any(
                    marker in webpage for marker in offline_markers):
                message = self._ROOM_OFFLINE
            if not message:
                raise ExtractorError('Unable to find stream URL')
            raise ExtractorError(message, expected=True)

        formats = []
        for stream_url in stream_urls:
            # Label the playlist by the first known tag present in its URL.
            variant_id = next(
                (tag for tag in ('fast', 'slow')
                 if '_%s' % tag in stream_url),
                None)
            # ffmpeg skips segments for fast m3u8
            preference = -10 if variant_id == 'fast' else None
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, ext='mp4',
                preference=preference,
                m3u8_id=variant_id, fatal=False, live=True))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._live_title(video_id),
            'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
            'age_limit': self._rta_search(webpage),
            'is_live': True,
            'formats': formats,
        }