fourtube.py (11586B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_str,
    compat_urllib_parse_unquote,
    compat_urlparse,
)
from ..utils import (
    int_or_none,
    parse_duration,
    parse_iso8601,
    str_or_none,
    str_to_int,
    try_get,
    unified_timestamp,
    url_or_none,
)


class FourTubeBaseIE(InfoExtractor):
    """Shared extraction logic for the 4tube family of sites.

    Subclasses are expected to define:
      * _VALID_URL with the named groups 'kind', 'id' and 'display_id',
      * _URL_TEMPLATE, a '%s' template filled with the video id,
      * _TKN_HOST, the host queried for the per-quality media tokens.
    """

    def _extract_formats(self, url, video_id, media_id, sources):
        """Resolve the media URLs for every quality listed in sources.

        url is sent as the Referer (and its scheme/host as the Origin) of
        the token request; video_id is only passed through to
        _download_json; media_id and sources (a list of quality strings
        such as '720') are used to build the token URL.

        Returns the sorted list of format dicts.  A quality missing from
        the token response raises KeyError, and a non-numeric quality
        raises ValueError.
        """
        token_url = 'https://%s/%s/desktop/%s' % (
            self._TKN_HOST, media_id, '+'.join(sources))

        parsed_url = compat_urlparse.urlparse(url)
        # An empty body is supplied explicitly (presumably so the request
        # is issued as a POST -- confirm against the token endpoint).
        tokens = self._download_json(token_url, video_id, data=b'', headers={
            'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
            'Referer': url,
        })
        # The loop variable is deliberately not called `format`, which
        # would shadow the builtin of the same name.
        formats = [{
            'url': tokens[quality]['token'],
            'format_id': quality + 'p',
            'resolution': quality + 'p',
            'quality': int(quality),
        } for quality in sources]
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract metadata and formats by scraping the video page HTML.

        Returns the info dict for the video identified by url.
        """
        mobj = re.match(self._VALID_URL, url)
        kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')

        # Mobile URLs and URLs without a display id (e.g. embeds) are
        # rewritten to the canonical page built from _URL_TEMPLATE.
        if kind == 'm' or not display_id:
            url = self._URL_TEMPLATE % video_id

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('name', webpage)
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage))
        thumbnail = self._html_search_meta('thumbnailUrl', webpage)
        uploader_id = self._html_search_regex(
            r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
            webpage, 'uploader id', fatal=False)
        uploader = self._html_search_regex(
            r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
            webpage, 'uploader', fatal=False)

        categories_html = self._search_regex(
            r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
            webpage, 'categories', fatal=False)
        categories = None
        if categories_html:
            categories = [
                c.strip() for c in re.findall(
                    r'(?s)<li><a.*?>(.*?)</a>', categories_html)]

        view_count = str_to_int(self._search_regex(
            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
            webpage, 'view count', default=None))
        like_count = str_to_int(self._search_regex(
            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
            webpage, 'like count', default=None))
        duration = parse_duration(self._html_search_meta('duration', webpage))

        # First choice: media id and qualities from the quality buttons.
        media_id = self._search_regex(
            r'<button[^>]+data-id=(["\'])(?P<id>\d+)\1[^>]+data-quality=', webpage,
            'media id', default=None, group='id')
        sources = [
            quality
            for _, quality in re.findall(r'<button[^>]+data-quality=(["\'])(.+?)\1', webpage)]
        if not (media_id and sources):
            # Fallback: read both from the call arguments at the end of
            # the player script referenced by the page.
            # NOTE(review): `[^>]` below matches exactly one character
            # before `id=`; confirm whether `[^>]+` was intended.
            player_js = self._download_webpage(
                self._search_regex(
                    r'<script[^>]id=(["\'])playerembed\1[^>]+src=(["\'])(?P<url>.+?)\2',
                    webpage, 'player JS', group='url'),
                video_id, 'Downloading player JS')
            params_js = self._search_regex(
                r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
                player_js, 'initialization parameters')
            params = self._parse_json('[%s]' % params_js, video_id)
            media_id = params[0]
            sources = ['%s' % p for p in params[2]]

        formats = self._extract_formats(url, video_id, media_id, sources)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'categories': categories,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'like_count': like_count,
            'view_count': view_count,
            'duration': duration,
            'age_limit': 18,
        }


class FourTubeIE(FourTubeBaseIE):
    """Extractor for 4tube.com video, embed and mobile URLs."""

    IE_NAME = '4tube'
    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
    _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
    _TKN_HOST = 'token.4tube.com'
    _TESTS = [{
        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
        'info_dict': {
            'id': '209733',
            'ext': 'mp4',
            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
            'uploader': 'WCP Club',
            'uploader_id': 'wcp-club',
            'upload_date': '20131031',
            'timestamp': 1383263892,
            'duration': 583,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
        },
    }, {
        'url': 'http://www.4tube.com/embed/209733',
        'only_matching': True,
    }, {
        'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
        'only_matching': True,
    }]


class FuxIE(FourTubeBaseIE):
    """Extractor for fux.com video, embed and mobile URLs."""

    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
    _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
    _TKN_HOST = 'token.fux.com'
    _TESTS = [{
        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
        'info_dict': {
            'id': '195359',
            'ext': 'mp4',
            'title': 'Awesome fucking in the kitchen ends with cum swallow',
            'uploader': 'alenci2342',
            'uploader_id': 'alenci2342',
            'upload_date': '20131230',
            'timestamp': 1388361660,
            'duration': 289,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.fux.com/embed/195359',
        'only_matching': True,
    }, {
        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
        'only_matching': True,
    }]


class PornTubeIE(FourTubeBaseIE):
    """Extractor for porntube.com video, embed and mobile URLs.

    Metadata is read from the INITIALSTATE blob embedded in the page
    instead of being scraped from the HTML as the base class does.
    """

    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
    _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
    _TKN_HOST = 'tkn.porntube.com'
    _TESTS = [{
        'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
        'info_dict': {
            'id': '7089759',
            'ext': 'mp4',
            'title': 'Teen couple doing anal',
            'uploader': 'Alexy',
            'uploader_id': '91488',
            'upload_date': '20150606',
            'timestamp': 1433595647,
            'duration': 5052,
            'view_count': int,
            'like_count': int,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
        'info_dict': {
            'id': '1331406',
            'ext': 'mp4',
            'title': 'Squirting Teen Ballerina on ECG',
            'uploader': 'Exploited College Girls',
            'uploader_id': '665',
            'channel': 'Exploited College Girls',
            'channel_id': '665',
            'upload_date': '20130920',
            'timestamp': 1379685485,
            'duration': 851,
            'view_count': int,
            'like_count': int,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.porntube.com/embed/7089759',
        'only_matching': True,
    }, {
        'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract metadata from the page's base64-encoded INITIALSTATE.

        Returns the info dict for the video identified by url.  A missing
        'title', 'mediaId' or 'encodings' entry raises KeyError.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id, display_id = mobj.group('id', 'display_id')

        # Embed URLs match without a display id, so fall back to the
        # numeric id rather than handing None to the downloader.
        # NOTE(review): unlike the base class, the URL is not rewritten via
        # _URL_TEMPLATE for mobile/embed pages -- confirm that those pages
        # also carry INITIALSTATE.
        webpage = self._download_webpage(url, display_id or video_id)

        # The blob is base64 text that decodes to a percent-encoded JSON
        # document; the video record lives under page -> video.
        video = self._parse_json(
            self._search_regex(
                r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                webpage, 'data', group='value'), video_id,
            transform_source=lambda x: compat_urllib_parse_unquote(
                compat_b64decode(x).decode('utf-8')))['page']['video']

        title = video['title']
        media_id = video['mediaId']
        # Encodings without a (truthy) height are skipped.
        sources = [compat_str(e['height'])
                   for e in video['encodings'] if e.get('height')]
        formats = self._extract_formats(url, video_id, media_id, sources)

        thumbnail = url_or_none(video.get('masterThumb'))
        uploader = try_get(video, lambda x: x['user']['username'], compat_str)
        uploader_id = str_or_none(try_get(
            video, lambda x: x['user']['id'], int))
        channel = try_get(video, lambda x: x['channel']['name'], compat_str)
        channel_id = str_or_none(try_get(
            video, lambda x: x['channel']['id'], int))
        like_count = int_or_none(video.get('likes'))
        dislike_count = int_or_none(video.get('dislikes'))
        view_count = int_or_none(video.get('playsQty'))
        duration = int_or_none(video.get('durationInSeconds'))
        timestamp = unified_timestamp(video.get('publishedAt'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            # Channel data stands in when no user data is available.
            'uploader': uploader or channel,
            'uploader_id': uploader_id or channel_id,
            'channel': channel,
            'channel_id': channel_id,
            'timestamp': timestamp,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'view_count': view_count,
            'duration': duration,
            'age_limit': 18,
        }


class PornerBrosIE(FourTubeBaseIE):
    """Extractor for pornerbros.com video, embed and mobile URLs."""

    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
    _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
    _TKN_HOST = 'token.pornerbros.com'
    _TESTS = [{
        'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
        'info_dict': {
            'id': '181369',
            'ext': 'mp4',
            'title': 'Skinny brunette takes big cock down her anal hole',
            'uploader': 'PornerBros HD',
            'uploader_id': 'pornerbros-hd',
            'upload_date': '20130130',
            'timestamp': 1359527401,
            'duration': 1224,
            'view_count': int,
            'categories': list,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.pornerbros.com/embed/181369',
        'only_matching': True,
    }, {
        'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
        'only_matching': True,
    }]