vodlocker.py (2796B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    NO_DEFAULT,
    sanitized_Request,
    urlencode_postdata,
)


class VodlockerIE(InfoExtractor):
    """Extractor for video pages on vodlocker.com / vodlocker.city.

    Both regular pages and ``embed-`` pages are matched by ``_VALID_URL``;
    the alphanumeric path component is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'

    _TESTS = [{
        'url': 'http://vodlocker.com/e8wvyzz4sl42',
        'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
        'info_dict': {
            'id': 'e8wvyzz4sl42',
            'ext': 'mp4',
            'title': 'Germany vs Brazil',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    # Page fragments that indicate the video is gone.
    _REMOVED_MARKERS = (
        '>THIS FILE WAS DELETED<',
        '>File Not Found<',
        'The file you were looking for could not be found, sorry for any inconvenience.<',
        '>The file was removed',
    )

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats) for ``url``.

        Raises ExtractorError (expected) when the page carries one of the
        "file removed" markers.  Other failures come from the fatal
        ``_search_regex`` calls (file url, embed url, title).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        if any(marker in webpage for marker in self._REMOVED_MARKERS):
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        fields = self._hidden_inputs(webpage)

        # Use .get(): a page without an 'op' hidden input (no intermediate
        # form) must fall through to extraction instead of raising KeyError.
        if fields.get('op') == 'download1':
            self._sleep(3, video_id)  # they do detect when requests happen too fast!
            post = urlencode_postdata(fields)
            req = sanitized_Request(url, post)
            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            webpage = self._download_webpage(
                req, video_id, 'Downloading video page')

        def extract_file_url(html, default=NO_DEFAULT):
            # With the NO_DEFAULT sentinel the search is fatal; passing an
            # explicit default makes a miss return that default instead.
            return self._search_regex(
                r'file:\s*"(http[^\"]+)",', html, 'file url', default=default)

        video_url = extract_file_url(webpage, default=None)

        if not video_url:
            # No direct file url on the page: follow the embed iframe.
            embed_url = self._search_regex(
                r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1',
                webpage, 'embed url', group='url')
            # The regex above allows a scheme-less src; such a value cannot
            # be downloaded as-is, so reuse the scheme of the page url.
            if not embed_url.startswith(('http://', 'https://')):
                scheme = 'https' if url.startswith('https://') else 'http'
                embed_url = '%s://%s' % (scheme, embed_url)
            embed_webpage = self._download_webpage(
                embed_url, video_id, 'Downloading embed webpage')
            video_url = extract_file_url(embed_webpage)
            thumbnail_webpage = embed_webpage
        else:
            thumbnail_webpage = webpage

        # The title is always taken from the main page, the thumbnail from
        # whichever page supplied the file url.
        title = self._search_regex(
            r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }