playvid.py (3299B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_unquote,
    compat_urllib_parse_unquote_plus,
)
from ..utils import (
    clean_html,
    ExtractorError,
)


class PlayvidIE(InfoExtractor):
    """Extractor for playvid.com watch pages (``/watch/<id>`` or ``/watch?v=<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TESTS = [{
        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
        'info_dict': {
            'id': 'RnmBNgtrrJu',
            'ext': 'mp4',
            'title': 'md5:9256d01c6317e3f703848b5906880dc8',
            'duration': 82,
            'age_limit': 18,
        },
        'skip': 'Video removed due to ToS',
    }, {
        'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
        'md5': '39d49df503ad7b8f23a4432cbf046477',
        'info_dict': {
            'id': 'hwb0GpNkzgH',
            'ext': 'mp4',
            'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
            'age_limit': 18,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single watch page.

        The page's ``flashvars`` attribute is URL-decoded once and split on
        ``&``; every ``video_vars[<key>]=<value>`` entry is then inspected for
        the title, duration, thumbnail and per-resolution video URLs.

        Raises ExtractorError (marked as expected) when the page contains an
        error block; its message is the cleaned text of that block.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The site reports problems inside a dedicated "block-error" div.
        error_match = re.search(
            r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
        if error_match:
            raise ExtractorError(clean_html(error_match.group('msg')), expected=True)

        title = None
        duration = None
        thumbnail = None
        formats = []

        # Nearly all metadata is carried by the flashvars attribute.
        flashvars = self._html_search_regex(
            r'flashvars="(.+?)"', webpage, 'flashvars')

        for pair in compat_urllib_parse_unquote(flashvars).split('&'):
            var_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', pair)
            if not var_match:
                continue
            name, value = var_match.groups()

            if name == 'title':
                # The title value needs a further unquote_plus pass.
                title = compat_urllib_parse_unquote_plus(value)
            elif name == 'duration':
                try:
                    duration = int(value)
                except ValueError:
                    # Non-numeric duration: leave it unset.
                    pass
            elif name == 'big_thumb':
                thumbnail = value
            else:
                # Nested keys arrive as "video_urls][<height>p...".
                url_match = re.match(
                    r'^video_urls\]\[(?P<resolution>[0-9]+)p', name)
                if url_match:
                    formats.append({
                        'height': int(url_match.group('resolution')),
                        'url': value,
                    })
        self._sort_formats(formats)

        # Fall back to the HTML <title> when the flashvars carried no title.
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18,
        }