shared.py (4385B)
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_urllib_parse_unquote_plus,
)
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    js_to_json,
    KNOWN_EXTENSIONS,
    parse_filesize,
    rot47,
    url_or_none,
    urlencode_postdata,
)


class SharedBaseIE(InfoExtractor):
    """Common extraction flow shared by the extractors in this module.

    Subclasses must provide:

    * ``_FILE_NOT_FOUND`` - a marker string; when it occurs in the
      downloaded page the video is reported as non-existent.
    * ``_extract_video_url(webpage, video_id, url)`` - returns the media URL.

    ``_extract_title`` and ``_extract_filesize`` may be overridden.
    """

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict.

        Raises ExtractorError (expected) when the page contains the
        subclass' ``_FILE_NOT_FOUND`` marker.
        """
        video_id = self._match_id(url)

        # Only the page body is needed here, so the response handle is not
        # requested.
        webpage = self._download_webpage(url, video_id)

        if self._FILE_NOT_FOUND in webpage:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        video_url = self._extract_video_url(webpage, video_id, url)

        title = self._extract_title(webpage)
        filesize = int_or_none(self._extract_filesize(webpage))

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
        }

    def _extract_title(self, webpage):
        """Return the title decoded from the base64 ``full:title`` meta tag."""
        # The title is mandatory, so a missing meta tag must be reported by
        # the search helper itself rather than surface as an unrelated error
        # from base64-decoding a missing value.
        encoded_title = self._html_search_meta(
            'full:title', webpage, 'title', fatal=True)
        return compat_b64decode(encoded_title).decode('utf-8')

    def _extract_filesize(self, webpage):
        """Return the raw ``full:size`` meta value (optional field)."""
        return self._html_search_meta(
            'full:size', webpage, 'file size', fatal=False)


class SharedIE(SharedBaseIE):
    IE_DESC = 'shared.sx'
    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>File does not exist<'

    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
            'filesize': 1720110,
        },
    }

    def _extract_video_url(self, webpage, video_id, url):
        """Re-submit the page's hidden form and read ``data-url`` from the reply."""
        download_form = self._hidden_inputs(webpage)

        # The hidden inputs are POSTed back to the very same URL; the
        # response is the page that carries the media URL.
        video_page = self._download_webpage(
            url, video_id, 'Downloading video page',
            data=urlencode_postdata(download_form),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': url,
            })

        return self._html_search_regex(
            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            video_page, 'video URL', group='url')


class VivoIE(SharedBaseIE):
    IE_DESC = 'vivo.sx'
    _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
    _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'

    _TESTS = [{
        'url': 'http://vivo.sx/d7ddda0e78',
        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
        'info_dict': {
            'id': 'd7ddda0e78',
            'ext': 'mp4',
            'title': 'Chicken',
            'filesize': 515659,
        },
    }, {
        'url': 'http://vivo.st/d7ddda0e78',
        'only_matching': True,
    }]

    def _extract_title(self, webpage):
        """Return the ``data-name`` value, falling back to the OpenGraph title.

        A trailing file extension is stripped from ``data-name`` when it is
        one of KNOWN_EXTENSIONS.
        """
        title = self._html_search_regex(
            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
            'title', default=None, group='title')
        if not title:
            return self._og_search_title(webpage)

        ext = determine_ext(title)
        if ext.lower() in KNOWN_EXTENSIONS:
            title = title.rpartition('.' + ext)[0]
        return title

    def _extract_filesize(self, webpage):
        """Return the size parsed from the parenthesised text of the Watch button."""
        return parse_filesize(self._search_regex(
            r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
            webpage, 'filesize', fatal=False))

    def _extract_video_url(self, webpage, video_id, url):
        """Return the media URL.

        A base64 encoded ``data-stream`` attribute is tried first; when it is
        absent or does not yield a usable URL, the ``source`` entry of the
        ``InitializeStream({...})`` call is decoded instead.
        """
        def decode_url_old(encoded_url):
            return compat_b64decode(encoded_url).decode('utf-8')

        def decode_url(encoded_url):
            return rot47(compat_urllib_parse_unquote_plus(encoded_url))

        encoded_stream = self._search_regex(
            r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'stream url', default=None, group='url')
        if encoded_stream:
            try:
                stream_url = url_or_none(decode_url_old(encoded_stream))
            except (TypeError, ValueError):
                # Not valid base64 / UTF-8: treat it like a missing
                # attribute so that the second scheme below still gets a
                # chance.
                stream_url = None
            if stream_url:
                return stream_url

        stream_info = self._parse_json(
            self._search_regex(
                r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
                'stream'),
            video_id, transform_source=js_to_json)
        return decode_url(stream_info['source'])