spankwire.py (6475B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    int_or_none,
    merge_dicts,
    str_or_none,
    str_to_int,
    url_or_none,
)


class SpankwireIE(InfoExtractor):
    """Extractor for spankwire.com video pages and EmbedPlayer URLs.

    Metadata and direct format URLs are read from the site's JSON API.
    The HTML video page is additionally fetched on a best-effort basis
    (non-fatal) for JSON-LD data, an extra thumbnail, the uploader name
    and a fallback view count.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?spankwire\.com/
                        (?:
                            [^/]+/video|
                            EmbedPlayer\.aspx/?\?.*?\bArticleId=
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
        'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
        'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
        'info_dict': {
            'id': '103545',
            'ext': 'mp4',
            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
            'description': 'Crazy Bitch X rated music video.',
            'duration': 222,
            'uploader': 'oreusz',
            'uploader_id': '124697',
            'timestamp': 1178587885,
            'upload_date': '20070508',
            'average_rating': float,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
            'categories': list,
            'tags': list,
        },
    }, {
        # download URL pattern: */mp4_<format_id>_<video_id>.mp4
        'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
        'md5': '09b3c20833308b736ae8902db2f8d7e6',
        'info_dict': {
            'id': '1921551',
            'ext': 'mp4',
            'title': 'Titcums Compiloation I',
            'description': 'cum on tits',
            'uploader': 'dannyh78999',
            'uploader_id': '3056053',
            'upload_date': '20150822',
            'age_limit': 18,
        },
        'params': {
            'proxy': '127.0.0.1:8118'
        },
        'skip': 'removed',
    }, {
        'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src URLs of all spankwire EmbedPlayer iframes in *webpage*.

        The result is the list produced by ``re.findall``: one string per
        matching ``<iframe>`` tag, possibly empty.
        """
        return re.findall(
            r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
            webpage)

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The video id is taken from *url*, the API JSON for that id is
        downloaded, formats and thumbnails are assembled from it, and the
        result is merged with whatever JSON-LD data the video page yields.
        A missing ``title`` key in the API response raises ``KeyError``.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)

        title = video['title']

        formats = []
        videos = video.get('videos')
        if isinstance(videos, dict):
            for format_id, format_url in videos.items():
                video_url = url_or_none(format_url)
                # Check the validated URL, not the raw value: a truthy but
                # rejected value would otherwise reach re.search() as None
                # and raise TypeError.
                if not video_url:
                    continue
                # Prefer a height encoded in the format key (e.g. "720p")...
                height = int_or_none(self._search_regex(
                    r'(\d+)[pP]', format_id, 'height', default=None))
                # ...and fall back to the <height>P_<tbr>K_ pattern in the
                # download URL, which is also the only source of the bitrate.
                m = re.search(
                    r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
                if m:
                    tbr = int(m.group('tbr'))
                    height = height or int(m.group('height'))
                else:
                    tbr = None
                formats.append({
                    'url': video_url,
                    'format_id': ('%dp' % height) if height else format_id,
                    'height': height,
                    'tbr': tbr,
                })
        m3u8_url = url_or_none(video.get('HLS'))
        if m3u8_url:
            # Non-fatal: a broken HLS manifest must not discard the direct
            # formats collected above.
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id'))

        view_count = str_to_int(video.get('viewed'))

        thumbnails = []
        # 'poster' gets preference 0 and 'poster2x' gets preference 1.
        for preference, t in enumerate(('', '2x'), start=0):
            thumbnail_url = url_or_none(video.get('poster%s' % t))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'preference': preference,
            })

        def extract_names(key):
            """Return the non-empty 'name' values of the dicts in video[key].

            Returns None when video[key] is not a list.
            """
            entries_list = video.get(key)
            if not isinstance(entries_list, list):
                return
            entries = []
            for entry in entries_list:
                # Skip malformed elements instead of failing on .get().
                if not isinstance(entry, dict):
                    continue
                name = str_or_none(entry.get('name'))
                if name:
                    entries.append(name)
            return entries

        categories = extract_names('categories')
        tags = extract_names('tags')

        uploader = None
        info = {}

        # The HTML page is optional: everything below only supplements the
        # API data, so a failed download is tolerated.
        webpage = self._download_webpage(
            'https://www.spankwire.com/_/video%s/' % video_id, video_id,
            fatal=False)
        if webpage:
            info = self._search_json_ld(webpage, video_id, default={})
            thumbnail_url = None
            if 'thumbnail' in info:
                thumbnail_url = url_or_none(info['thumbnail'])
                # Removed from info so the merged result carries the
                # 'thumbnails' list only.
                del info['thumbnail']
            if not thumbnail_url:
                thumbnail_url = self._og_search_thumbnail(webpage)
            if thumbnail_url:
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': 10,
                })
            uploader = self._html_search_regex(
                r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>',
                webpage, 'uploader', fatal=False)
            if not view_count:
                view_count = str_to_int(self._search_regex(
                    r'data-views=["\']([\d,.]+)', webpage, 'view count',
                    fatal=False))

        return merge_dicts({
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'duration': int_or_none(video.get('duration')),
            'thumbnails': thumbnails,
            'uploader': uploader,
            'uploader_id': str_or_none(video.get('userId')),
            'timestamp': int_or_none(video.get('time_approved_on')),
            'average_rating': float_or_none(video.get('rating')),
            'view_count': view_count,
            'comment_count': int_or_none(video.get('comments')),
            'age_limit': 18,
            'categories': categories,
            'tags': tags,
            'formats': formats,
        }, info)