newgrounds.py (5583B)
1 from __future__ import unicode_literals 2 3 import re 4 5 from .common import InfoExtractor 6 from ..utils import ( 7 extract_attributes, 8 int_or_none, 9 parse_duration, 10 parse_filesize, 11 unified_timestamp, 12 ) 13 14 15 class NewgroundsIE(InfoExtractor): 16 _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)' 17 _TESTS = [{ 18 'url': 'https://www.newgrounds.com/audio/listen/549479', 19 'md5': 'fe6033d297591288fa1c1f780386f07a', 20 'info_dict': { 21 'id': '549479', 22 'ext': 'mp3', 23 'title': 'B7 - BusMode', 24 'uploader': 'Burn7', 25 'timestamp': 1378878540, 26 'upload_date': '20130911', 27 'duration': 143, 28 }, 29 }, { 30 'url': 'https://www.newgrounds.com/portal/view/673111', 31 'md5': '3394735822aab2478c31b1004fe5e5bc', 32 'info_dict': { 33 'id': '673111', 34 'ext': 'mp4', 35 'title': 'Dancin', 36 'uploader': 'Squirrelman82', 37 'timestamp': 1460256780, 38 'upload_date': '20160410', 39 }, 40 }, { 41 # source format unavailable, additional mp4 formats 42 'url': 'http://www.newgrounds.com/portal/view/689400', 43 'info_dict': { 44 'id': '689400', 45 'ext': 'mp4', 46 'title': 'ZTV News Episode 8', 47 'uploader': 'BennettTheSage', 48 'timestamp': 1487965140, 49 'upload_date': '20170224', 50 }, 51 'params': { 52 'skip_download': True, 53 }, 54 }] 55 56 def _real_extract(self, url): 57 media_id = self._match_id(url) 58 59 webpage = self._download_webpage(url, media_id) 60 61 title = self._html_search_regex( 62 r'<title>([^>]+)</title>', webpage, 'title') 63 64 media_url = self._parse_json(self._search_regex( 65 r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id) 66 67 formats = [{ 68 'url': media_url, 69 'format_id': 'source', 70 'quality': 1, 71 }] 72 73 max_resolution = int_or_none(self._search_regex( 74 r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', 75 default=None)) 76 if max_resolution: 77 url_base = media_url.rpartition('.')[0] 78 for resolution in (360, 720, 1080): 79 if resolution > max_resolution: 80 break 81 formats.append({ 82 'url': '%s.%dp.mp4' % (url_base, resolution), 83 'format_id': '%dp' % resolution, 84 'height': resolution, 85 }) 86 87 self._check_formats(formats, media_id) 88 self._sort_formats(formats) 89 90 uploader = self._html_search_regex( 91 (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>', 92 r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader', 93 fatal=False) 94 95 timestamp = unified_timestamp(self._html_search_regex( 96 (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)', 97 r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp', 98 default=None)) 99 duration = parse_duration(self._search_regex( 100 r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage, 101 'duration', default=None)) 102 103 filesize_approx = parse_filesize(self._html_search_regex( 104 r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize', 105 default=None)) 106 if len(formats) == 1: 107 formats[0]['filesize_approx'] = filesize_approx 108 109 if '<dd>Song' in webpage: 110 formats[0]['vcodec'] = 'none' 111 112 return { 113 'id': media_id, 114 'title': title, 115 'uploader': uploader, 116 'timestamp': timestamp, 117 'duration': duration, 118 'formats': formats, 119 } 120 121 122 class NewgroundsPlaylistIE(InfoExtractor): 123 _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)' 124 _TESTS = [{ 125 'url': 'https://www.newgrounds.com/collection/cats', 126 'info_dict': { 127 'id': 'cats', 128 'title': 'Cats', 129 }, 130 'playlist_mincount': 46, 131 }, { 132 'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA', 133 'info_dict': { 134 'id': 'ZONE-SAMA', 135 'title': 'Portal Search: ZONE-SAMA', 136 }, 137 'playlist_mincount': 47, 138 }, { 139 'url': 'http://www.newgrounds.com/audio/search/title/cats', 140 'only_matching': True, 141 }] 142 143 def _real_extract(self, url): 144 playlist_id = self._match_id(url) 145 146 webpage = self._download_webpage(url, playlist_id) 147 148 title = self._search_regex( 149 r'<title>([^>]+)</title>', webpage, 'title', default=None) 150 151 # cut left menu 152 webpage = self._search_regex( 153 r'(?s)<div[^>]+\bclass=["\']column wide(.+)', 154 webpage, 'wide column', default=webpage) 155 156 entries = [] 157 for a, path, media_id in re.findall( 158 r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)', 159 webpage): 160 a_class = extract_attributes(a).get('class') 161 if a_class not in ('item-portalsubmission', 'item-audiosubmission'): 162 continue 163 entries.append( 164 self.url_result( 165 'https://www.newgrounds.com/%s' % path, 166 ie=NewgroundsIE.ie_key(), video_id=media_id)) 167 168 return self.playlist_result(entries, playlist_id, title)