ustudio.py (4392B)
1 from __future__ import unicode_literals 2 3 import re 4 5 from .common import InfoExtractor 6 from ..utils import ( 7 int_or_none, 8 unified_strdate, 9 unescapeHTML, 10 ) 11 12 13 class UstudioIE(InfoExtractor): 14 IE_NAME = 'ustudio' 15 _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)' 16 _TEST = { 17 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge', 18 'md5': '58bbfca62125378742df01fc2abbdef6', 19 'info_dict': { 20 'id': 'Uxu2my9bgSph', 21 'display_id': 'san_francisco_golden_gate_bridge', 22 'ext': 'mp4', 23 'title': 'San Francisco: Golden Gate Bridge', 24 'description': 'md5:23925500697f2c6d4830e387ba51a9be', 25 'thumbnail': r're:^https?://.*\.jpg$', 26 'upload_date': '20111107', 27 'uploader': 'Tony Farley', 28 } 29 } 30 31 def _real_extract(self, url): 32 video_id, display_id = re.match(self._VALID_URL, url).groups() 33 34 config = self._download_xml( 35 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id, 36 display_id) 37 38 def extract(kind): 39 return [{ 40 'url': unescapeHTML(item.attrib['url']), 41 'width': int_or_none(item.get('width')), 42 'height': int_or_none(item.get('height')), 43 } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')] 44 45 formats = extract('video') 46 self._sort_formats(formats) 47 48 webpage = self._download_webpage(url, display_id) 49 50 title = self._og_search_title(webpage) 51 upload_date = unified_strdate(self._search_regex( 52 r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>', 53 webpage, 'upload date', fatal=False)) 54 uploader = self._search_regex( 55 r'Uploaded by\s*<a[^>]*>([^<]+)<', 56 webpage, 'uploader', fatal=False) 57 58 return { 59 'id': video_id, 60 'display_id': display_id, 61 'title': title, 62 'description': self._og_search_description(webpage), 63 'thumbnails': extract('image'), 64 'upload_date': upload_date, 65 'uploader': uploader, 66 'formats': formats, 67 } 68 69 70 class UstudioEmbedIE(InfoExtractor): 71 IE_NAME = 'ustudio:embed' 72 _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)' 73 _TEST = { 74 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T', 75 'md5': '47c0be52a09b23a7f40de9469cec58f4', 76 'info_dict': { 77 'id': 'Uw7G1kMCe65T', 78 'ext': 'mp4', 79 'title': '5 Things IT Should Know About Video', 80 'description': 'md5:93d32650884b500115e158c5677d25ad', 81 'uploader_id': 'DeN7VdYRDKhP', 82 } 83 } 84 85 def _real_extract(self, url): 86 uploader_id, video_id = re.match(self._VALID_URL, url).groups() 87 video_data = self._download_json( 88 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id), 89 video_id)['videos'][0] 90 title = video_data['name'] 91 92 formats = [] 93 for ext, qualities in video_data.get('transcodes', {}).items(): 94 for quality in qualities: 95 quality_url = quality.get('url') 96 if not quality_url: 97 continue 98 height = int_or_none(quality.get('height')) 99 formats.append({ 100 'format_id': '%s-%dp' % (ext, height) if height else ext, 101 'url': quality_url, 102 'width': int_or_none(quality.get('width')), 103 'height': height, 104 }) 105 self._sort_formats(formats) 106 107 thumbnails = [] 108 for image in video_data.get('images', []): 109 image_url = image.get('url') 110 if not image_url: 111 continue 112 thumbnails.append({ 113 'url': image_url, 114 }) 115 116 return { 117 'id': video_id, 118 'title': title, 119 'description': video_data.get('description'), 120 'duration': int_or_none(video_data.get('duration')), 121 'uploader_id': uploader_id, 122 'tags': video_data.get('keywords'), 123 'thumbnails': thumbnails, 124 'formats': formats, 125 }