esri.py (2628B)
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    int_or_none,
    parse_filesize,
    unified_strdate,
)


class EsriVideoIE(InfoExtractor):
    """Information extractor for ``video.esri.com/watch/<id>`` pages."""

    _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
        'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
        'info_dict': {
            'id': '1124',
            'ext': 'mp4',
            'title': 'ArcGIS Online - Developing Applications',
            'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 185,
            'upload_date': '20120419',
        }
    }

    def _real_extract(self, url):
        """Download the watch page and build the info dict for one video.

        Formats are scraped from the page's ``<li><strong>WxH:</strong>``
        download list; title, description, thumbnail and upload date come
        from ``<meta>`` tags, and the duration from inline script values.

        :param url: a URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``title``, ``description``,
            ``thumbnail``, ``duration``, ``upload_date`` and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []
        # Each list item is labelled "<width>x<height>:" and wraps one or
        # more download links for that resolution.
        for width, height, content in re.findall(
                r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
            # The link text has the shape "<EXT> (<size>)".
            for video_url, ext, filesize in re.findall(
                    r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content):
                ext = ext.lower()
                formats.append({
                    # hrefs may be relative, so resolve against the page URL
                    'url': compat_urlparse.urljoin(url, video_url),
                    'ext': ext,
                    'format_id': '%s-%s' % (ext, height),
                    'width': int(width),
                    'height': int(height),
                    'filesize_approx': parse_filesize(filesize),
                })
        self._sort_formats(formats)

        # The title is a mandatory field: fail right here with a clear
        # "unable to extract title" error instead of returning None and
        # letting the caller reject the result later.
        title = self._html_search_meta(
            'title', webpage, 'title', fatal=True)
        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False)

        thumbnail = self._html_search_meta(
            'thumbnail', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            # Rewrite the "_s"/"_t" file suffix to "_x" -- presumably a
            # larger rendition of the same image (not verifiable from here).
            thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)

        duration = int_or_none(self._search_regex(
            [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
            webpage, 'duration', fatal=False))

        # The page's "last-modified" meta tag is used as the upload date.
        upload_date = unified_strdate(self._html_search_meta(
            'last-modified', webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'formats': formats
        }