presstv.py (2390B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import remove_start 8 9 10 class PressTVIE(InfoExtractor): 11 _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?' 12 13 _TEST = { 14 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', 15 'md5': '5d7e3195a447cb13e9267e931d8dd5a5', 16 'info_dict': { 17 'id': '459911', 18 'display_id': 'Australian-sewerage-treatment-facility-', 19 'ext': 'mp4', 20 'title': 'Organic mattresses used to clean waste water', 21 'upload_date': '20160409', 22 'thumbnail': r're:^https?://.*\.jpg', 23 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' 24 } 25 } 26 27 def _real_extract(self, url): 28 mobj = re.match(self._VALID_URL, url) 29 video_id = mobj.group('id') 30 display_id = mobj.group('display_id') or video_id 31 32 webpage = self._download_webpage(url, display_id) 33 34 # extract video URL from webpage 35 video_url = self._hidden_inputs(webpage)['inpPlayback'] 36 37 # build list of available formats 38 # specified in http://www.presstv.ir/Scripts/playback.js 39 base_url = 'http://192.99.219.222:82/presstv' 40 _formats = [ 41 (180, '_low200.mp4'), 42 (360, '_low400.mp4'), 43 (720, '_low800.mp4'), 44 (1080, '.mp4') 45 ] 46 47 formats = [{ 48 'url': base_url + video_url[:-4] + extension, 49 'format_id': '%dp' % height, 50 'height': height, 51 } for height, extension in _formats] 52 53 # extract video metadata 54 title = remove_start( 55 self._html_search_meta('title', webpage, fatal=True), 'PressTV-') 56 57 thumbnail = self._og_search_thumbnail(webpage) 58 description = self._og_search_description(webpage) 59 60 upload_date = '%04d%02d%02d' % ( 61 int(mobj.group('y')), 62 int(mobj.group('m')), 63 int(mobj.group('d')), 64 ) 65 66 return { 67 'id': video_id, 68 'display_id': display_id, 69 'title': title, 70 'formats': formats, 71 'thumbnail': thumbnail, 72 'upload_date': upload_date, 73 'description': description 74 }