expotv.py (2913B)
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unified_strdate,
)


class ExpoTVIE(InfoExtractor):
    """Information extractor for video pages on expotv.com."""

    # The numeric video id is the last path component of a /videos/ URL.
    _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
        'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
        'info_dict': {
            'id': '667916',
            'ext': 'mp4',
            'title': 'NYX Butter Lipstick Little Susie',
            'description': 'Goes on like butter, but looks better!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Stephanie S.',
            'upload_date': '20150520',
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single ExpoTV video page.

        Downloads the web page, reads the player key from it, fetches the
        JSON video configuration from client.expotv.com and turns each
        entry of its ``sources`` list into one or more formats.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``view_count``, ``thumbnail``, ``uploader`` and
        ``upload_date``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        # The player key is required to build the configuration URL below.
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config = self._download_json(
            'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key),
            video_id, 'Downloading video configuration')

        formats = []
        for fcfg in config['sources']:
            media_url = fcfg.get('file')
            if not media_url:
                # A source entry without a URL cannot be downloaded.
                continue
            if fcfg.get('type') == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
            else:
                formats.append({
                    'url': media_url,
                    'height': int_or_none(fcfg.get('height')),
                    'format_id': fcfg.get('label'),
                    # Prefer the extension found after "filename=" in the
                    # media URL; fall back to the source's declared type.
                    'ext': self._search_regex(
                        r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
                        'file extension', default=None) or fcfg.get('type'),
                })
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        # The play counter is optional metadata: like the uploader and the
        # upload date below, its absence must not abort the extraction.
        # int_or_none() passes a missing value through as None.
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts',
            fatal=False))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        # NOTE(review): day_first=False presumably reflects a month-first
        # date format on the page -- confirm against a live page.
        upload_date = unified_strdate(self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False), day_first=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }