closertotruth.py (3095B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 8 9 class CloserToTruthIE(InfoExtractor): 10 _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' 11 _TESTS = [{ 12 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', 13 'info_dict': { 14 'id': '0_zof1ktre', 15 'display_id': 'solutions-the-mind-body-problem', 16 'ext': 'mov', 17 'title': 'Solutions to the Mind-Body Problem?', 18 'upload_date': '20140221', 19 'timestamp': 1392956007, 20 'uploader_id': 'CTTXML' 21 }, 22 'params': { 23 'skip_download': True, 24 }, 25 }, { 26 'url': 'http://closertotruth.com/episodes/how-do-brains-work', 27 'info_dict': { 28 'id': '0_iuxai6g6', 29 'display_id': 'how-do-brains-work', 30 'ext': 'mov', 31 'title': 'How do Brains Work?', 32 'upload_date': '20140221', 33 'timestamp': 1392956024, 34 'uploader_id': 'CTTXML' 35 }, 36 'params': { 37 'skip_download': True, 38 }, 39 }, { 40 'url': 'http://closertotruth.com/interviews/1725', 41 'info_dict': { 42 'id': '1725', 43 'title': 'AyaFr-002', 44 }, 45 'playlist_mincount': 2, 46 }] 47 48 def _real_extract(self, url): 49 display_id = self._match_id(url) 50 51 webpage = self._download_webpage(url, display_id) 52 53 partner_id = self._search_regex( 54 r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)', 55 webpage, 'kaltura partner_id') 56 57 title = self._search_regex( 58 r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title') 59 60 select = self._search_regex( 61 r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>', 62 webpage, 'select version', default=None) 63 if select: 64 entry_ids = set() 65 entries = [] 66 for mobj in re.finditer( 67 r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)', 68 webpage): 69 entry_id = mobj.group('id') 70 if entry_id in entry_ids: 71 continue 72 entry_ids.add(entry_id) 73 entries.append({ 74 '_type': 'url_transparent', 75 'url': 'kaltura:%s:%s' % (partner_id, entry_id), 76 'ie_key': 'Kaltura', 77 'title': mobj.group('title'), 78 }) 79 if entries: 80 return self.playlist_result(entries, display_id, title) 81 82 entry_id = self._search_regex( 83 r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2', 84 webpage, 'kaltura entry_id', group='id') 85 86 return { 87 '_type': 'url_transparent', 88 'display_id': display_id, 89 'url': 'kaltura:%s:%s' % (partner_id, entry_id), 90 'ie_key': 'Kaltura', 91 'title': title 92 }