youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

closertotruth.py (3095B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 
      8 
      9 class CloserToTruthIE(InfoExtractor):
     10     _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     11     _TESTS = [{
     12         'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
     13         'info_dict': {
     14             'id': '0_zof1ktre',
     15             'display_id': 'solutions-the-mind-body-problem',
     16             'ext': 'mov',
     17             'title': 'Solutions to the Mind-Body Problem?',
     18             'upload_date': '20140221',
     19             'timestamp': 1392956007,
     20             'uploader_id': 'CTTXML'
     21         },
     22         'params': {
     23             'skip_download': True,
     24         },
     25     }, {
     26         'url': 'http://closertotruth.com/episodes/how-do-brains-work',
     27         'info_dict': {
     28             'id': '0_iuxai6g6',
     29             'display_id': 'how-do-brains-work',
     30             'ext': 'mov',
     31             'title': 'How do Brains Work?',
     32             'upload_date': '20140221',
     33             'timestamp': 1392956024,
     34             'uploader_id': 'CTTXML'
     35         },
     36         'params': {
     37             'skip_download': True,
     38         },
     39     }, {
     40         'url': 'http://closertotruth.com/interviews/1725',
     41         'info_dict': {
     42             'id': '1725',
     43             'title': 'AyaFr-002',
     44         },
     45         'playlist_mincount': 2,
     46     }]
     47 
     48     def _real_extract(self, url):
     49         display_id = self._match_id(url)
     50 
     51         webpage = self._download_webpage(url, display_id)
     52 
     53         partner_id = self._search_regex(
     54             r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
     55             webpage, 'kaltura partner_id')
     56 
     57         title = self._search_regex(
     58             r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
     59 
     60         select = self._search_regex(
     61             r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
     62             webpage, 'select version', default=None)
     63         if select:
     64             entry_ids = set()
     65             entries = []
     66             for mobj in re.finditer(
     67                     r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
     68                     webpage):
     69                 entry_id = mobj.group('id')
     70                 if entry_id in entry_ids:
     71                     continue
     72                 entry_ids.add(entry_id)
     73                 entries.append({
     74                     '_type': 'url_transparent',
     75                     'url': 'kaltura:%s:%s' % (partner_id, entry_id),
     76                     'ie_key': 'Kaltura',
     77                     'title': mobj.group('title'),
     78                 })
     79             if entries:
     80                 return self.playlist_result(entries, display_id, title)
     81 
     82         entry_id = self._search_regex(
     83             r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
     84             webpage, 'kaltura entry_id', group='id')
     85 
     86         return {
     87             '_type': 'url_transparent',
     88             'display_id': display_id,
     89             'url': 'kaltura:%s:%s' % (partner_id, entry_id),
     90             'ie_key': 'Kaltura',
     91             'title': title
     92         }