youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

screencast.py (4680B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..compat import (
      6     compat_parse_qs,
      7     compat_urllib_request,
      8 )
      9 from ..utils import (
     10     ExtractorError,
     11 )
     12 
     13 
     14 class ScreencastIE(InfoExtractor):
     15     _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
     16     _TESTS = [{
     17         'url': 'http://www.screencast.com/t/3ZEjQXlT',
     18         'md5': '917df1c13798a3e96211dd1561fded83',
     19         'info_dict': {
     20             'id': '3ZEjQXlT',
     21             'ext': 'm4v',
     22             'title': 'Color Measurement with Ocean Optics Spectrometers',
     23             'description': 'md5:240369cde69d8bed61349a199c5fb153',
     24             'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
     25         }
     26     }, {
     27         'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
     28         'md5': 'e8e4b375a7660a9e7e35c33973410d34',
     29         'info_dict': {
     30             'id': 'V2uXehPJa1ZI',
     31             'ext': 'mov',
     32             'title': 'The Amadeus Spectrometer',
     33             'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
     34             'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
     35         }
     36     }, {
     37         'url': 'http://www.screencast.com/t/aAB3iowa',
     38         'md5': 'dedb2734ed00c9755761ccaee88527cd',
     39         'info_dict': {
     40             'id': 'aAB3iowa',
     41             'ext': 'mp4',
     42             'title': 'Google Earth Export',
     43             'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
     44             'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
     45         }
     46     }, {
     47         'url': 'http://www.screencast.com/t/X3ddTrYh',
     48         'md5': '669ee55ff9c51988b4ebc0877cc8b159',
     49         'info_dict': {
     50             'id': 'X3ddTrYh',
     51             'ext': 'wmv',
     52             'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
     53             'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
     54             'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
     55         }
     56     }, {
     57         'url': 'http://screencast.com/t/aAB3iowa',
     58         'only_matching': True,
     59     }]
     60 
     61     def _real_extract(self, url):
     62         video_id = self._match_id(url)
     63         webpage = self._download_webpage(url, video_id)
     64 
     65         video_url = self._html_search_regex(
     66             r'<embed name="Video".*?src="([^"]+)"', webpage,
     67             'QuickTime embed', default=None)
     68 
     69         if video_url is None:
     70             flash_vars_s = self._html_search_regex(
     71                 r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
     72                 default=None)
     73             if not flash_vars_s:
     74                 flash_vars_s = self._html_search_regex(
     75                     r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
     76                     default=None)
     77                 if flash_vars_s:
     78                     flash_vars_s = flash_vars_s.replace(',', '&')
     79             if flash_vars_s:
     80                 flash_vars = compat_parse_qs(flash_vars_s)
     81                 video_url_raw = compat_urllib_request.quote(
     82                     flash_vars['content'][0])
     83                 video_url = video_url_raw.replace('http%3A', 'http:')
     84 
     85         if video_url is None:
     86             video_meta = self._html_search_meta(
     87                 'og:video', webpage, default=None)
     88             if video_meta:
     89                 video_url = self._search_regex(
     90                     r'src=(.*?)(?:$|&)', video_meta,
     91                     'meta tag video URL', default=None)
     92 
     93         if video_url is None:
     94             video_url = self._html_search_regex(
     95                 r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
     96                 webpage, 'video url', default=None, group='url')
     97 
     98         if video_url is None:
     99             video_url = self._html_search_meta(
    100                 'og:video', webpage, default=None)
    101 
    102         if video_url is None:
    103             raise ExtractorError('Cannot find video')
    104 
    105         title = self._og_search_title(webpage, default=None)
    106         if title is None:
    107             title = self._html_search_regex(
    108                 [r'<b>Title:</b> ([^<]+)</div>',
    109                  r'class="tabSeperator">></span><span class="tabText">(.+?)<',
    110                  r'<title>([^<]+)</title>'],
    111                 webpage, 'title')
    112         thumbnail = self._og_search_thumbnail(webpage)
    113         description = self._og_search_description(webpage, default=None)
    114         if description is None:
    115             description = self._html_search_meta('description', webpage)
    116 
    117         return {
    118             'id': video_id,
    119             'url': video_url,
    120             'title': title,
    121             'description': description,
    122             'thumbnail': thumbnail,
    123         }