youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

camdemy.py (5772B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import (
      8     compat_urllib_parse_urlencode,
      9     compat_urlparse,
     10 )
     11 from ..utils import (
     12     clean_html,
     13     parse_duration,
     14     str_to_int,
     15     unified_strdate,
     16 )
     17 
     18 
     19 class CamdemyIE(InfoExtractor):
     20     _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
     21     _TESTS = [{
     22         # single file
     23         'url': 'http://www.camdemy.com/media/5181/',
     24         'md5': '5a5562b6a98b37873119102e052e311b',
     25         'info_dict': {
     26             'id': '5181',
     27             'ext': 'mp4',
     28             'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
     29             'thumbnail': r're:^https?://.*\.jpg$',
     30             'creator': 'ss11spring',
     31             'duration': 1591,
     32             'upload_date': '20130114',
     33             'view_count': int,
     34         }
     35     }, {
     36         # With non-empty description
     37         # webpage returns "No permission or not login"
     38         'url': 'http://www.camdemy.com/media/13885',
     39         'md5': '4576a3bb2581f86c61044822adbd1249',
     40         'info_dict': {
     41             'id': '13885',
     42             'ext': 'mp4',
     43             'title': 'EverCam + Camdemy QuickStart',
     44             'thumbnail': r're:^https?://.*\.jpg$',
     45             'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
     46             'creator': 'evercam',
     47             'duration': 318,
     48         }
     49     }, {
     50         # External source (YouTube)
     51         'url': 'http://www.camdemy.com/media/14842',
     52         'info_dict': {
     53             'id': '2vsYQzNIsJo',
     54             'ext': 'mp4',
     55             'title': 'Excel 2013 Tutorial - How to add Password Protection',
     56             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
     57             'upload_date': '20130211',
     58             'uploader': 'Hun Kim',
     59             'uploader_id': 'hunkimtutorials',
     60         },
     61         'params': {
     62             'skip_download': True,
     63         },
     64     }]
     65 
     66     def _real_extract(self, url):
     67         video_id = self._match_id(url)
     68 
     69         webpage = self._download_webpage(url, video_id)
     70 
     71         src_from = self._html_search_regex(
     72             r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
     73             webpage, 'external source', default=None, group='url')
     74         if src_from:
     75             return self.url_result(src_from)
     76 
     77         oembed_obj = self._download_json(
     78             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
     79 
     80         title = oembed_obj['title']
     81         thumb_url = oembed_obj['thumbnail_url']
     82         video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
     83         file_list_doc = self._download_xml(
     84             compat_urlparse.urljoin(video_folder, 'fileList.xml'),
     85             video_id, 'Downloading filelist XML')
     86         file_name = file_list_doc.find('./video/item/fileName').text
     87         video_url = compat_urlparse.urljoin(video_folder, file_name)
     88 
     89         # Some URLs return "No permission or not login" in a webpage despite being
     90         # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
     91         upload_date = unified_strdate(self._search_regex(
     92             r'>published on ([^<]+)<', webpage,
     93             'upload date', default=None))
     94         view_count = str_to_int(self._search_regex(
     95             r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
     96             webpage, 'view count', default=None))
     97         description = self._html_search_meta(
     98             'description', webpage, default=None) or clean_html(
     99             oembed_obj.get('description'))
    100 
    101         return {
    102             'id': video_id,
    103             'url': video_url,
    104             'title': title,
    105             'thumbnail': thumb_url,
    106             'description': description,
    107             'creator': oembed_obj.get('author_name'),
    108             'duration': parse_duration(oembed_obj.get('duration')),
    109             'upload_date': upload_date,
    110             'view_count': view_count,
    111         }
    112 
    113 
    114 class CamdemyFolderIE(InfoExtractor):
    115     _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    116     _TESTS = [{
    117         # links with trailing slash
    118         'url': 'http://www.camdemy.com/folder/450',
    119         'info_dict': {
    120             'id': '450',
    121             'title': '信號與系統 2012 & 2011 (Signals and Systems)',
    122         },
    123         'playlist_mincount': 145
    124     }, {
    125         # links without trailing slash
    126         # and multi-page
    127         'url': 'http://www.camdemy.com/folder/853',
    128         'info_dict': {
    129             'id': '853',
    130             'title': '科學計算 - 使用 Matlab'
    131         },
    132         'playlist_mincount': 20
    133     }, {
    134         # with displayMode parameter. For testing the codes to add parameters
    135         'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
    136         'info_dict': {
    137             'id': '853',
    138             'title': '科學計算 - 使用 Matlab'
    139         },
    140         'playlist_mincount': 20
    141     }]
    142 
    143     def _real_extract(self, url):
    144         folder_id = self._match_id(url)
    145 
    146         # Add displayMode=list so that all links are displayed in a single page
    147         parsed_url = list(compat_urlparse.urlparse(url))
    148         query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
    149         query.update({'displayMode': 'list'})
    150         parsed_url[4] = compat_urllib_parse_urlencode(query)
    151         final_url = compat_urlparse.urlunparse(parsed_url)
    152 
    153         page = self._download_webpage(final_url, folder_id)
    154         matches = re.findall(r"href='(/media/\d+/?)'", page)
    155 
    156         entries = [self.url_result('http://www.camdemy.com' + media_path)
    157                    for media_path in matches]
    158 
    159         folder_title = self._html_search_meta('keywords', page)
    160 
    161         return self.playlist_result(entries, folder_id, folder_title)