youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

teachertube.py (4417B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     determine_ext,
      9     ExtractorError,
     10     qualities,
     11 )
     12 
     13 
     14 class TeacherTubeIE(InfoExtractor):
     15     IE_NAME = 'teachertube'
     16     IE_DESC = 'teachertube.com videos'
     17 
     18     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
     19 
     20     _TESTS = [{
     21         # flowplayer
     22         'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
     23         'md5': 'f9434ef992fd65936d72999951ee254c',
     24         'info_dict': {
     25             'id': '339997',
     26             'ext': 'mp4',
     27             'title': 'Measures of dispersion from a frequency table',
     28             'description': 'Measures of dispersion from a frequency table',
     29             'thumbnail': r're:https?://.*\.(?:jpg|png)',
     30         },
     31     }, {
     32         # jwplayer
     33         'url': 'http://www.teachertube.com/music.php?music_id=8805',
     34         'md5': '01e8352006c65757caf7b961f6050e21',
     35         'info_dict': {
     36             'id': '8805',
     37             'ext': 'mp3',
     38             'title': 'PER ASPERA AD ASTRA',
     39             'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
     40         },
     41     }, {
     42         # unavailable video
     43         'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
     44         'only_matching': True,
     45     }]
     46 
     47     def _real_extract(self, url):
     48         video_id = self._match_id(url)
     49         webpage = self._download_webpage(url, video_id)
     50 
     51         error = self._search_regex(
     52             r'<div\b[^>]+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage,
     53             'error', default=None)
     54         if error:
     55             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
     56 
     57         title = self._html_search_meta('title', webpage, 'title', fatal=True)
     58         TITLE_SUFFIX = ' - TeacherTube'
     59         if title.endswith(TITLE_SUFFIX):
     60             title = title[:-len(TITLE_SUFFIX)].strip()
     61 
     62         description = self._html_search_meta('description', webpage, 'description')
     63         if description:
     64             description = description.strip()
     65 
     66         quality = qualities(['mp3', 'flv', 'mp4'])
     67 
     68         media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
     69         media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
     70         media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
     71 
     72         formats = [
     73             {
     74                 'url': media_url,
     75                 'quality': quality(determine_ext(media_url))
     76             } for media_url in set(media_urls)
     77         ]
     78 
     79         self._sort_formats(formats)
     80 
     81         thumbnail = self._og_search_thumbnail(
     82             webpage, default=None) or self._html_search_meta(
     83             'thumbnail', webpage)
     84 
     85         return {
     86             'id': video_id,
     87             'title': title,
     88             'description': description,
     89             'thumbnail': thumbnail,
     90             'formats': formats,
     91         }
     92 
     93 
     94 class TeacherTubeUserIE(InfoExtractor):
     95     IE_NAME = 'teachertube:user:collection'
     96     IE_DESC = 'teachertube.com user and collection videos'
     97 
     98     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
     99 
    100     _MEDIA_RE = r'''(?sx)
    101         class="?sidebar_thumb_time"?>[0-9:]+</div>
    102         \s*
    103         <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
    104     '''
    105     _TEST = {
    106         'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
    107         'info_dict': {
    108             'id': 'rbhagwati2'
    109         },
    110         'playlist_mincount': 179,
    111     }
    112 
    113     def _real_extract(self, url):
    114         mobj = re.match(self._VALID_URL, url)
    115         user_id = mobj.group('user')
    116 
    117         urls = []
    118         webpage = self._download_webpage(url, user_id)
    119         urls.extend(re.findall(self._MEDIA_RE, webpage))
    120 
    121         pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
    122         for p in pages:
    123             more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
    124             webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
    125             video_urls = re.findall(self._MEDIA_RE, webpage)
    126             urls.extend(video_urls)
    127 
    128         entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
    129         return self.playlist_result(entries, user_id)