youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

scte.py (4993B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     decode_packed_codes,
      8     ExtractorError,
      9     urlencode_postdata,
     10 )
     11 
     12 
     13 class SCTEBaseIE(InfoExtractor):
     14     _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
     15     _NETRC_MACHINE = 'scte'
     16 
     17     def _real_initialize(self):
     18         self._login()
     19 
     20     def _login(self):
     21         username, password = self._get_login_info()
     22         if username is None:
     23             return
     24 
     25         login_popup = self._download_webpage(
     26             self._LOGIN_URL, None, 'Downloading login popup')
     27 
     28         def is_logged(webpage):
     29             return any(re.search(p, webpage) for p in (
     30                 r'class=["\']welcome\b', r'>Sign Out<'))
     31 
     32         # already logged in
     33         if is_logged(login_popup):
     34             return
     35 
     36         login_form = self._hidden_inputs(login_popup)
     37 
     38         login_form.update({
     39             'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
     40             'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
     41             'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
     42         })
     43 
     44         response = self._download_webpage(
     45             self._LOGIN_URL, None, 'Logging in',
     46             data=urlencode_postdata(login_form))
     47 
     48         if '|pageRedirect|' not in response and not is_logged(response):
     49             error = self._html_search_regex(
     50                 r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
     51                 response, 'error message', default=None)
     52             if error:
     53                 raise ExtractorError('Unable to login: %s' % error, expected=True)
     54             raise ExtractorError('Unable to log in')
     55 
     56 
     57 class SCTEIE(SCTEBaseIE):
     58     _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
     59     _TESTS = [{
     60         'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
     61         'info_dict': {
     62             'title': 'Introduction to DOCSIS Engineering Professional',
     63             'id': '31484',
     64         },
     65         'playlist_count': 5,
     66         'skip': 'Requires account credentials',
     67     }]
     68 
     69     def _real_extract(self, url):
     70         video_id = self._match_id(url)
     71 
     72         webpage = self._download_webpage(url, video_id)
     73 
     74         title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
     75 
     76         context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
     77         content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
     78         context = decode_packed_codes(self._download_webpage(
     79             '%smobile/data.js' % content_base, video_id))
     80 
     81         data = self._parse_xml(
     82             self._search_regex(
     83                 r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
     84             video_id)
     85 
     86         entries = []
     87         for asset in data.findall('.//asset'):
     88             asset_url = asset.get('url')
     89             if not asset_url or not asset_url.endswith('.mp4'):
     90                 continue
     91             asset_id = self._search_regex(
     92                 r'video_([^_]+)_', asset_url, 'asset id', default=None)
     93             if not asset_id:
     94                 continue
     95             entries.append({
     96                 'id': asset_id,
     97                 'title': title,
     98                 'url': content_base + asset_url,
     99             })
    100 
    101         return self.playlist_result(entries, video_id, title)
    102 
    103 
    104 class SCTECourseIE(SCTEBaseIE):
    105     _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
    106     _TESTS = [{
    107         'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
    108         'only_matching': True,
    109     }, {
    110         'url': 'https://learning.scte.org/course/view.php?id=3639',
    111         'only_matching': True,
    112     }, {
    113         'url': 'https://learning.scte.org/course/view.php?id=3073',
    114         'only_matching': True,
    115     }]
    116 
    117     def _real_extract(self, url):
    118         course_id = self._match_id(url)
    119 
    120         webpage = self._download_webpage(url, course_id)
    121 
    122         title = self._search_regex(
    123             r'<h1>(.+?)</h1>', webpage, 'title', default=None)
    124 
    125         entries = []
    126         for mobj in re.finditer(
    127                 r'''(?x)
    128                     <a[^>]+
    129                         href=(["\'])
    130                         (?P<url>
    131                             https?://learning\.scte\.org/mod/
    132                             (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
    133                             \bid=\d+
    134                         )
    135                     ''',
    136                 webpage):
    137             item_url = mobj.group('url')
    138             if item_url == url:
    139                 continue
    140             ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
    141                   else SCTECourseIE.ie_key())
    142             entries.append(self.url_result(item_url, ie=ie))
    143 
    144         return self.playlist_result(entries, course_id, title)