scte.py (4993B)
1 from __future__ import unicode_literals 2 3 import re 4 5 from .common import InfoExtractor 6 from ..utils import ( 7 decode_packed_codes, 8 ExtractorError, 9 urlencode_postdata, 10 ) 11 12 13 class SCTEBaseIE(InfoExtractor): 14 _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx' 15 _NETRC_MACHINE = 'scte' 16 17 def _real_initialize(self): 18 self._login() 19 20 def _login(self): 21 username, password = self._get_login_info() 22 if username is None: 23 return 24 25 login_popup = self._download_webpage( 26 self._LOGIN_URL, None, 'Downloading login popup') 27 28 def is_logged(webpage): 29 return any(re.search(p, webpage) for p in ( 30 r'class=["\']welcome\b', r'>Sign Out<')) 31 32 # already logged in 33 if is_logged(login_popup): 34 return 35 36 login_form = self._hidden_inputs(login_popup) 37 38 login_form.update({ 39 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username, 40 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password, 41 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on', 42 }) 43 44 response = self._download_webpage( 45 self._LOGIN_URL, None, 'Logging in', 46 data=urlencode_postdata(login_form)) 47 48 if '|pageRedirect|' not in response and not is_logged(response): 49 error = self._html_search_regex( 50 r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</', 51 response, 'error message', default=None) 52 if error: 53 raise ExtractorError('Unable to login: %s' % error, expected=True) 54 raise ExtractorError('Unable to log in') 55 56 57 class SCTEIE(SCTEBaseIE): 58 _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)' 59 _TESTS = [{ 60 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484', 61 'info_dict': { 62 'title': 'Introduction to DOCSIS Engineering Professional', 63 'id': '31484', 64 }, 65 'playlist_count': 5, 66 'skip': 'Requires account credentials', 67 }] 68 69 def _real_extract(self, url): 70 video_id = self._match_id(url) 71 72 webpage = self._download_webpage(url, video_id) 73 74 title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title') 75 76 context_id = self._search_regex(r'context-(\d+)', webpage, video_id) 77 content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id 78 context = decode_packed_codes(self._download_webpage( 79 '%smobile/data.js' % content_base, video_id)) 80 81 data = self._parse_xml( 82 self._search_regex( 83 r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"), 84 video_id) 85 86 entries = [] 87 for asset in data.findall('.//asset'): 88 asset_url = asset.get('url') 89 if not asset_url or not asset_url.endswith('.mp4'): 90 continue 91 asset_id = self._search_regex( 92 r'video_([^_]+)_', asset_url, 'asset id', default=None) 93 if not asset_id: 94 continue 95 entries.append({ 96 'id': asset_id, 97 'title': title, 98 'url': content_base + asset_url, 99 }) 100 101 return self.playlist_result(entries, video_id, title) 102 103 104 class SCTECourseIE(SCTEBaseIE): 105 _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)' 106 _TESTS = [{ 107 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491', 108 'only_matching': True, 109 }, { 110 'url': 'https://learning.scte.org/course/view.php?id=3639', 111 'only_matching': True, 112 }, { 113 'url': 'https://learning.scte.org/course/view.php?id=3073', 114 'only_matching': True, 115 }] 116 117 def _real_extract(self, url): 118 course_id = self._match_id(url) 119 120 webpage = self._download_webpage(url, course_id) 121 122 title = self._search_regex( 123 r'<h1>(.+?)</h1>', webpage, 'title', default=None) 124 125 entries = [] 126 for mobj in re.finditer( 127 r'''(?x) 128 <a[^>]+ 129 href=(["\']) 130 (?P<url> 131 https?://learning\.scte\.org/mod/ 132 (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*? 133 \bid=\d+ 134 ) 135 ''', 136 webpage): 137 item_url = mobj.group('url') 138 if item_url == url: 139 continue 140 ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm' 141 else SCTECourseIE.ie_key()) 142 entries.append(self.url_result(item_url, ie=ie)) 143 144 return self.playlist_result(entries, course_id, title)