# khanacademy.py (3919B)
from __future__ import unicode_literals

import json

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_iso8601,
    try_get,
)


class KhanAcademyBaseIE(InfoExtractor):
    """Common plumbing for the Khan Academy extractors.

    A subclass provides ``_VALID_URL`` (normally derived from
    ``_VALID_URL_TEMPL``) and a ``_parse_component_props`` method, which
    ``_real_extract`` calls with the ``componentProps`` object it downloaded.
    """

    # Two ``%s`` slots: the first is the repeat count for the leading
    # ``segment/`` groups, the second is inserted verbatim in front of the
    # final path segment.  The whole path is captured as ``id``.
    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'

    def _parse_video(self, video):
        """Turn a video metadata mapping into a ``url_transparent`` result.

        ``video['youtubeId']`` is mandatory, so a missing key raises
        ``KeyError``.  All remaining fields are looked up with ``.get`` and
        are ``None`` when absent.  The result names ``Youtube`` as the
        extractor to hand over to.
        """
        # Looked up first so that a missing id fails before anything else.
        youtube_id = video['youtubeId']
        thumbnail = video.get('imageUrl') or video.get('thumbnailUrl')
        duration = int_or_none(video.get('duration'))
        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'id': video.get('slug'),
            'title': video.get('title'),
            'thumbnail': thumbnail,
            'duration': duration,
            'description': video.get('description'),
            'ie_key': 'Youtube',
        }

    def _real_extract(self, url):
        """Download the page data for ``url`` and let the subclass parse it.

        The path matched by ``_VALID_URL`` is sent to the internal GraphQL
        endpoint.  The response's ``data.contentJson`` value is itself a JSON
        document, so it is parsed a second time before ``componentProps`` is
        passed to ``_parse_component_props``.
        """
        display_id = self._match_id(url)

        # ``variables`` travels as a JSON-encoded string inside the query.
        variables = json.dumps({
            'path': display_id,
            'queryParams': '',
        })
        response = self._download_json(
            'https://www.khanacademy.org/api/internal/graphql', display_id,
            query={
                # NOTE(review): presumably selects a server-side query;
                # the meaning of this constant is not visible here - confirm.
                'hash': 1604303425,
                'variables': variables,
            })

        content = self._parse_json(response['data']['contentJson'], display_id)
        return self._parse_component_props(content['componentProps'])


class KhanAcademyIE(KhanAcademyBaseIE):
    """Extractor for a single Khan Academy video page."""

    IE_NAME = 'khanacademy'
    # Four leading path segments, then a final segment prefixed with ``v/``.
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
        'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
        'info_dict': {
            'id': 'FlIG3TvQCBQ',
            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'uploader_id': 'khanacademy',
            'upload_date': '20120411',
            'timestamp': 1334170113,
            'license': 'cc-by-nc-sa',
        },
        'add_ie': ['Youtube'],
    }

    def _parse_component_props(self, component_props):
        """Build the result for one video from the page's component props.

        Reads ``tutorialPageData.contentModel`` and extends the generic
        ``_parse_video`` result with uploader, timestamp and license.
        """
        video = component_props['tutorialPageData']['contentModel']
        result = self._parse_video(video)

        authors = video.get('authorNames')
        # A missing or empty author list yields ``None``, not an empty string.
        result['uploader'] = ', '.join(authors) if authors else None
        result['timestamp'] = parse_iso8601(video.get('dateAdded'))
        result['license'] = video.get('kaUserLicense')
        return result


class KhanAcademyUnitIE(KhanAcademyBaseIE):
    """Extractor that turns a Khan Academy unit page into a playlist."""

    IE_NAME = 'khanacademy:unit'
    # Exactly three path segments, an optional trailing slash, and then either
    # the end of the URL or the start of a query string / fragment.
    _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
        'info_dict': {
            'id': 'cryptography',
            'title': 'Cryptography',
            'description': 'How have humans protected their secret messages through history? What has changed today?',
        },
        'playlist_mincount': 31,
    }

    def _parse_component_props(self, component_props):
        """Collect every ``Video`` content item of the unit into a playlist.

        Only the tutorials of the first module of the first tab are visited.
        Each entry carries chapter fields taken from its tutorial, and
        chapters are numbered from 1 in listing order.
        """
        curation = component_props['curation']
        tutorials = try_get(
            curation,
            lambda c: c['tabs'][0]['modules'][0]['tutorials'],
            list) or []

        entries = []
        for chapter_number, tutorial in enumerate(tutorials, 1):
            chapter_fields = {
                'chapter': tutorial.get('title'),
                'chapter_number': chapter_number,
                'chapter_id': tutorial.get('id'),
            }
            for item in (tutorial.get('contentItems') or []):
                # Non-video content items are not turned into entries.
                if item.get('kind') != 'Video':
                    continue
                entry = self._parse_video(item)
                entry.update(chapter_fields)
                entries.append(entry)

        unit = curation.get('unit')
        title = curation.get('title')
        description = curation.get('description')
        return self.playlist_result(entries, unit, title, description)