youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 3aec71766da38478740437c901514e666a39dbb0
parent 782b1b5bd1cdaaead6865dee5d300486e7dd8348
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 19 Mar 2016 22:30:48 +0600

[safari:api] Separate extractor (Closes #8871)

Diffstat:
M youtube_dl/extractor/__init__.py | 1 +
M youtube_dl/extractor/safari.py | 54 ++++++++++++++++++++++++++++++++----------------------
2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py @@ -628,6 +628,7 @@ from .ruutu import RuutuIE from .sandia import SandiaIE from .safari import ( SafariIE, + SafariApiIE, SafariCourseIE, ) from .sapo import SapoIE diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py @@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor): class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = r'''(?x)https?:// - (?:www\.)?safaribooksonline\.com/ - (?: - library/view/[^/]+| - api/v1/book - )/ - (?P<course_id>[^/]+)/ - (?:chapter(?:-content)?/)? - (?P<part>part\d+)\.html - ''' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -98,9 +89,6 @@ class SafariIE(SafariBaseIE): 'uploader_id': 'stork', }, }, { - 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', - 'only_matching': True, - }, { # non-digits in course id 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 'only_matching': True, @@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - course_id = mobj.group('course_id') - part = mobj.group('part') - - webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) - reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') - partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') - ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') + video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) + + webpage = self._download_webpage(url, video_id) + reference_id = self._search_regex( + 
r'data-reference-id=(["\'])(?P<id>.+?)\1', + webpage, 'kaltura reference id', group='id') + partner_id = self._search_regex( + r'data-partner-id=(["\'])(?P<id>.+?)\1', + webpage, 'kaltura widget id', group='id') + ui_id = self._search_regex( + r'data-ui-id=(["\'])(?P<id>.+?)\1', + webpage, 'kaltura uiconf id', group='id') query = { 'wid': '_%s' % partner_id, @@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE): if self.LOGGED_IN: kaltura_session = self._download_json( '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), - course_id, 'Downloading kaltura session JSON', + video_id, 'Downloading kaltura session JSON', 'Unable to download kaltura session JSON', fatal=False) if kaltura_session: session = kaltura_session.get('session') @@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE): 'Kaltura') +class SafariApiIE(SafariBaseIE): + IE_NAME = 'safari:api' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html' + + _TEST = { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', + 'only_matching': True, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + part = self._download_json( + url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), + 'Downloading part JSON') + return self.url_result(part['web_url'], SafariIE.ie_key()) + + class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' @@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE): 'No chapters found for course %s' % course_id, expected=True) entries = [ - self.url_result(chapter, 'Safari') + self.url_result(chapter, SafariApiIE.ie_key()) for chapter in course_json['chapters']] course_title = course_json['title']