youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 7913e0fca7df6840e8434449f534f9744c9394f2
parent cdd1ce92c46167c12514a061609308112e79ea84
Author: mrBliss <dewinant@gmail.com>
Date:   Fri, 20 Oct 2017 14:41:07 +0200

[canvas] Add support for vrt.be/vrtnu (closes #11873)

Diffstat:
M youtube_dl/extractor/canvas.py     | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M youtube_dl/extractor/extractors.py |   1 +
A youtube_dl/extractor/gigya.py      |  22 ++++++++++++++++++++++
M youtube_dl/extractor/medialaan.py  |  17 ++++-------------
4 files changed, 167 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
@@ -1,16 +1,24 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+
+
+from ..compat import compat_HTTPError
 from ..utils import (
-    float_or_none,
+    ExtractorError,
     strip_or_none,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
 )
 
 
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
         'md5': '90139b746a0a9bd7bb631283f6e2a64e',
@@ -166,3 +174,133 @@ class CanvasEenIE(InfoExtractor):
             'title': title,
             'description': self._og_search_description(webpage),
         }
+
+
+class VrtNUIE(GigyaBaseIE):
+    IE_DESC = 'VrtNU.be'
+    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+        'info_dict': {
+            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+            'ext': 'flv',
+            'title': 'De zwarte weduwe',
+            'description': 'md5:d90c21dced7db869a85db89a623998d4',
+            'duration': 1457.04,
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'season': '1',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        # 'skip': 'This video is only available for registered users'
+    }]
+    _NETRC_MACHINE = 'vrtnu'
+    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+    _CONTEXT_ID = 'R3595707040'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            self.raise_login_required()
+
+        auth_data = {
+            'APIKey': self._APIKEY,
+            'targetEnv': 'jssdk',
+            'loginID': username,
+            'password': password,
+            'authMode': 'cookie',
+        }
+
+        auth_info = self._gigya_login(auth_data)
+
+        # Sometimes authentication fails for no good reason, retry
+        login_attempt = 1
+        while login_attempt <= 3:
+            try:
+                # When requesting a token, no actual token is returned, but the
+                # necessary cookies are set.
+                self._request_webpage(
+                    'https://token.vrt.be',
+                    None, note='Requesting a token', errnote='Could not get a token',
+                    headers={
+                        'Content-Type': 'application/json',
+                        'Referer': 'https://www.vrt.be/vrtnu/',
+                    },
+                    data=json.dumps({
+                        'uid': auth_info['UID'],
+                        'uidsig': auth_info['UIDSignature'],
+                        'ts': auth_info['signatureTimestamp'],
+                        'email': auth_info['profile']['email'],
+                    }).encode('utf-8'))
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    login_attempt += 1
+                    self.report_warning('Authentication failed')
+                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+                else:
+                    raise e
+            else:
+                break
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
+            webpage, 'title').strip()
+
+        description = self._html_search_regex(
+            r'(?ms)<div class="content__description">(.+?)</div>',
+            webpage, 'description', default=None)
+
+        season = self._html_search_regex(
+            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
+                    <span>seizoen\ (.+?)</span>\s*
+                </div>''',
+             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
+            webpage, 'season', default=None)
+
+        season_number = int_or_none(season)
+
+        episode_number = int_or_none(self._html_search_regex(
+            r'''(?xms)<div\ class="content__episode">\s*
+                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
+                </div>''',
+            webpage, 'episode_number', default=None))
+
+        release_date = parse_iso8601(self._html_search_regex(
+            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
+            webpage, 'release_date', default=None))
+
+        # If there's a ? or a # in the URL, remove them and everything after
+        clean_url = url.split('?')[0].split('#')[0].strip('/')
+        securevideo_url = clean_url + '.mssecurevideo.json'
+
+        json = self._download_json(securevideo_url, display_id)
+        # We are dealing with a '../<show>.relevant' URL
+        redirect_url = json.get('url')
+        if redirect_url:
+            return self.url_result('https:' + redirect_url)
+        else:
+            # There is only one entry, but with an unknown key, so just get
+            # the first one
+            video_id = list(json.values())[0].get('videoid')
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
+            'ie_key': CanvasIE.ie_key(),
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'season': season,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'release_date': release_date,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -150,6 +150,7 @@ from .canalc2 import Canalc2IE
 from .canvas import (
     CanvasIE,
     CanvasEenIE,
+    VrtNUIE,
 )
 from .carambatv import (
     CarambaTVIE,
diff --git a/youtube_dl/extractor/gigya.py b/youtube_dl/extractor/gigya.py
@@ -0,0 +1,22 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    ExtractorError,
+    urlencode_postdata,
+)
+
+
+class GigyaBaseIE(InfoExtractor):
+    def _gigya_login(self, auth_data):
+        auth_info = self._download_json(
+            'https://accounts.eu1.gigya.com/accounts.login', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(auth_data))
+
+        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+        if error_message:
+            raise ExtractorError(
+                'Unable to login: %s' % error_message, expected=True)
+        return auth_info
diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py
@@ -2,19 +2,18 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     int_or_none,
     parse_duration,
     try_get,
     unified_timestamp,
-    urlencode_postdata,
 )
 
 
-class MedialaanIE(InfoExtractor):
+class MedialaanIE(GigyaBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.|nieuws\.)?
@@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):
             'password': password,
         }
 
-        auth_info = self._download_json(
-            'https://accounts.eu1.gigya.com/accounts.login', None,
-            note='Logging in', errnote='Unable to log in',
-            data=urlencode_postdata(auth_data))
-
-        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
-        if error_message:
-            raise ExtractorError(
-                'Unable to login: %s' % error_message, expected=True)
+        auth_info = self._gigya_login(auth_data)
 
         self._uid = auth_info['UID']
         self._uid_signature = auth_info['UIDSignature']