From dd5bcdc4c9cad0ee7e2d61343129ee1111048189 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 7 Nov 2013 21:06:48 +0100 Subject: [PATCH] [brightcove] Set the 'Referer' header if the url has the 'linkBaseURL' parameter (fixes #1553) --- youtube_dl/extractor/brightcove.py | 17 +++++++++++++---- youtube_dl/extractor/generic.py | 12 +++++++----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0e60271f1..d8c35465a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -10,10 +10,12 @@ from ..utils import ( find_xpath_attr, compat_urlparse, compat_str, + compat_urllib_request, ExtractorError, ) + class BrightcoveIE(InfoExtractor): _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' @@ -80,6 +82,9 @@ class BrightcoveIE(InfoExtractor): videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer') if videoPlayer is not None: params['@videoPlayer'] = videoPlayer.attrib['value'] + linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL') + if linkBase is not None: + params['linkBaseURL'] = linkBase.attrib['value'] data = compat_urllib_parse.urlencode(params) return cls._FEDERATED_URL_TEMPLATE % data @@ -107,14 +112,18 @@ class BrightcoveIE(InfoExtractor): videoPlayer = query.get('@videoPlayer') if videoPlayer: - return self._get_video_info(videoPlayer[0], query_str) + return self._get_video_info(videoPlayer[0], query_str, query) else: player_key = query['playerKey'] return self._get_playlist_info(player_key[0]) - def _get_video_info(self, video_id, query): - request_url = self._FEDERATED_URL_TEMPLATE % query - webpage = self._download_webpage(request_url, video_id) + def _get_video_info(self, video_id, query_str, query): + request_url = self._FEDERATED_URL_TEMPLATE % 
query_str + req = compat_urllib_request.Request(request_url) + linkBase = query.get('linkBaseURL') + if linkBase is not None: + req.add_header('Referer', linkBase[0]) + webpage = self._download_webpage(req, video_id) self.report_extraction(video_id) info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 04b7212f4..c7552fddb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -55,15 +55,17 @@ class GenericIE(InfoExtractor): u'skip': u'There is a limit of 200 free downloads / month for the test song', }, # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' in the + # http requests { u'add_ie': ['Brightcove'], - u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics', + u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', u'info_dict': { - u'id': u'2365799484001', + u'id': u'2765128793001', u'ext': u'mp4', - u'title': u'Bubble Simulation', - u'description': u'A visualization from a new computer model of foam behavior.', - u'uploader': u'Scientific American', + u'title': u'Le cours de bourse : l’analyse technique', + u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9', + u'uploader': u'BFM BUSINESS', }, u'params': { u'skip_download': True, -- 2.22.2