youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 99877772d08285b1b2743427ddd20440f4f4ded2
parent b0268cb6ce16f54ef23901c860cba6be1e16cf37
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon,  3 Feb 2014 15:19:40 +0100

[generic] Add support for multiple brightcove URLs (Fixes #2283)

Diffstat:
M test/test_playlists.py | 11 +++++++++++
M youtube_dl/extractor/brightcove.py | 19 +++++++++++--------
M youtube_dl/extractor/generic.py | 18 ++++++++++++++----
3 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/test/test_playlists.py b/test/test_playlists.py @@ -34,6 +34,7 @@ from youtube_dl.extractor import ( KhanAcademyIE, EveryonesMixtapeIE, RutubeChannelIE, + GenericIE, ) @@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '1409') self.assertTrue(len(result['entries']) >= 34) + def test_multiple_brightcove_videos(self): + # https://github.com/rg3/youtube-dl/issues/2283 + dl = FakeYDL() + ie = GenericIE(dl) + result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') + self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') + self.assertEqual(len(result['entries']), 3) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py @@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor): @classmethod def _extract_brightcove_url(cls, webpage): - """Try to extract the brightcove url from the wepbage, returns None + """Try to extract the brightcove url from the webpage, returns None if it can't be found """ + urls = cls._extract_brightcove_urls(webpage) + return urls[0] if urls else None + + @classmethod + def _extract_brightcove_urls(cls, webpage): + """Return a list of all Brightcove URLs from the webpage """ url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) if url_m: - return url_m.group(1) + return [url_m.group(1)] - m_brightcove = re.search( + matches = re.findall( r'''(?sx)<object (?: - [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | + [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ ).+?</object>''', webpage) - if m_brightcove is not None: - return 
cls._build_brighcove_url(m_brightcove.group()) - else: - return None + return [cls._build_brighcove_url(m) for m in matches] def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -234,11 +234,21 @@ class GenericIE(InfoExtractor): r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') # Look for BrightCove: - bc_url = BrightcoveIE._extract_brightcove_url(webpage) - if bc_url is not None: + bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) + if bc_urls: self.to_screen('Brightcove video detected.') - surl = smuggle_url(bc_url, {'Referer': url}) - return self.url_result(surl, 'Brightcove') + entries = [{ + '_type': 'url', + 'url': smuggle_url(bc_url, {'Referer': url}), + 'ie_key': 'Brightcove' + } for bc_url in bc_urls] + + return { + '_type': 'playlist', + 'title': video_title, + 'id': video_id, + 'entries': entries, + } # Look for embedded (iframe) Vimeo player mobj = re.search(