youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 9766538124384b75c6b6cdfd8cb03ddce30136dc
parent 98dbee86815cad4d1fe1befab328033a16c56df6
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sat,  8 Feb 2014 19:20:11 +0100

[jadorecettepub] Add extractor (Fixes #2148)

Diffstat:
M test/test_all_urls.py | 2 +-
M test/test_youtube_lists.py | 10 +++++-----
M youtube_dl/extractor/__init__.py | 1 +
A youtube_dl/extractor/jadorecettepub.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/youtube.py | 7 ++++---
5 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/test/test_all_urls.py b/test/test_all_urls.py
@@ -85,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
 
     def test_youtube_extract(self):
-        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
         assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
@@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'ytdl test PL')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
         self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
 
     def test_youtube_playlist_noplaylist(self):
@@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertEqual(result['_type'], 'url')
-        self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
+        self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
 
     def test_issue_673(self):
         dl = FakeYDL()
@@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
 
@@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
         # TODO find a > 100 (paginating?) videos course
         result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         entries = result['entries']
-        self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
+        self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
         self.assertEqual(len(entries), 25)
-        self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
+        self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
 
     def test_youtube_channel(self):
         dl = FakeYDL()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -105,6 +105,7 @@ from .ivi import (
     IviIE,
     IviCompilationIE
 )
+from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class JadoreCettePubIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
+
+    _TEST = {
+        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
+        'md5': '401286a06067c70b44076044b66515de',
+        'info_dict': {
+            'id': 'jLMja3tr7a4',
+            'ext': 'mp4',
+            'title': 'La pire utilisation de Star Wars',
+            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
+            webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
+            fatal=False)
+        real_url = self._search_regex(
+            r'\[/postlink\](.*)endofvid', webpage, 'video URL')
+        video_id = YoutubeIE.extract_id(real_url)
+
+        return {
+            '_type': 'url_transparent',
+            'url': real_url,
+            'id': video_id,
+            'title': title,
+            'description': description,
+        }
+
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -1085,8 +1085,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             self._downloader.report_warning(err_msg)
             return {}
 
-    def _extract_id(self, url):
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+    @classmethod
+    def extract_id(cls, url):
+        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group(2)
@@ -1115,7 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             mobj = re.search(self._NEXT_URL_RE, url)
             if mobj:
                 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
-        video_id = self._extract_id(url)
+        video_id = self.extract_id(url)
 
         # Get video webpage
         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id