youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 9e55e37a2e0e3a4e7d3fac2efd4ea13efe689b0e
parent a4ff6c4762fd01b606d54a2962530c753d4c52ec
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Sun,  9 Mar 2014 18:08:16 +0100

Merge remote-tracking branch 'origin/master'

Diffstat:
Atest/test_InfoExtractor.py | 44++++++++++++++++++++++++++++++++++++++++++++
Mtest/test_playlists.py | 2+-
Myoutube_dl/YoutubeDL.py | 13++++++++-----
Myoutube_dl/extractor/collegehumor.py | 14+++++++-------
Myoutube_dl/extractor/gamekings.py | 13++++++++-----
Myoutube_dl/extractor/mtv.py | 39+++++++++++++++++++++++++++++++++++----
Myoutube_dl/extractor/soundcloud.py | 1+
Myoutube_dl/extractor/spike.py | 15++++++++++++++-
Myoutube_dl/extractor/videodetective.py | 22+++++++++++-----------
Myoutube_dl/extractor/vube.py | 5++---
10 files changed, 131 insertions(+), 37 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import YoutubeIE, get_info_extractor + + +class TestIE(InfoExtractor): + pass + + +class TestInfoExtractor(unittest.TestCase): + def setUp(self): + self.ie = TestIE(FakeYDL()) + + def test_ie_key(self): + self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) + + def test_html_search_regex(self): + html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' + search = lambda re, *args: self.ie._html_search_regex(re, html, *args) + self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') + + def test_opengraph(self): + ie = self.ie + html = ''' + <meta name="og:title" content='Foo'/> + <meta content="Some video's description " name="og:description"/> + <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/> + ''' + self.assertEqual(ie._og_search_title(html), 'Foo') + self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') + self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_playlists.py b/test/test_playlists.py @@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') self.assertIsPlaylist(result) self.assertEqual(result['id'], '5124905') - self.assertTrue(len(result['entries']) >= 11) + self.assertTrue(len(result['entries']) >= 6) def test_soundcloud_set(self): dl = FakeYDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py @@ -370,12 +370,15 @@ class YoutubeDL(object): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' - if self._err_file.isatty() and os.name != 'nt': - _msg_header = '\033[0;33mWARNING:\033[0m' + if self.params.get('logger') is not None: + self.params['logger'].warning(message) else: - _msg_header = 'WARNING:' - warning_message = '%s %s' % (_msg_header, message) - self.to_stderr(warning_message) + if self._err_file.isatty() and os.name != 'nt': + _msg_header = '\033[0;33mWARNING:\033[0m' + else: + _msg_header = 'WARNING:' + warning_message = '%s %s' % (_msg_header, message) + self.to_stderr(warning_message) def report_error(self, message, tb=None): ''' diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py @@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor): }, # embedded youtube video { - 'url': 'http://www.collegehumor.com/embed/6950457', + 'url': 'http://www.collegehumor.com/embed/6950306', 'info_dict': { - 'id': 'W5gMp3ZjYg4', + 'id': 'Z-bao9fg6Yc', 'ext': 'mp4', - 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', - 'uploader': 'FunnyPlox TV', - 'uploader_id': 'funnyploxtv', - 'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', - 'upload_date': '20140128', + 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', + 'uploader': 'Mark Dice', + 'uploader_id': 'MarkDice', + 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', + 'upload_date': '20140127', }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -6,13 +8,14 @@ from .common import InfoExtractor class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' _TEST = { - u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", - u'file': u'20130811.mp4', + 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly - #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', + # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', u'info_dict': { - u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", - u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", + 'id': '20130811', + 'ext': 'mp4', + 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', + 'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', } } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py @@ -5,9 +5,12 @@ import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, + compat_urllib_request, ExtractorError, find_xpath_attr, fix_xml_ampersands, + HEADRequest, + unescapeHTML, url_basename, RegexNotFoundError, ) @@ -18,6 +21,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): + _MOBILE_TEMPLATE = None @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor): else: return thumb_node.attrib['url'] - def _extract_video_formats(self, mdoc): - if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: - raise ExtractorError('This video is not available from your country.', expected=True) + def _extract_mobile_video_formats(self, mtvn_id): + webpage_url = self._MOBILE_TEMPLATE % mtvn_id + req = compat_urllib_request.Request(webpage_url) + # Otherwise we get a webpage that would execute some javascript + req.add_header('Youtubedl-user-agent', 'curl/7') + webpage = self._download_webpage(req, mtvn_id, + 'Downloading mobile page') + metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) + req = HEADRequest(metrics_url) + response = self._request_webpage(req, mtvn_id, 'Resolving url') + url = response.geturl() + # Transform the url to get the best quality: + url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) + return [{'url': url,'ext': 'mp4'}] + + def _extract_video_formats(self, mdoc, mtvn_id): + if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: + if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: + self.to_screen('The normal version is not available from your ' + 'country, trying with the mobile version') + return self._extract_mobile_video_formats(mtvn_id) + raise ExtractorError('This video is not available from your country.', + expected=True) formats = [] for rendition in mdoc.findall('.//rendition'): @@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor): raise ExtractorError('Could not find video title') title = title.strip() + # This a short id that's used in the webpage urls + mtvn_id = None + mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', + 'scheme', 'urn:mtvn:id') + if mtvn_id_node is not None: + mtvn_id = mtvn_id_node.text + return { 'title': title, - 'formats': self._extract_video_formats(mediagen_doc), + 'formats': self._extract_video_formats(mediagen_doc, mtvn_id), 'id': video_id, 'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'description': description, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor): 'id': '47127627', 'ext': 'mp3', 'title': 'Goldrushed', + 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', 'upload_date': '20120521', }, diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py @@ -1,10 +1,15 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor class SpikeIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' + _VALID_URL = r'''(?x)https?:// + (www\.spike\.com/(video-clips|episodes)/.+| + m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) + ''' _TEST = { 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', 'md5': '1a9265f32b0c375793d6c4ce45255256', @@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor): } _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + mobile_id = mobj.group('mobile_id') + if mobile_id is not None: + url = 'http://www.spike.com/video-clips/%s' % mobile_id + return super(SpikeIE, self)._real_extract(url) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py @@ -1,22 +1,23 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import ( - compat_urlparse, -) +from ..utils import compat_urlparse class VideoDetectiveIE(InfoExtractor): _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' _TEST = { - u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', - u'file': u'194487.mp4', - u'info_dict': { - u'title': u'KICK-ASS 2', - u'description': u'md5:65ba37ad619165afac7d432eaded6013', - u'duration': 135, + 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', + 'info_dict': { + 'id': '194487', + 'ext': 'mp4', + 'title': 'KICK-ASS 2', + 'description': 'md5:65ba37ad619165afac7d432eaded6013', + 'duration': 135, }, } @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query - return self.url_result(InternetVideoArchiveIE._build_url(query), - ie=InternetVideoArchiveIE.ie_key()) + return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor): _TEST = { 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', - 'md5': 'f81dcf6d0448e3291f54380181695821', + 'md5': 'db7aba89d4603dadd627e9d1973946fe', 'info_dict': { 'id': 'YL2qNPkqon', 'ext': 'mp4', @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - }- \ No newline at end of file + }