youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 9d4660cab15f374176f87d3f747a559142e4af9b
parent cd054fc491198a5a7c69d76f19693b1cd4d5c086
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue, 15 Oct 2013 12:05:13 +0200

[generic] Support embedded vimeo videos (#1602)

Diffstat:
M test/test_utils.py | 16 ++++++++++++++++
M youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++
M youtube_dl/extractor/vimeo.py | 11 +++++++++--
M youtube_dl/utils.py | 17 +++++++++++++++++
4 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 # Allow direct execution import os @@ -21,6 +22,8 @@ from youtube_dl.utils import ( find_xpath_attr, get_meta_content, xpath_with_ns, + smuggle_url, + unsmuggle_url, ) if sys.version_info < (3, 0): @@ -155,5 +158,18 @@ class TestUtil(unittest.TestCase): self.assertEqual(find('media:song/media:author').text, u'The Author') self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + def test_smuggle_url(self): + data = {u"ö": u"ö", u"abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -11,6 +11,8 @@ from ..utils import ( compat_urlparse, ExtractorError, + smuggle_url, + unescapeHTML, ) from .brightcove import BrightcoveIE @@ -29,6 +31,17 @@ class GenericIE(InfoExtractor): u"title": u"R\u00e9gis plante sa Jeep" } }, + # embedded vimeo video + { + u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', + u'file': u'22444065.mp4', + u'md5': u'2903896e23df39722c33f015af0666e2', + u'info_dict': { + u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011', + u"uploader_id": u"skillsmatter", + u"uploader": u"Skills Matter", + } + } ] def report_download_webpage(self, video_id): @@ -127,6 +140,14 @@ class GenericIE(InfoExtractor): bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) return self.url_result(bc_url, 'Brightcove') + # Look for embedded Vimeo player + mobj = re.search( + r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage) 
+ if mobj: + player_url = unescapeHTML(mobj.group(1)) + surl = smuggle_url(player_url, {'Referer': url}) + return self.url_result(surl, 'Vimeo') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py @@ -11,6 +11,7 @@ from ..utils import ( get_element_by_attribute, ExtractorError, std_headers, + unsmuggle_url, ) class VimeoIE(InfoExtractor): @@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor): u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', u'uploader': u'The BLN & Business of Software', }, - }, + } ] def _login(self): @@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor): self._login() def _real_extract(self, url, new_video=True): + url, data = unsmuggle_url(url) + headers = std_headers + if data is not None: + headers = headers.copy() + headers.update(data) + # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor): url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information - request = compat_urllib_request.Request(url, None, std_headers) + request = compat_urllib_request.Request(url, None, headers) webpage = self._download_webpage(request, video_id) # Now we begin extracting as much information as we can from what we diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -945,3 +945,20 @@ class locked_file(object): def shell_quote(args): return ' '.join(map(pipes.quote, args)) + + +def smuggle_url(url, data): + """ Pass additional data in a URL for internal use. 
""" + + sdata = compat_urllib_parse.urlencode( + {u'__youtubedl_smuggle': json.dumps(data)}) + return url + u'#' + sdata + + +def unsmuggle_url(smug_url): + if not '#__youtubedl_smuggle' in smug_url: + return smug_url, None + url, _, sdata = smug_url.rpartition(u'#') + jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] + data = json.loads(jsond) + return url, data