youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit befa4708fd2165b85d04002c3845adf191d34302
parent 90830004c893e2d5f0643c05af064cfc7a3b579e
Author: Sergey M․ <dstftw@gmail.com>
Date:   Mon, 19 Feb 2018 22:50:23 +0700

[utils] Fixup some common URL's typos in sanitize_url (closes #15649)

Diffstat:
Mtest/test_utils.py | 7+++++++
Myoutube_dl/utils.py | 18+++++++++++++++---
2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -57,6 +57,7 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, + sanitize_url, expand_path, prepend_extension, replace_extension, @@ -219,6 +220,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_sanitize_url(self): + self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') + self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') + self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') + def test_expand_path(self): def env(var): return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -538,10 +538,22 @@ def sanitize_path(s): return os.path.join(*sanitized_path) -# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of -# unwanted failures due to missing protocol def sanitize_url(url): - return 'http:%s' % url if url.startswith('//') else url + # Prepend protocol-less URLs with `http:` scheme in order to mitigate + # the number of unwanted failures due to missing protocol + if url.startswith('//'): + return 'http:%s' % url + # Fix some common typos seen so far + COMMON_TYPOS = ( + # https://github.com/rg3/youtube-dl/issues/15649 + (r'^httpss://', r'https://'), + # https://bx1.be/lives/direct-tv/ + (r'^rmtp([es]?)://', r'rtmp\1://'), + ) + for mistake, fixup in COMMON_TYPOS: + if re.match(mistake, url): + return re.sub(mistake, fixup, url) + return url def sanitized_Request(url, *args, **kwargs):