[utils] Fix urljoin for paths with non-http(s) schemes
author: Sergey M․ <dstftw@gmail.com>
Sun, 20 Jan 2019 13:21:24 +0000 (20:21 +0700)
committer: Sergey M․ <dstftw@gmail.com>
Sun, 20 Jan 2019 13:22:19 +0000 (20:22 +0700)
test/test_utils.py
youtube_dl/utils.py

index 9e28e008f5548f28598e38a64ad18165eecd463a..409482c3b7acd431c99457628081bc12b16541fb 100644 (file)
@@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(urljoin('http://foo.de/', ''), None)
         self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
         self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+        self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+        self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
 
     def test_url_or_none(self):
         self.assertEqual(url_or_none(None), None)
index d2d3c1a9fde82510f47b8fcd43c726b51bedf9d6..d0cb65814238234e61ea32cff77eeb4a3ad210d3 100644 (file)
@@ -1868,7 +1868,7 @@ def urljoin(base, path):
         path = path.decode('utf-8')
     if not isinstance(path, compat_str) or not path:
         return None
-    if re.match(r'^(?:https?:)?//', path):
+    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
         return path
     if isinstance(base, bytes):
         base = base.decode('utf-8')