[core] Remove `Cookie` header on redirect to prevent leaks
authordirkf <fieldhouse@gmx.net>
Wed, 7 Jun 2023 18:38:54 +0000 (19:38 +0100)
committerdirkf <fieldhouse@gmx.net>
Tue, 18 Jul 2023 09:50:46 +0000 (10:50 +0100)
Adapted from yt-dlp/yt-dlp-ghsa-v8mc-9377-rwjj/pull/1/commits/101caac
Thx coletdjnz

test/test_http.py
youtube_dl/utils.py

index 1a65df9e056fcf90a5825608b942ca74dbcf9ce6..cd180b51ff53b4447c6dc31b31e5e594ec9720a9 100644 (file)
@@ -183,6 +183,11 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             self._method('GET')
         elif self.path.startswith('/headers'):
             self._headers()
+        elif self.path.startswith('/308-to-headers'):
+            self.send_response(308)
+            self.send_header('Location', '/headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path == '/trailing_garbage':
             payload = b'<html><video src="/vid.mp4" /></html>'
             compressed = gzip_compress(payload) + b'trailing garbage'
@@ -385,8 +390,31 @@ class TestHTTP(unittest.TestCase):
             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
                 0, 'test', 'ytdl', None, False, '127.0.0.1', True,
                 False, '/headers', True, False, None, False, None, None, {}))
-            data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read()
-            self.assertIn(b'Cookie: test=ytdl', data)
+            data = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'))).read().decode('utf-8')
+            self.assertIn('Cookie: test=ytdl', data)
+
+    def test_passed_cookie_header(self):
+        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
+        with FakeYDL() as ydl:
+            # Specified Cookie header should be used
+            res = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertIn('Cookie: test=test', res)
+
+            # Specified Cookie header should be removed on any redirect
+            res = ydl.urlopen(sanitized_Request(
+                self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=test', res)
+
+            # Specified Cookie header should override global cookiejar for that request
+            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+                False, '/headers', True, False, None, False, None, None, {}))
+            data = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=ytdlp', data)
+            self.assertIn('Cookie: test=test', data)
 
     def test_no_compression_compat_header(self):
         with FakeYDL() as ydl:
index 58c710b086e9e55750dd238c40a899434fa31c4f..c21cd36877f5503f5891cf17da8884f3a903537d 100644 (file)
@@ -2968,7 +2968,6 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
         new_method = req.get_method()
         new_data = req.data
-        remove_headers = []
 
         # On python 2 urlh.geturl() may sometimes return redirect URL
         # as a byte string instead of unicode. This workaround forces
@@ -2981,6 +2980,11 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
 
+        # Technically the Cookie header should be in unredirected_hdrs;
+        # however in practice some may set it in normal headers anyway.
+        # We will remove it here to prevent any leaks.
+        remove_headers = ['Cookie']
+
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
         if code == 303 and req.get_method() != 'HEAD':
@@ -2999,7 +3003,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
         # NB: don't use dict comprehension for python 2.6 compatibility
         new_headers = dict((k, v) for k, v in req.header_items()
-                           if k.lower() not in remove_headers)
+                           if k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
             newurl, headers=new_headers, origin_req_host=req.origin_req_host,