[utils] Improve cookie files support
authorSergey M․ <dstftw@gmail.com>
Mon, 4 May 2020 21:19:33 +0000 (04:19 +0700)
committerSergey M․ <dstftw@gmail.com>
Mon, 4 May 2020 21:21:25 +0000 (04:21 +0700)
+ Add support for UTF-8 in cookie files
* Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at)

test/test_YoutubeDLCookieJar.py
test/testdata/cookies/malformed_cookies.txt [new file with mode: 0644]
youtube_dl/utils.py

index f959798deb595165fddac6a8e555570c0420e454..05f48bd7417e478ab1ae9ae7eddaca4cbc7039c3 100644 (file)
@@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
         assert_cookie_has_value('HTTPONLY_COOKIE')
         assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
 
+    def test_malformed_cookies(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        # Cookies should be empty since all malformed cookie file entries
+        # will be ignored
+        self.assertFalse(cookiejar._cookies)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt
new file mode 100644 (file)
index 0000000..17bc403
--- /dev/null
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file!  Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar      FALSE   /       FALSE   0       COOKIE
+
+# Cookie file entry with invalid expires at
+www.foobar.foobar      FALSE   /       FALSE   1.7976931348623157e+308 COOKIE  VALUE
index 38262bee4cd08f97e4f5009d7066d1466bde25a4..112279ed7dfc484395d0d8b57b81c03a7a14e9ec 100644 (file)
@@ -7,6 +7,7 @@ import base64
 import binascii
 import calendar
 import codecs
+import collections
 import contextlib
 import ctypes
 import datetime
@@ -30,6 +31,7 @@ import ssl
 import subprocess
 import sys
 import tempfile
+import time
 import traceback
 import xml.etree.ElementTree
 import zlib
@@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
     1. https://curl.haxx.se/docs/http-cookies.html
     """
     _HTTPONLY_PREFIX = '#HttpOnly_'
+    _ENTRY_LEN = 7
+    _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by youtube-dl.  Do not edit.
+
+'''
+    _CookieFileEntry = collections.namedtuple(
+        'CookieFileEntry',
+        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
 
     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+        """
+        Save cookies to a file.
+
+        Most of the code is taken from CPython 3.8 and slightly adapted
+        to support cookie files with UTF-8 in both python 2 and 3.
+        """
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
         # Store session cookies with `expires` set to 0 instead of an empty
         # string
         for cookie in self:
             if cookie.expires is None:
                 cookie.expires = 0
-        compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
+
+        with io.open(filename, 'w', encoding='utf-8') as f:
+            f.write(self._HEADER)
+            now = time.time()
+            for cookie in self:
+                if not ignore_discard and cookie.discard:
+                    continue
+                if not ignore_expires and cookie.is_expired(now):
+                    continue
+                if cookie.secure:
+                    secure = 'TRUE'
+                else:
+                    secure = 'FALSE'
+                if cookie.domain.startswith('.'):
+                    initial_dot = 'TRUE'
+                else:
+                    initial_dot = 'FALSE'
+                if cookie.expires is not None:
+                    expires = compat_str(cookie.expires)
+                else:
+                    expires = ''
+                if cookie.value is None:
+                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
+                    # with no name, whereas http.cookiejar regards it as a
+                    # cookie with no value.
+                    name = ''
+                    value = cookie.name
+                else:
+                    name = cookie.name
+                    value = cookie.value
+                f.write(
+                    '\t'.join([cookie.domain, initial_dot, cookie.path,
+                               secure, expires, name, value]) + '\n')
 
     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
         """Load cookies from a file."""
@@ -2752,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
             else:
                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
 
+        def prepare_line(line):
+            if line.startswith(self._HTTPONLY_PREFIX):
+                line = line[len(self._HTTPONLY_PREFIX):]
+            # comments and empty lines are fine
+            if line.startswith('#') or not line.strip():
+                return line
+            cookie_list = line.split('\t')
+            if len(cookie_list) != self._ENTRY_LEN:
+                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+            cookie = self._CookieFileEntry(*cookie_list)
+            if cookie.expires_at and not cookie.expires_at.isdigit():
+                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+            return line
+
         cf = io.StringIO()
-        with open(filename) as f:
+        with io.open(filename, encoding='utf-8') as f:
             for line in f:
-                if line.startswith(self._HTTPONLY_PREFIX):
-                    line = line[len(self._HTTPONLY_PREFIX):]
-                cf.write(compat_str(line))
+                try:
+                    cf.write(prepare_line(line))
+                except compat_cookiejar.LoadError as e:
+                    write_string(
+                        'WARNING: skipping cookie file entry due to %s: %r\n'
+                        % (e, line), sys.stderr)
+                    continue
         cf.seek(0)
         self._really_load(cf, filename, ignore_discard, ignore_expires)
         # Session cookies are denoted by either `expires` field set to