[compat] Fix for XML with <!DOCTYPE> in Python 2.7 and 3.2
author Yen Chi Hsuan <yan12125@gmail.com>
Sun, 22 May 2016 17:34:08 +0000 (01:34 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Sun, 22 May 2016 17:40:11 +0000 (01:40 +0800)
Such XML documents cause a DeprecationWarning, which is raised as an
error if Python is run with `-W error`

test/test_compat.py
youtube_dl/compat.py

index 539b3054027992b7b08effbe98803f40872d61bf..f5317ac3e24290d5aa73e12c7e490bfed72d6c21 100644 (file)
@@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
         self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
         self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
 
+    def test_compat_etree_fromstring_doctype(self):
+        xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+        compat_etree_fromstring(xml)
+
     def test_struct_unpack(self):
         self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
 
index 1392361a1636b4943b4083237c1704372f7065b7..06e5f3ff63ee644807fac38bb1859eb5b17ed75b 100644 (file)
@@ -245,13 +245,20 @@ try:
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 
+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+    def doctype(self, name, pubid, system):
+        pass
+
 if sys.version_info[0] >= 3:
-    compat_etree_fromstring = xml.etree.ElementTree.fromstring
+    def compat_etree_fromstring(text):
+        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
 else:
     # python 2.x tries to encode unicode strings with ascii (see the
     # XMLParser._fixtext method)
-    etree = xml.etree.ElementTree
-
     try:
         _etree_iter = etree.Element.iter
     except AttributeError:  # Python <=2.6
@@ -265,7 +272,7 @@ else:
     # 2.7 source
     def _XML(text, parser=None):
         if not parser:
-            parser = etree.XMLParser(target=etree.TreeBuilder())
+            parser = etree.XMLParser(target=_TreeBuilder())
         parser.feed(text)
         return parser.close()
 
@@ -277,7 +284,7 @@ else:
         return el
 
     def compat_etree_fromstring(text):
-        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
         for el in _etree_iter(doc):
             if el.text is not None and isinstance(el.text, bytes):
                 el.text = el.text.decode('utf-8')