[brightcove] Handle non well-formed XMLs (#5421)
author Sergey M. <dstftw@gmail.com>
Tue, 14 Apr 2015 11:50:53 +0000 (17:50 +0600)
committer Sergey M. <dstftw@gmail.com>
Tue, 14 Apr 2015 11:50:53 +0000 (17:50 +0600)
youtube_dl/extractor/brightcove.py

index b37857b2e543376c16349ad43f0fea0b358ad3af..117cb00e6e563a620d551b7363ed95e03246e9e0 100644 (file)
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -185,7 +188,7 @@ class BrightcoveIE(InfoExtractor):
                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
             ).+?>\s*</object>''',
             webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return filter(None, [cls._build_brighcove_url(m) for m in matches])
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})