youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit bcf89ce62cb4f6ab8802ab6aef01c3afaefc0075
parent e3899d0e00167c7d9675ab3a77bc77b679586ee8
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon, 10 Mar 2014 17:31:32 +0100

[generic] Suppress warning about doctypes in RSS parser

Diffstat:
M youtube_dl/extractor/generic.py | 4 ++--
M youtube_dl/utils.py | 11 +++++++++++
2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 
 import os
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
@@ -17,6 +16,7 @@ from ..utils import (
 
     ExtractorError,
     HEADRequest,
+    parse_xml,
     smuggle_url,
     unescapeHTML,
     unified_strdate,
@@ -274,7 +274,7 @@ class GenericIE(InfoExtractor):
 
         # Is it an RSS feed?
         try:
-            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
+            doc = parse_xml(webpage)
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
         except compat_xml_parse_error:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -22,6 +22,7 @@ import struct
 import subprocess
 import sys
 import traceback
+import xml.etree.ElementTree
 import zlib
 
 try:
@@ -1267,3 +1268,13 @@ def read_batch_urls(batch_fd):
 
 def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+
+
+def parse_xml(s):
+    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+        def doctype(self, name, pubid, system):
+            pass  # Ignore doctypes
+
+    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
+    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
+    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)