youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit a8156c1d2e4b2a7ac5e034c247c6fccaca15a21d
parent 3e669f369f886dff8fa8272f3bfa37be6360a0ba
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Wed, 28 Nov 2012 00:06:28 +0100

Python 3 version of HTMLParser

Diffstat:
Myoutube_dl/utils.py | 16++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import gzip -import HTMLParser import locale import os import re @@ -42,6 +41,11 @@ except NameError: # Python 2 import htmlentitydefs as compat_html_entities try: + import html.parser as compat_html_parser +except NameError: # Python 2 + import HTMLParser as compat_html_parser + +try: compat_str = unicode # Python 2 except NameError: compat_str = str @@ -99,8 +103,8 @@ def htmlentity_transform(matchobj): # Unknown entity in name, return its literal representation return (u'&%s;' % entity) -HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix -class IDParser(HTMLParser.HTMLParser): +compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix +class IDParser(compat_html_parser.HTMLParser): """Modified HTMLParser that isolates a tag with the specified id""" def __init__(self, id): self.id = id @@ -110,11 +114,11 @@ class IDParser(HTMLParser.HTMLParser): self.html = None self.watch_startpos = False self.error_count = 0 - HTMLParser.HTMLParser.__init__(self) + compat_html_parser.HTMLParser.__init__(self) def error(self, message): if self.error_count > 10 or self.started: - raise HTMLParser.HTMLParseError(message, self.getpos()) + raise compat_html_parser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line self.error_count += 1 self.goahead(1) @@ -170,7 +174,7 @@ def get_element_by_id(id, html): parser = IDParser(id) try: parser.loads(html) - except HTMLParser.HTMLParseError: + except compat_html_parser.HTMLParseError: pass return parser.get_result()