youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 43e8fafd49f94ebf4776c84697e4b815750ec701
parent 314d506b96d87a212e7e57eaa4d86514579c1c12
Author: Nick Daniels <nick.daniels@forward.co.uk>
Date:   Wed, 19 Dec 2012 14:21:14 +0000

Refactor IDParser to search for elements by any attribute not just ID

Diffstat:
M youtube_dl/utils.py | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -214,10 +214,11 @@ def htmlentity_transform(matchobj):
     return (u'&%s;' % entity)
 
 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class IDParser(compat_html_parser.HTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified id"""
-    def __init__(self, id):
-        self.id = id
+class AttrParser(compat_html_parser.HTMLParser):
+    """Modified HTMLParser that isolates a tag with the specified attribute"""
+    def __init__(self, attribute, value):
+        self.attribute = attribute
+        self.value = value
         self.result = None
         self.started = False
         self.depth = {}
@@ -242,7 +243,7 @@ class IDParser(compat_html_parser.HTMLParser):
         attrs = dict(attrs)
         if self.started:
             self.find_startpos(None)
-        if 'id' in attrs and attrs['id'] == self.id:
+        if self.attribute in attrs and attrs[self.attribute] == self.value:
             self.result = [tag]
             self.started = True
             self.watch_startpos = True
@@ -280,8 +281,12 @@ class IDParser(compat_html_parser.HTMLParser):
         return '\n'.join(lines).strip()
 
 def get_element_by_id(id, html):
-    """Return the content of the tag with the specified id in the passed HTML document"""
-    parser = IDParser(id)
+    """Return the content of the tag with the specified ID in the passed HTML document"""
+    return get_element_by_attribute("id", id, html)
+
+def get_element_by_attribute(attribute, value, html):
+    """Return the content of the tag with the specified attribute in the passed HTML document"""
+    parser = AttrParser(attribute, value)
     try:
         parser.loads(html)
     except compat_html_parser.HTMLParseError: