encodeFilename,
find_xpath_attr,
fix_xml_ampersands,
- get_meta_content,
orderedSet,
OnDemandPagedList,
InAdvancePagedList,
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
- def test_meta_parser(self):
- testhtml = '''
- <head>
- <meta name="description" content="foo & bar">
- <meta content='Plato' name='author'/>
- </head>
- '''
- get_meta = lambda name: get_meta_content(name, testhtml)
- self.assertEqual(get_meta('description'), 'foo & bar')
- self.assertEqual(get_meta('author'), 'Plato')
-
def test_xpath_with_ns(self):
testxml = '''<root xmlns:media="http://example.com/">
<media:song>
if self.rawdata[i:].startswith("</scr'+'ipt>")
else compat_html_parser.HTMLParser.parse_endtag(self, i))
+
def get_element_by_id(id, html):
    """Return the content of the tag in *html* whose ``id`` attribute is *id*.

    Thin convenience wrapper: the lookup is delegated unchanged to
    get_element_by_attribute() with the attribute name fixed to "id".
    """
    return get_element_by_attribute(attribute="id", value=id, html=html)
+
def get_element_by_attribute(attribute, value, html):
"""Return the content of the tag with the specified attribute in the passed HTML document"""
parser = AttrParser(attribute, value)
def get_result(self):
    """Return the value currently held in ``self.result``."""
    outcome = self.result
    return outcome
-def get_meta_content(name, html):
- """
- Return the content attribute from the meta tag with the given name attribute.
- """
- parser = MetaParser(name)
- try:
- parser.loads(html)
- except compat_html_parser.HTMLParseError:
- pass
- return parser.get_result()
def clean_html(html):