[InfoExtractor/common] Correct and test meta tag matching
author Philipp Hagemeister <phihag@phihag.de>
Wed, 7 Jan 2015 10:43:36 +0000 (11:43 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 8 Jan 2015 15:14:50 +0000 (16:14 +0100)
test/test_InfoExtractor.py
youtube_dl/extractor/common.py

index 13c18ed95d4ea65111b6a5bc1406d0a5703336c2..be8d12997a1a5aba2cb62270068363f339a5eac6 100644 (file)
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+
 if __name__ == '__main__':
     unittest.main()
index df32b5ca0ba081df6c5f4c27f2f00c1a46e7c246..d703893dcfef1e772f1e294b0c08430ee6c15db3 100644 (file)
@@ -594,7 +594,7 @@ class InfoExtractor(object):
         return self._html_search_regex(
             r'''(?isx)<meta
                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):