[extractor/common] Allow quoteless content attribute in og regexes (Closes #7115)
author Sergey M․ <dstftw@gmail.com>
Fri, 9 Oct 2015 19:44:33 +0000 (01:44 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 9 Oct 2015 19:46:01 +0000 (01:46 +0600)
youtube_dl/extractor/common.py

index 242618c583d96afecf87ecf90d69b85c02456646..0082a4c84606f4f368d6d2075f34a021fb8da72e 100644 (file)
@@ -645,7 +645,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
+        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
         property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
         template = r'<meta[^>]+?%s[^>]+?%s'
         return [