[extractor/common] Add support for dl8-* media tags (closes #27283)
author Sergey M․ <dstftw@gmail.com>
Sun, 6 Dec 2020 17:59:25 +0000 (00:59 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 6 Dec 2020 18:08:22 +0000 (01:08 +0700)
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py

index 877873ebd3bb4fae3b753a0c41049294e4d75ba0..dd07a1cae6be42b2d2cc06a27a4162e4fb86d4a8 100644 (file)
@@ -2513,7 +2513,8 @@ class InfoExtractor(object):
         # amp-video and amp-audio are very similar to their HTML5 counterparts
         # so we will include them right here (see
         # https://www.ampproject.org/docs/reference/components/amp-video)
-        _MEDIA_TAG_NAME_RE = r'(?:amp-)?(video|audio)'
+        # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
+        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
         media_tags = [(media_tag, media_type, '')
                       for media_tag, media_type
                       in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
index d2ba0783991a2453e9a755a2ec8b4375b48df481..85dc1d02dd97a2fbf628cd1008e0e5064ae35dbc 100644 (file)
@@ -2466,7 +2466,9 @@ class GenericIE(InfoExtractor):
         # Sometimes embedded video player is hidden behind percent encoding
         # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
         # Unescaping the whole page allows handling those cases in a generic way
-        webpage = compat_urllib_parse_unquote(webpage)
+        # FIXME: unescaping the whole page may break URLs, commenting out for now.
+        # There probably should be a second run of generic extractor on unescaped webpage.
+        # webpage = compat_urllib_parse_unquote(webpage)
 
         # Unescape squarespace embeds to be detected by generic extractor,
         # see https://github.com/ytdl-org/youtube-dl/issues/21294