[kaltura] Improve iframe extraction (#28969)
author Ben Rog-Wilhelm <zorba-github@pavlovian.net>
Tue, 4 May 2021 19:14:35 +0000 (14:14 -0500)
committer GitHub <noreply@github.com>
Tue, 4 May 2021 19:14:35 +0000 (02:14 +0700)
Co-authored-by: Sergey M. <dstftw@gmail.com>
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/kaltura.py

index 2f555c1d40cf5d88bdb324ff5bee93626722b1df..5ad40ee234e0f6a5f6366515212f361eb4550b7f 100644 (file)
@@ -102,6 +102,21 @@ class GDCVaultIE(InfoExtractor):
                 'format': 'mp4-408',
             },
         },
+        {
+            # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+            'url': 'https://www.gdcvault.com/play/1025699',
+            'info_dict': {
+                'id': '0_zagynv0a',
+                'ext': 'mp4',
+                'title': 'Tech Toolbox',
+                'upload_date': '20190408',
+                'uploader_id': 'joe@blazestreaming.com',
+                'timestamp': 1554764629,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def _login(self, webpage_url, display_id):
index 5d0ff041821325d51152f0f184e185bd911b3fb3..c731612c4ef164e2bffb3293da2bf8747b3a2244 100644 (file)
@@ -145,7 +145,7 @@ class KalturaIE(InfoExtractor):
                 ''', webpage))
             or list(re.finditer(
                 r'''(?xs)
-                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)