[wdr] Relax media link regex (closes #14447)
author Sergey M․ <dstftw@gmail.com>
Sun, 8 Oct 2017 14:36:50 +0000 (21:36 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 8 Oct 2017 14:36:50 +0000 (21:36 +0700)
youtube_dl/extractor/wdr.py

index 8bb7362bbc1bec46dcef081bb08a02728d4af3e0..621de1e1efb73a9a377a46fe0fa702e595c3cde5 100644 (file)
@@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor):
         # for wdrmaus, in a tag with the class "videoButton" (previously a link
         # to the page in a multiline "videoLink"-tag)
         json_metadata = self._html_search_regex(
-            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
-            webpage, 'media link', default=None, flags=re.MULTILINE)
+            r'''(?sx)class=
+                    (?:
+                        (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+                        (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+                    )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+            ''',
+            webpage, 'media link', default=None, group='data')
 
         if not json_metadata:
             return