commit 89923316210f8e17bb1a085278940e1c56fcff48
parent b0dde6686c7110c9c2515a808d803239a81e6505
Author: Sergey M․ <dstftw@gmail.com>
Date: Sun, 8 Oct 2017 21:36:50 +0700
[wdr] Relax media link regex (closes #14447)
Diffstat:
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
@@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor):
# for wdrmaus, in a tag with the class "videoButton" (previously a link
# to the page in a multiline "videoLink"-tag)
json_metadata = self._html_search_regex(
- r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
- webpage, 'media link', default=None, flags=re.MULTILINE)
+ r'''(?sx)class=
+ (?:
+ (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+ (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+ )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+ ''',
+ webpage, 'media link', default=None, group='data')
if not json_metadata:
return