commit ed9a25dd612fb06d9cf007a6491ac9982535a8f9
parent 84f810160667f9129e6a8d841fae2d0e2e1fec86
Author: Sergey M․ <dstftw@gmail.com>
Date: Wed, 18 Mar 2015 00:05:40 +0600
[generic] Generalize redirect regex
Diffstat:
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -1268,16 +1268,16 @@ class GenericIE(InfoExtractor):
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)',
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
webpage)
if not found:
# Look also in Refresh HTTP header
refresh_header = head_response.headers.get('Refresh')
if refresh_header:
- found = re.search(
- r'[0-9]{,2};\s*(?:URL|url)=(.+)', refresh_header)
+ found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = found.group(1)
self.report_following_redirect(new_url)