commit 84f810160667f9129e6a8d841fae2d0e2e1fec86
parent b1337948ebeeacfcf104d513fac5913d3a5818a7
Author: Sergey M․ <dstftw@gmail.com>
Date: Tue, 17 Mar 2015 23:51:40 +0600
[generic] Follow redirects specified by `Refresh` HTTP header
Diffstat:
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -1270,8 +1270,14 @@ class GenericIE(InfoExtractor):
if not found:
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)',
webpage)
+ if not found:
+ # Also look in the Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ found = re.search(
+ r'[0-9]{,2};\s*(?:URL|url)=(.+)', refresh_header)
if found:
new_url = found.group(1)
self.report_following_redirect(new_url)