commit 73af5cc817ff19d21cb432c5a4e9e37dd35a353d
parent b5f523ed62f6c84fe0c58274f1751e66c58282d8
Author: Sergey M. <dstftw@gmail.com>
Date: Fri, 23 Jun 2017 21:18:33 +0700
[YoutubeDL] Skip malformed formats for better extraction robustness
Diffstat:
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
@@ -1448,17 +1448,25 @@ class YoutubeDL(object):
if not formats:
raise ExtractorError('No video formats found!')
+ def is_wellformed(f):
+ url = f.get('url')
+ valid_url = url and isinstance(url, compat_str)
+ if not valid_url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return valid_url
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats))
+
formats_dict = {}
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
- if 'url' not in format:
- raise ExtractorError('Missing "url" key in result (index %d)' % i)
-
sanitize_string_field(format, 'format_id')
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
-
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
else: