compat_str,
compat_urllib_error,
compat_urllib_request,
+ compat_urllib_parse_urlparse,
ContentTooShortError,
date_from_str,
DateRange,
def urlopen(self, req):
    """Start an HTTP download.

    urllib chokes on URLs with non-ASCII characters
    (see http://bugs.python.org/issue3991). To work around that, the path,
    params, query and fragment of the requested URL are percent-escaped
    and, if that changed anything, the request is re-issued against the
    escaped URL.

    req is either a URL string or a urllib Request object.

    Returns whatever self._opener.open() returns for the (possibly
    rewritten) request, opened with self._socket_timeout as timeout.
    """
    req_is_string = isinstance(req, compat_str)
    url = req if req_is_string else req.get_full_url()

    def escape(component):
        return compat_cookiejar.escape_path(component.encode('utf-8'))

    url_parsed = compat_urllib_parse_urlparse(url)
    url_escaped = url_parsed._replace(
        path=escape(url_parsed.path),
        # urlparse splits ";params" off the last path segment, so it has
        # to be escaped separately or non-ASCII text there slips through
        params=escape(url_parsed.params),
        query=escape(url_parsed.query),
        fragment=escape(url_parsed.fragment)
    ).geturl()

    # Substitute URL only if escaping changed anything
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            # Remember the method before rebuilding: a plain Request would
            # otherwise fall back to GET/POST and e.g. turn HEAD into GET
            method = req.get_method()
            escaped_req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host,
                unverifiable=req.unverifiable)
            # Headers registered via add_unredirected_header() are kept
            # apart from req.headers and must be carried over explicitly
            for name, value in getattr(req, 'unredirected_hdrs', {}).items():
                escaped_req.add_unredirected_header(name, value)
            if escaped_req.get_method() != method:
                escaped_req.get_method = lambda: method
            req = escaped_req

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):