common.py (14774B)
from __future__ import division, unicode_literals

import os
import re
import sys
import time
import random

from ..compat import compat_os_name
from ..utils import (
    decodeArgument,
    encodeFilename,
    error_to_compat_str,
    format_bytes,
    shell_quote,
    timeconvert,
)


class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    retries:            Number of times to retry for HTTP error 5xx
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    logtostderr:        Log messages to stderr instead of stdout.
    consoletitle:       Display progress in console window's titlebar.
    nopart:             Do not use temporary .part files.
    nooverwrites:       Do not download when the destination path already
                        exists.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A list of additional command-line arguments for the
                        external downloader.
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_with_newline:  Print every progress update on its own line
                        instead of rewriting the current line.
    sleep_interval:     Lower bound, in seconds, of the random pause taken
                        before the download starts.
    max_sleep_interval: Upper bound of that pause; defaults to
                        sleep_interval.

    Subclasses of this one must re-define the real_download method.
    """

    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params
        # The built-in progress display is itself just the first hook.
        self.add_progress_hook(self.report_progress)

    @staticmethod
    def format_seconds(seconds):
        """Format a duration in seconds as MM:SS or HH:MM:SS.

        Durations of 100 hours or more are rendered as '--:--:--'.
        """
        (mins, secs) = divmod(seconds, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return byte_counter as a percentage of data_len.

        Returns None when data_len is None (total size unknown).
        """
        if data_len is None:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage with one decimal, right-aligned to 6 columns.

        Returns the placeholder '---.-%' when percent is None.
        """
        if percent is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining time, in whole seconds.

        start and now are timestamps (now defaults to time.time() when
        None); total and current are byte counts. Returns None when no
        estimate is possible: total is unknown, nothing has been
        transferred yet, or less than a millisecond has elapsed.
        """
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Format an ETA in seconds; returns '--:--' when eta is None."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes per second between start and now.

        Returns None when nothing has been transferred or less than a
        millisecond has elapsed.
        """
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed as '<size>/s', right-aligned to 10 columns.

        Returns a right-aligned '---b/s' placeholder when speed is None.
        """
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def format_retries(retries):
        """Format a retry count; an infinite count is rendered as 'inf'."""
        return 'inf' if retries == float('inf') else '%.0f' % retries

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Choose the size of the next block to read.

        The size is the rate observed for the last block (bytes divided
        by elapsed_time), limited to between half and double the last
        block size and capped at 4 MB. The upper limit is returned
        directly when elapsed_time is under a millisecond.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a decimal number optionally followed by one of the
        case-insensitive suffixes k, M, G, T, P, E, Z, Y, each a further
        power of 1024. Returns None if the string has any other form.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # A missing suffix is the empty string, whose index is 0, giving a
        # multiplier of 1024 ** 0 == 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def to_screen(self, *args, **kargs):
        """Forward to ydl.to_screen."""
        self.ydl.to_screen(*args, **kargs)

    def to_stderr(self, message):
        """Forward message to ydl.to_screen."""
        # NOTE(review): despite the method name this goes through
        # ydl.to_screen; confirm whether a stderr writer on ydl was intended.
        self.ydl.to_screen(message)

    def to_console_title(self, message):
        """Forward to ydl.to_console_title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Forward to ydl.trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to ydl.report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to ydl.report_error."""
        self.ydl.report_error(*args, **kargs)

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit.

        start_time and now are timestamps (now defaults to time.time()
        when None); byte_counter is the number of bytes transferred since
        start_time. Does nothing when no 'ratelimit' option is set.
        """
        rate_limit = self.params.get('ratelimit')
        if rate_limit is None or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep for the difference between how long the transfer should
            # have taken at the limit and how long it actually took.
            sleep_time = float(byte_counter) / rate_limit - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename.

        The name is returned unchanged when the 'nopart' option is set,
        when it is '-', or when the path exists but is not a regular file;
        otherwise '.part' is appended.
        """
        if self.params.get('nopart', False) or filename == '-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return filename with '.ytdl' appended."""
        return filename + '.ytdl'

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename.

        Does nothing when both names are equal. An IOError or OSError is
        passed to report_error rather than propagated from here.
        """
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError) as err:
            self.report_error('unable to rename file: %s' % error_to_compat_str(err))

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        last_modified_hdr is converted with timeconvert(). Returns the
        converted timestamp, or None when the header is missing, filename
        is not a regular file, or the converted value is None or 0.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Best effort: failing to stamp the file must not fail the
            # download.
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _report_progress_status(self, msg, is_last_line=False):
        """Print one progress line and mirror it to the console title.

        With the 'progress_with_newline' option every update is printed as
        a line of its own. Otherwise the current line is rewritten in
        place, and the end of line is emitted only when is_last_line is
        true.
        """
        fullmsg = '[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if compat_os_name == 'nt':
                # No erase sequence is used here: pad with spaces so that a
                # shorter message fully covers the previous one.
                prev_len = getattr(self, '_report_progress_prev_line_length',
                                   0)
                if prev_len > len(fullmsg):
                    fullmsg += ' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = '\r'
            else:
                # '\x1b[K' erases to the end of the line; only sent to a tty.
                clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title('youtube-dl ' + msg)

    def report_progress(self, s):
        """Default progress hook: render the status dict s on screen.

        Reads s['status'] and, when present, the 'total_bytes',
        'total_bytes_estimate', 'downloaded_bytes', 'elapsed', 'eta' and
        'speed' entries. Formatted '_*_str' entries are stored back into s
        as a side effect. Only the 'finished' and 'downloading' statuses
        produce output.
        """
        if s['status'] == 'finished':
            if self.params.get('noprogress', False):
                self.to_screen('[download] Download completed')
            else:
                msg_template = '100%%'
                if s.get('total_bytes') is not None:
                    s['_total_bytes_str'] = format_bytes(s['total_bytes'])
                    msg_template += ' of %(_total_bytes_str)s'
                if s.get('elapsed') is not None:
                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                    msg_template += ' in %(_elapsed_str)s'
                self._report_progress_status(
                    msg_template % s, is_last_line=True)

        if self.params.get('noprogress'):
            return

        if s['status'] != 'downloading':
            return

        if s.get('eta') is not None:
            s['_eta_str'] = self.format_eta(s['eta'])
        else:
            s['_eta_str'] = 'Unknown ETA'

        if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
        elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
        else:
            if s.get('downloaded_bytes') == 0:
                s['_percent_str'] = self.format_percent(0)
            else:
                s['_percent_str'] = 'Unknown %'

        if s.get('speed') is not None:
            s['_speed_str'] = self.format_speed(s['speed'])
        else:
            s['_speed_str'] = 'Unknown speed'

        if s.get('total_bytes') is not None:
            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
            msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
        elif s.get('total_bytes_estimate') is not None:
            s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
            msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
        else:
            if s.get('downloaded_bytes') is not None:
                s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
                if s.get('elapsed'):
                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
                else:
                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
            else:
                # _percent_str already carries its own percent sign. A bare
                # '%' in the template would be parsed as the start of a
                # conversion specifier ('% a') and corrupt or break the
                # formatting below.
                msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'

        self._report_progress_status(msg_template % s)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
            % (error_to_compat_str(err), count, self.format_retries(retries)))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a message that does not contain the file name.
            self.to_screen('[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    def download(self, filename, info_dict):
        """Download to a filename using the info from info_dict
        Return True on success and False otherwise

        filename may also be an object with a 'write' attribute, in which
        case the already-downloaded checks are skipped. When the target is
        found to be already present, the progress hooks are called with a
        'finished' status and True is returned without downloading.
        Otherwise, after the optional 'sleep_interval' pause, the result of
        real_download() is returned.
        """

        if not hasattr(filename, 'write'):
            # The on-disk checks only make sense for path names, so both are
            # evaluated inside this guard and never see a file-like object.
            nooverwrites_and_exists = (
                self.params.get('nooverwrites', False)
                and os.path.exists(encodeFilename(filename))
            )
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encodeFilename(filename))
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                })
                return True

        min_sleep_interval = self.params.get('sleep_interval')
        if min_sleep_interval:
            max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
            sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
            self.to_screen(
                '[download] Sleeping %s seconds...' % (
                    int(sleep_interval) if sleep_interval.is_integer()
                    else '%.2f' % sleep_interval))
            time.sleep(sleep_interval)

        return self.real_download(filename, info_dict)

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dict."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register ph to be called with each progress status dict."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """Print the command line args when the 'verbose' option is set.

        exe is the label shown in the message; it defaults to the base name
        of the first argument.
        """
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.to_screen('[debug] %s command line: %s' % (
            exe, shell_quote(str_args)))