youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

common.py (14774B)


      1 from __future__ import division, unicode_literals
      2 
      3 import os
      4 import re
      5 import sys
      6 import time
      7 import random
      8 
      9 from ..compat import compat_os_name
     10 from ..utils import (
     11     decodeArgument,
     12     encodeFilename,
     13     error_to_compat_str,
     14     format_bytes,
     15     shell_quote,
     16     timeconvert,
     17 )
     18 
     19 
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    retries:            Number of times to retry for HTTP error 5xx
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    logtostderr:        Log messages to stderr instead of stdout.
    consoletitle:       Display progress in console window's titlebar.
    nopart:             Do not use temporary .part files.
    nooverwrites:       Report an existing destination as already downloaded
                        instead of downloading it again.
    progress_with_newline:  Print every progress update on a line of its own
                        instead of rewriting the current line.
    sleep_interval:     Seconds to sleep before the real download starts; the
                        lower bound when max_sleep_interval is also given.
    max_sleep_interval: Upper bound of the randomized sleep before the real
                        download starts (defaults to sleep_interval).
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A list of additional command-line arguments for the
                        external downloader.
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)

    Subclasses of this one must re-define the real_download method.
    """

    # NOTE(review): presumably the number of bytes fetched when the `test`
    # option is set; it is not referenced inside this class, so confirm
    # against the subclasses that use it.
    _TEST_FILE_SIZE = 10241
    # Class-level placeholder; __init__ stores the real options dictionary
    # on each instance.
    params = None
     60 
     61     def __init__(self, ydl, params):
     62         """Create a FileDownloader object with the given options."""
     63         self.ydl = ydl
     64         self._progress_hooks = []
     65         self.params = params
     66         self.add_progress_hook(self.report_progress)
     67 
     68     @staticmethod
     69     def format_seconds(seconds):
     70         (mins, secs) = divmod(seconds, 60)
     71         (hours, mins) = divmod(mins, 60)
     72         if hours > 99:
     73             return '--:--:--'
     74         if hours == 0:
     75             return '%02d:%02d' % (mins, secs)
     76         else:
     77             return '%02d:%02d:%02d' % (hours, mins, secs)
     78 
     79     @staticmethod
     80     def calc_percent(byte_counter, data_len):
     81         if data_len is None:
     82             return None
     83         return float(byte_counter) / float(data_len) * 100.0
     84 
     85     @staticmethod
     86     def format_percent(percent):
     87         if percent is None:
     88             return '---.-%'
     89         return '%6s' % ('%3.1f%%' % percent)
     90 
     91     @staticmethod
     92     def calc_eta(start, now, total, current):
     93         if total is None:
     94             return None
     95         if now is None:
     96             now = time.time()
     97         dif = now - start
     98         if current == 0 or dif < 0.001:  # One millisecond
     99             return None
    100         rate = float(current) / dif
    101         return int((float(total) - float(current)) / rate)
    102 
    103     @staticmethod
    104     def format_eta(eta):
    105         if eta is None:
    106             return '--:--'
    107         return FileDownloader.format_seconds(eta)
    108 
    109     @staticmethod
    110     def calc_speed(start, now, bytes):
    111         dif = now - start
    112         if bytes == 0 or dif < 0.001:  # One millisecond
    113             return None
    114         return float(bytes) / dif
    115 
    116     @staticmethod
    117     def format_speed(speed):
    118         if speed is None:
    119             return '%10s' % '---b/s'
    120         return '%10s' % ('%s/s' % format_bytes(speed))
    121 
    122     @staticmethod
    123     def format_retries(retries):
    124         return 'inf' if retries == float('inf') else '%.0f' % retries
    125 
    126     @staticmethod
    127     def best_block_size(elapsed_time, bytes):
    128         new_min = max(bytes / 2.0, 1.0)
    129         new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
    130         if elapsed_time < 0.001:
    131             return int(new_max)
    132         rate = bytes / elapsed_time
    133         if rate > new_max:
    134             return int(new_max)
    135         if rate < new_min:
    136             return int(new_min)
    137         return int(rate)
    138 
    139     @staticmethod
    140     def parse_bytes(bytestr):
    141         """Parse a string indicating a byte quantity into an integer."""
    142         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
    143         if matchobj is None:
    144             return None
    145         number = float(matchobj.group(1))
    146         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
    147         return int(round(number * multiplier))
    148 
    149     def to_screen(self, *args, **kargs):
    150         self.ydl.to_screen(*args, **kargs)
    151 
    152     def to_stderr(self, message):
    153         self.ydl.to_screen(message)
    154 
    155     def to_console_title(self, message):
    156         self.ydl.to_console_title(message)
    157 
    158     def trouble(self, *args, **kargs):
    159         self.ydl.trouble(*args, **kargs)
    160 
    161     def report_warning(self, *args, **kargs):
    162         self.ydl.report_warning(*args, **kargs)
    163 
    164     def report_error(self, *args, **kargs):
    165         self.ydl.report_error(*args, **kargs)
    166 
    167     def slow_down(self, start_time, now, byte_counter):
    168         """Sleep if the download speed is over the rate limit."""
    169         rate_limit = self.params.get('ratelimit')
    170         if rate_limit is None or byte_counter == 0:
    171             return
    172         if now is None:
    173             now = time.time()
    174         elapsed = now - start_time
    175         if elapsed <= 0.0:
    176             return
    177         speed = float(byte_counter) / elapsed
    178         if speed > rate_limit:
    179             sleep_time = float(byte_counter) / rate_limit - elapsed
    180             if sleep_time > 0:
    181                 time.sleep(sleep_time)
    182 
    183     def temp_name(self, filename):
    184         """Returns a temporary filename for the given filename."""
    185         if self.params.get('nopart', False) or filename == '-' or \
    186                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
    187             return filename
    188         return filename + '.part'
    189 
    def undo_temp_name(self, filename):
        """Strip a trailing ``.part`` from filename, if there is one."""
        suffix = '.part'
        if not filename.endswith(suffix):
            return filename
        return filename[:-len(suffix)]
    194 
    def ytdl_filename(self, filename):
        """Return filename with the ``.ytdl`` extension appended."""
        extension = '.ytdl'
        return filename + extension
    197 
    198     def try_rename(self, old_filename, new_filename):
    199         try:
    200             if old_filename == new_filename:
    201                 return
    202             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
    203         except (IOError, OSError) as err:
    204             self.report_error('unable to rename file: %s' % error_to_compat_str(err))
    205 
    206     def try_utime(self, filename, last_modified_hdr):
    207         """Try to set the last-modified time of the given file."""
    208         if last_modified_hdr is None:
    209             return
    210         if not os.path.isfile(encodeFilename(filename)):
    211             return
    212         timestr = last_modified_hdr
    213         if timestr is None:
    214             return
    215         filetime = timeconvert(timestr)
    216         if filetime is None:
    217             return filetime
    218         # Ignore obviously invalid dates
    219         if filetime == 0:
    220             return
    221         try:
    222             os.utime(filename, (time.time(), filetime))
    223         except Exception:
    224             pass
    225         return filetime
    226 
    227     def report_destination(self, filename):
    228         """Report destination filename."""
    229         self.to_screen('[download] Destination: ' + filename)
    230 
    231     def _report_progress_status(self, msg, is_last_line=False):
    232         fullmsg = '[download] ' + msg
    233         if self.params.get('progress_with_newline', False):
    234             self.to_screen(fullmsg)
    235         else:
    236             if compat_os_name == 'nt':
    237                 prev_len = getattr(self, '_report_progress_prev_line_length',
    238                                    0)
    239                 if prev_len > len(fullmsg):
    240                     fullmsg += ' ' * (prev_len - len(fullmsg))
    241                 self._report_progress_prev_line_length = len(fullmsg)
    242                 clear_line = '\r'
    243             else:
    244                 clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
    245             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
    246         self.to_console_title('youtube-dl ' + msg)
    247 
    248     def report_progress(self, s):
    249         if s['status'] == 'finished':
    250             if self.params.get('noprogress', False):
    251                 self.to_screen('[download] Download completed')
    252             else:
    253                 msg_template = '100%%'
    254                 if s.get('total_bytes') is not None:
    255                     s['_total_bytes_str'] = format_bytes(s['total_bytes'])
    256                     msg_template += ' of %(_total_bytes_str)s'
    257                 if s.get('elapsed') is not None:
    258                     s['_elapsed_str'] = self.format_seconds(s['elapsed'])
    259                     msg_template += ' in %(_elapsed_str)s'
    260                 self._report_progress_status(
    261                     msg_template % s, is_last_line=True)
    262 
    263         if self.params.get('noprogress'):
    264             return
    265 
    266         if s['status'] != 'downloading':
    267             return
    268 
    269         if s.get('eta') is not None:
    270             s['_eta_str'] = self.format_eta(s['eta'])
    271         else:
    272             s['_eta_str'] = 'Unknown ETA'
    273 
    274         if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
    275             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
    276         elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
    277             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
    278         else:
    279             if s.get('downloaded_bytes') == 0:
    280                 s['_percent_str'] = self.format_percent(0)
    281             else:
    282                 s['_percent_str'] = 'Unknown %'
    283 
    284         if s.get('speed') is not None:
    285             s['_speed_str'] = self.format_speed(s['speed'])
    286         else:
    287             s['_speed_str'] = 'Unknown speed'
    288 
    289         if s.get('total_bytes') is not None:
    290             s['_total_bytes_str'] = format_bytes(s['total_bytes'])
    291             msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
    292         elif s.get('total_bytes_estimate') is not None:
    293             s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
    294             msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
    295         else:
    296             if s.get('downloaded_bytes') is not None:
    297                 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
    298                 if s.get('elapsed'):
    299                     s['_elapsed_str'] = self.format_seconds(s['elapsed'])
    300                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
    301                 else:
    302                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
    303             else:
    304                 msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
    305 
    306         self._report_progress_status(msg_template % s)
    307 
    308     def report_resuming_byte(self, resume_len):
    309         """Report attempt to resume at given byte."""
    310         self.to_screen('[download] Resuming download at byte %s' % resume_len)
    311 
    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx.

        err is the error that triggered the retry, count the number of the
        attempt about to be made and retries the configured maximum.
        """
        details = (error_to_compat_str(err), count, self.format_retries(retries))
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
            % details)
    317 
    318     def report_file_already_downloaded(self, file_name):
    319         """Report file has already been fully downloaded."""
    320         try:
    321             self.to_screen('[download] %s has already been downloaded' % file_name)
    322         except UnicodeEncodeError:
    323             self.to_screen('[download] The file has already been downloaded')
    324 
    325     def report_unable_to_resume(self):
    326         """Report it was impossible to resume download."""
    327         self.to_screen('[download] Unable to resume')
    328 
    329     def download(self, filename, info_dict):
    330         """Download to a filename using the info from info_dict
    331         Return True on success and False otherwise
    332         """
    333 
    334         nooverwrites_and_exists = (
    335             self.params.get('nooverwrites', False)
    336             and os.path.exists(encodeFilename(filename))
    337         )
    338 
    339         if not hasattr(filename, 'write'):
    340             continuedl_and_exists = (
    341                 self.params.get('continuedl', True)
    342                 and os.path.isfile(encodeFilename(filename))
    343                 and not self.params.get('nopart', False)
    344             )
    345 
    346             # Check file already present
    347             if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
    348                 self.report_file_already_downloaded(filename)
    349                 self._hook_progress({
    350                     'filename': filename,
    351                     'status': 'finished',
    352                     'total_bytes': os.path.getsize(encodeFilename(filename)),
    353                 })
    354                 return True
    355 
    356         min_sleep_interval = self.params.get('sleep_interval')
    357         if min_sleep_interval:
    358             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
    359             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
    360             self.to_screen(
    361                 '[download] Sleeping %s seconds...' % (
    362                     int(sleep_interval) if sleep_interval.is_integer()
    363                     else '%.2f' % sleep_interval))
    364             time.sleep(sleep_interval)
    365 
    366         return self.real_download(filename, info_dict)
    367 
    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses.

        The base implementation always raises NotImplementedError.
        """
        reason = 'This method must be implemented by subclasses'
        raise NotImplementedError(reason)
    371 
    372     def _hook_progress(self, status):
    373         for ph in self._progress_hooks:
    374             ph(status)
    375 
    376     def add_progress_hook(self, ph):
    377         # See YoutubeDl.py (search for progress_hooks) for a description of
    378         # this interface
    379         self._progress_hooks.append(ph)
    380 
    381     def _debug_cmd(self, args, exe=None):
    382         if not self.params.get('verbose', False):
    383             return
    384 
    385         str_args = [decodeArgument(a) for a in args]
    386 
    387         if exe is None:
    388             exe = os.path.basename(str_args[0])
    389 
    390         self.to_screen('[debug] %s command line: %s' % (
    391             exe, shell_quote(str_args)))