youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

YoutubeDL.py (112834B)


      1 #!/usr/bin/env python
      2 # coding: utf-8
      3 
      4 from __future__ import absolute_import, unicode_literals
      5 
      6 import collections
      7 import contextlib
      8 import copy
      9 import datetime
     10 import errno
     11 import fileinput
     12 import io
     13 import itertools
     14 import json
     15 import locale
     16 import operator
     17 import os
     18 import platform
     19 import re
     20 import shutil
     21 import subprocess
     22 import socket
     23 import sys
     24 import time
     25 import tokenize
     26 import traceback
     27 import random
     28 
     29 from string import ascii_letters
     30 
     31 from .compat import (
     32     compat_basestring,
     33     compat_cookiejar,
     34     compat_get_terminal_size,
     35     compat_http_client,
     36     compat_kwargs,
     37     compat_numeric_types,
     38     compat_os_name,
     39     compat_str,
     40     compat_tokenize_tokenize,
     41     compat_urllib_error,
     42     compat_urllib_request,
     43     compat_urllib_request_DataHandler,
     44 )
     45 from .utils import (
     46     age_restricted,
     47     args_to_str,
     48     ContentTooShortError,
     49     date_from_str,
     50     DateRange,
     51     DEFAULT_OUTTMPL,
     52     determine_ext,
     53     determine_protocol,
     54     DownloadError,
     55     encode_compat_str,
     56     encodeFilename,
     57     error_to_compat_str,
     58     expand_path,
     59     ExtractorError,
     60     format_bytes,
     61     formatSeconds,
     62     GeoRestrictedError,
     63     int_or_none,
     64     ISO3166Utils,
     65     locked_file,
     66     make_HTTPS_handler,
     67     MaxDownloadsReached,
     68     orderedSet,
     69     PagedList,
     70     parse_filesize,
     71     PerRequestProxyHandler,
     72     platform_name,
     73     PostProcessingError,
     74     preferredencoding,
     75     prepend_extension,
     76     register_socks_protocols,
     77     render_table,
     78     replace_extension,
     79     SameFileError,
     80     sanitize_filename,
     81     sanitize_path,
     82     sanitize_url,
     83     sanitized_Request,
     84     std_headers,
     85     str_or_none,
     86     subtitles_filename,
     87     UnavailableVideoError,
     88     url_basename,
     89     version_tuple,
     90     write_json_file,
     91     write_string,
     92     YoutubeDLCookieJar,
     93     YoutubeDLCookieProcessor,
     94     YoutubeDLHandler,
     95     YoutubeDLRedirectHandler,
     96 )
     97 from .cache import Cache
     98 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
     99 from .extractor.openload import PhantomJSwrapper
    100 from .downloader import get_suitable_downloader
    101 from .downloader.rtmp import rtmpdump_version
    102 from .postprocessor import (
    103     FFmpegFixupM3u8PP,
    104     FFmpegFixupM4aPP,
    105     FFmpegFixupStretchedPP,
    106     FFmpegMergerPP,
    107     FFmpegPostProcessor,
    108     get_postprocessor,
    109 )
    110 from .version import __version__
    111 
    112 if compat_os_name == 'nt':
    113     import ctypes
    114 
    115 
    116 class YoutubeDL(object):
    117     """YoutubeDL class.
    118 
    YoutubeDL objects are the ones responsible for downloading the
    120     actual video file and writing it to disk if the user has requested
    121     it, among some other tasks. In most cases there should be one per
    122     program. As, given a video URL, the downloader doesn't know how to
    123     extract all the needed information, task that InfoExtractors do, it
    124     has to pass the URL to one of them.
    125 
    126     For this, YoutubeDL objects have a method that allows
    127     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor it
    129     finds that reports being able to handle it. The InfoExtractor extracts
    130     all the information about the video or videos the URL refers to, and
    131     YoutubeDL process the extracted information, possibly using a File
    132     Downloader to download the video.
    133 
    134     YoutubeDL objects accept a lot of parameters. In order not to saturate
    135     the object constructor with arguments, it receives a dictionary of
    136     options instead. These options are available through the params
    137     attribute for the InfoExtractors to use. The YoutubeDL also
    138     registers itself as the downloader in charge for the InfoExtractors
    139     that are added to it, so this is a "mutual registration".
    140 
    141     Available options:
    142 
    143     username:          Username for authentication purposes.
    144     password:          Password for authentication purposes.
    145     videopassword:     Password for accessing a video.
    146     ap_mso:            Adobe Pass multiple-system operator identifier.
    147     ap_username:       Multiple-system operator account username.
    148     ap_password:       Multiple-system operator account password.
    149     usenetrc:          Use netrc for authentication instead.
    150     verbose:           Print additional info to stdout.
    151     quiet:             Do not print messages to stdout.
    152     no_warnings:       Do not print out anything for warnings.
    153     forceurl:          Force printing final URL.
    154     forcetitle:        Force printing title.
    155     forceid:           Force printing ID.
    156     forcethumbnail:    Force printing thumbnail URL.
    157     forcedescription:  Force printing description.
    158     forcefilename:     Force printing final filename.
    159     forceduration:     Force printing duration.
    160     forcejson:         Force printing info_dict as JSON.
    161     dump_single_json:  Force printing the info_dict of the whole playlist
    162                        (or video) as a single JSON line.
    163     simulate:          Do not download the video files.
    164     format:            Video format code. See options.py for more information.
    165     outtmpl:           Template for output names.
    166     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    167     restrictfilenames: Do not allow "&" and spaces in file names
    168     ignoreerrors:      Do not stop on download errors.
    169     force_generic_extractor: Force downloader to use the generic extractor
    170     nooverwrites:      Prevent overwriting files.
    171     playliststart:     Playlist item to start at.
    172     playlistend:       Playlist item to end at.
    173     playlist_items:    Specific indices of playlist to download.
    174     playlistreverse:   Download playlist items in reverse order.
    175     playlistrandom:    Download playlist items in random order.
    176     matchtitle:        Download only matching titles.
    177     rejecttitle:       Reject downloads for matching titles.
    178     logger:            Log messages to a logging.Logger instance.
    179     logtostderr:       Log messages to stderr instead of stdout.
    180     writedescription:  Write the video description to a .description file
    181     writeinfojson:     Write the video description to a .info.json file
    182     writeannotations:  Write the video annotations to a .annotations.xml file
    183     writethumbnail:    Write the thumbnail image to a file
    184     write_all_thumbnails:  Write all thumbnail formats to files
    185     writesubtitles:    Write the video subtitles to a file
    186     writeautomaticsub: Write the automatically generated subtitles to a file
    187     allsubtitles:      Downloads all the subtitles of the video
    188                        (requires writesubtitles or writeautomaticsub)
    189     listsubtitles:     Lists all available subtitles for the video
    190     subtitlesformat:   The format code for subtitles
    191     subtitleslangs:    List of languages of the subtitles to download
    192     keepvideo:         Keep the video file after post-processing
    193     daterange:         A DateRange object, download only if the upload_date is in the range.
    194     skip_download:     Skip the actual download of the video file
    195     cachedir:          Location of the cache files in the filesystem.
    196                        False to disable filesystem cache.
    197     noplaylist:        Download single video instead of a playlist if in doubt.
    198     age_limit:         An integer representing the user's age in years.
    199                        Unsuitable videos for the given age are skipped.
    200     min_views:         An integer representing the minimum view count the video
    201                        must have in order to not be skipped.
    202                        Videos without view count information are always
    203                        downloaded. None for no limit.
    204     max_views:         An integer representing the maximum view count.
    205                        Videos that are more popular than that are not
    206                        downloaded.
    207                        Videos without view count information are always
    208                        downloaded. None for no limit.
    209     download_archive:  File name of a file where all downloads are recorded.
    210                        Videos already present in the file are not downloaded
    211                        again.
    212     cookiefile:        File name where cookies should be read from and dumped to.
    213     nocheckcertificate:Do not verify SSL certificates
    214     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
    215                        At the moment, this is only supported by YouTube.
    216     proxy:             URL of the proxy server to use
    217     geo_verification_proxy:  URL of the proxy to use for IP address verification
    218                        on geo-restricted sites.
    219     socket_timeout:    Time to wait for unresponsive hosts, in seconds
    220     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    222     debug_printtraffic:Print out sent and received HTTP traffic
    223     include_ads:       Download ads as well
    224     default_search:    Prepend this string if an input url is not valid.
    225                        'auto' for elaborate guessing
    226     encoding:          Use this encoding instead of the system-specified.
    227     extract_flat:      Do not resolve URLs, return the immediate result.
    228                        Pass in 'in_playlist' to only show this behavior for
    229                        playlist items.
    230     postprocessors:    A list of dictionaries, each with an entry
    231                        * key:  The name of the postprocessor. See
    232                                youtube_dl/postprocessor/__init__.py for a list.
    233                        as well as any further keyword arguments for the
    234                        postprocessor.
    235     progress_hooks:    A list of functions that get called on download
    236                        progress, with a dictionary with the entries
    237                        * status: One of "downloading", "error", or "finished".
    238                                  Check this first and ignore unknown values.
    239 
    240                        If status is one of "downloading", or "finished", the
    241                        following properties may also be present:
    242                        * filename: The final filename (always present)
    243                        * tmpfilename: The filename we're currently writing to
    244                        * downloaded_bytes: Bytes on disk
    245                        * total_bytes: Size of the whole file, None if unknown
    246                        * total_bytes_estimate: Guess of the eventual file size,
    247                                                None if unavailable.
    248                        * elapsed: The number of seconds since download started.
    249                        * eta: The estimated time in seconds, None if unknown
    250                        * speed: The download speed in bytes/second, None if
    251                                 unknown
    252                        * fragment_index: The counter of the currently
    253                                          downloaded video fragment.
    254                        * fragment_count: The number of fragments (= individual
    255                                          files that will be merged)
    256 
    257                        Progress hooks are guaranteed to be called at least once
    258                        (with status "finished") if the download is successful.
    259     merge_output_format: Extension to use when merging formats.
    260     fixup:             Automatically correct known faults of the file.
    261                        One of:
    262                        - "never": do nothing
    263                        - "warn": only emit a warning
    264                        - "detect_or_warn": check whether we can do anything
    265                                            about it, warn otherwise (default)
    266     source_address:    Client-side IP address to bind to.
    267     call_home:         Boolean, true iff we are allowed to contact the
    268                        youtube-dl servers for debugging.
    269     sleep_interval:    Number of seconds to sleep before each download when
    270                        used alone or a lower bound of a range for randomized
    271                        sleep before each download (minimum possible number
    272                        of seconds to sleep) when used along with
    273                        max_sleep_interval.
    274     max_sleep_interval:Upper bound of a range for randomized sleep before each
    275                        download (maximum possible number of seconds to sleep).
    276                        Must only be used along with sleep_interval.
    277                        Actual sleep time will be a random float from range
    278                        [sleep_interval; max_sleep_interval].
    279     listformats:       Print an overview of available video formats and exit.
    280     list_thumbnails:   Print a table of all thumbnails and exit.
    281     match_filter:      A function that gets called with the info_dict of
    282                        every video.
    283                        If it returns a message, the video is ignored.
    284                        If it returns None, the video is downloaded.
    285                        match_filter_func in utils.py is one example for this.
    286     no_color:          Do not emit color codes in output.
    287     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
    288                        HTTP header
    289     geo_bypass_country:
    290                        Two-letter ISO 3166-2 country code that will be used for
    291                        explicit geographic restriction bypassing via faking
    292                        X-Forwarded-For HTTP header
    293     geo_bypass_ip_block:
    294                        IP range in CIDR notation that will be used similarly to
    295                        geo_bypass_country
    296 
    297     The following options determine which downloader is picked:
    298     external_downloader: Executable of the external downloader to call.
    299                        None or unset for standard (built-in) downloader.
    300     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
    301                        if True, otherwise use ffmpeg/avconv if False, otherwise
    302                        use downloader suggested by extractor if None.
    303 
    304     The following parameters are not used by YoutubeDL itself, they are used by
    305     the downloader (see youtube_dl/downloader/common.py):
    306     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    307     noresizebuffer, retries, continuedl, noprogress, consoletitle,
    308     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    309     http_chunk_size.
    310 
    311     The following options are used by the post processors:
    312     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
    313                        otherwise prefer ffmpeg.
    314     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
    315                        to the binary or its containing directory.
    316     postprocessor_args: A list of additional command-line arguments for the
    317                         postprocessor.
    318 
    319     The following options are used by the Youtube extractor:
    320     youtube_include_dash_manifest: If True (default), DASH manifests and related
    321                         data will be downloaded and processed by extractor.
    322                         You can reduce network I/O by disabling it if you don't
    323                         care about DASH.
    324     """
    325 
    # Info-dict fields whose values are numeric when present.
    # NOTE(review): presumably consulted when interpolating the output
    # template so printf-style numeric conversions work — the consuming
    # code is outside this chunk; confirm against prepare_filename.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; __init__ rebinds all of these on the
    # instance, so the mutable defaults here are never shared in practice.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
    345 
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); stored on
                   self.params after merging with defaults.
        auto_init: when True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # logtostderr selects index 1 (stderr); False/absent selects stdout.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # Warn about a deprecated option; returns True when it was set.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        # Old --cn-verification-proxy is folded into geo_verification_proxy
        # unless the new option is set explicitly.
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        # Optionally pipe all screen output through an external bidi filter
        # ('bidiv', falling back to 'fribidi') via a pty.
        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # 'bidiv' missing — try 'fribidi' with the same pipes.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        # ASCII-only filesystems cannot store arbitrary titles; force
        # --restrict-filenames rather than crash later (#1474, #13027).
        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: 'key' names the class,
        # remaining entries become keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
    439 
    440     def warn_if_short_id(self, argv):
    441         # short YouTube ID starting with dash?
    442         idxs = [
    443             i for i, a in enumerate(argv)
    444             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
    445         if idxs:
    446             correct_argv = (
    447                 ['youtube-dl']
    448                 + [a for i, a in enumerate(argv) if i not in idxs]
    449                 + ['--'] + [argv[i] for i in idxs]
    450             )
    451             self.report_warning(
    452                 'Long argument string detected. '
    453                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
    454                 args_to_str(correct_argv))
    455 
    456     def add_info_extractor(self, ie):
    457         """Add an InfoExtractor object to the end of the list."""
    458         self._ies.append(ie)
    459         if not isinstance(ie, type):
    460             self._ies_instances[ie.ie_key()] = ie
    461             ie.set_downloader(self)
    462 
    463     def get_info_extractor(self, ie_key):
    464         """
    465         Get an instance of an IE with name ie_key, it will try to get one from
    466         the _ies list, if there's no instance it will create a new one and add
    467         it to the extractor list.
    468         """
    469         ie = self._ies_instances.get(ie_key)
    470         if ie is None:
    471             ie = get_info_extractor(ie_key)()
    472             self.add_info_extractor(ie)
    473         return ie
    474 
    def add_default_info_extractors(self):
        """
        Register every extractor yielded by gen_extractor_classes, in order.
        """
        for extractor_class in gen_extractor_classes():
            self.add_info_extractor(extractor_class)
    481 
    482     def add_post_processor(self, pp):
    483         """Add a PostProcessor object to the end of the chain."""
    484         self._pps.append(pp)
    485         pp.set_downloader(self)
    486 
    487     def add_progress_hook(self, ph):
    488         """Add the progress hook (currently only for the file downloader)"""
    489         self._progress_hooks.append(ph)
    490 
    491     def _bidi_workaround(self, message):
    492         if not hasattr(self, '_output_channel'):
    493             return message
    494 
    495         assert hasattr(self, '_output_process')
    496         assert isinstance(message, compat_str)
    497         line_count = message.count('\n') + 1
    498         self._output_process.stdin.write((message + '\n').encode('utf-8'))
    499         self._output_process.stdin.flush()
    500         res = ''.join(self._output_channel.readline().decode('utf-8')
    501                       for _ in range(line_count))
    502         return res[:-len('\n')]
    503 
    504     def to_screen(self, message, skip_eol=False):
    505         """Print message to stdout if not in quiet mode."""
    506         return self.to_stdout(message, skip_eol, check_quiet=True)
    507 
    def _write_string(self, s, out=None):
        """Write s to out using the configured output encoding."""
        encoding = self.params.get('encoding')
        write_string(s, out=out, encoding=encoding)
    510 
    511     def to_stdout(self, message, skip_eol=False, check_quiet=False):
    512         """Print message to stdout if not in quiet mode."""
    513         if self.params.get('logger'):
    514             self.params['logger'].debug(message)
    515         elif not check_quiet or not self.params.get('quiet', False):
    516             message = self._bidi_workaround(message)
    517             terminator = ['\n', ''][skip_eol]
    518             output = message + terminator
    519 
    520             self._write_string(output, self._screen_file)
    521 
    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        logger = self.params.get('logger')
        if logger:
            logger.error(message)
            return
        message = self._bidi_workaround(message)
        self._write_string(message + '\n', self._err_file)
    531 
    532     def to_console_title(self, message):
    533         if not self.params.get('consoletitle', False):
    534             return
    535         if compat_os_name == 'nt':
    536             if ctypes.windll.kernel32.GetConsoleWindow():
    537                 # c_wchar_p() might not be necessary if `message` is
    538                 # already of type unicode()
    539                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    540         elif 'TERM' in os.environ:
    541             self._write_string('\033]0;%s\007' % message, self._screen_file)
    542 
    543     def save_console_title(self):
    544         if not self.params.get('consoletitle', False):
    545             return
    546         if self.params.get('simulate', False):
    547             return
    548         if compat_os_name != 'nt' and 'TERM' in os.environ:
    549             # Save the title on stack
    550             self._write_string('\033[22;0t', self._screen_file)
    551 
    552     def restore_console_title(self):
    553         if not self.params.get('consoletitle', False):
    554             return
    555         if self.params.get('simulate', False):
    556             return
    557         if compat_os_name != 'nt' and 'TERM' in os.environ:
    558             # Restore the title from stack
    559             self._write_string('\033[23;0t', self._screen_file)
    560 
    561     def __enter__(self):
    562         self.save_console_title()
    563         return self
    564 
    565     def __exit__(self, *args):
    566         self.restore_console_title()
    567 
    568         if self.params.get('cookiefile') is not None:
    569             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
    570 
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # NOTE(review): some exceptions here appear to carry the
                    # original cause in an 'exc_info' attribute (see utils);
                    # print that chained traceback first — confirm in utils.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info when present, so the
            # raised DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record failure in the process return code instead.
        self._download_retcode = 1
    600 
    def report_warning(self, message):
        """
        Print the message to stderr, prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored.
        """
        logger = self.params.get('logger')
        if logger is not None:
            logger.warning(message)
            return
        if self.params.get('no_warnings'):
            return
        colorize = (
            not self.params.get('no_color')
            and self._err_file.isatty()
            and compat_os_name != 'nt')
        header = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
        self.to_stderr('%s %s' % (header, message))
    617 
    def report_error(self, message, tb=None):
        """
        Like trouble(), but prefix the message with 'ERROR:', colored red
        when stderr is a tty file.
        """
        colorize = (
            not self.params.get('no_color')
            and self._err_file.isatty()
            and compat_os_name != 'nt')
        header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
        self.trouble('%s %s' % (header, message), tb)
    629 
    def report_file_already_downloaded(self, file_name):
        """Report that the file has already been fully downloaded."""
        try:
            message = '[download] %s has already been downloaded' % file_name
            self.to_screen(message)
        except UnicodeEncodeError:
            # The console encoding cannot represent file_name; report
            # without it.
            self.to_screen('[download] The file has already been downloaded')
    636 
    def prepare_filename(self, info_dict):
        """Generate the output filename from the 'outtmpl' template.

        Builds a template dict out of info_dict (plus synthesized fields
        like epoch/autonumber/resolution), sanitizes every string value for
        filesystem use, patches the template for backward compatibility and
        missing numeric fields, and performs the % substitution.
        Returns the sanitized path, or None on a template error.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            # %(autonumber)s counts from autonumber_start (default 1) over the
            # whole run; _num_downloads has already been bumped for this video.
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Derive %(resolution)s from width/height when the extractor did
            # not provide it directly.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize string fields for filesystem use; id-like fields get
            # the stricter is_id treatment.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            # Drop None values and containers; keep numbers unsanitized so
            # numeric format specs like %(height)d still work.
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Missing fields render as the NA placeholder instead of raising
            # KeyError during the % substitution below.
            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            # NOTE(review): looking up n_entries here inserts the NA
            # placeholder into the defaultdict for non-playlist downloads —
            # presumably harmless; confirm.
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation
            # types in format specification will break the argument
            # substitution since string NA placeholder is returned for
            # missing fields. We will patch output template for missing
            # fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
    725 
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        # 'title' may be absent when we're just evaluating the playlist
        if 'title' in info_dict:
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            accepted_dates = self.params.get('daterange', DateRange())
            if upload_date not in accepted_dates:
                return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), accepted_dates)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # Custom match filters only apply to fully extracted entries.
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                rejection = match_filter(info_dict)
                if rejection is not None:
                    return rejection

        return None
    767 
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Copy keys from extra_info into info_dict when they are missing'''
        for key in extra_info:
            if key not in info_dict:
                info_dict[key] = extra_info[key]
    773 
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
            must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if force_generic_extractor and not ie_key:
            ie_key = 'Generic'

        # With an explicit hint, consider only that extractor; otherwise try
        # them all in registration order.
        candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

        for candidate in candidates:
            if not candidate.suitable(url):
                continue

            ie = self.get_info_extractor(candidate.ie_key())
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            return self.__extract_info(url, ie, download, extra_info, process)

        # No extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    811 
    def __handle_extraction_exceptions(func):
        """Decorator: report (rather than propagate) extraction errors.

        MaxDownloadsReached always propagates; unexpected exceptions
        propagate unless 'ignoreerrors' is set.
        """
        def wrapped(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as exc:
                # Must be handled before ExtractorError (it is a subclass).
                parts = [exc.msg]
                if exc.countries:
                    parts.append('This video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, exc.countries)))
                parts.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
                self.report_error('\n'.join(parts))
            except ExtractorError as exc:  # An error we somewhat expected
                self.report_error(compat_str(exc), exc.format_traceback())
            except MaxDownloadsReached:
                raise
            except Exception as exc:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(exc), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapped
    833 
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        """Run ie.extract() on url, normalize the result and hand it on."""
        result = ie.extract(url)
        # Finished already (backwards compatibility; listformats and friends
        # should be moved here)
        if result is None:
            return
        if isinstance(result, list):
            # Backwards compatibility: old IE result format
            result = {
                '_type': 'compat_list',
                'entries': result,
            }
        self.add_default_extra_info(result, ie, url)
        if not process:
            return result
        return self.process_ie_result(result, download, extra_info)
    850 
    def add_default_extra_info(self, ie_result, ie, url):
        """Fill in default extractor/webpage_url metadata on ie_result."""
        defaults = {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, defaults)
    858 
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        extra_info is never mutated here; it is merged into results and
        passed down recursively.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # In flat mode don't follow the reference; just print and return.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Non-None values from the embedding page win, except for the
            # identity/type fields which must come from the embedded info.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result['webpage_url']
            if webpage_url in self._playlist_urls:
                # Parentheses are required: % binds tighter than `or`, so
                # without them the id fallback would never apply (the
                # formatted string is always truthy, even for a None title).
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                # Forget seen playlist URLs once the outermost playlist is done.
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the top-level extractor metadata to each entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
    961 
    def __process_playlist(self, ie_result, download):
        """Resolve (and download, if requested) every entry of a playlist.

        Honours playliststart/playlistend/playlist_items, playlistreverse
        and playlistrandom. Returns ie_result with 'entries' replaced by
        the processed results.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')

        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based in params; convert to a 0-based slice start.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a spec like '1-3,7' into individual 1-based indices.
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # orderedSet keeps only the first occurrence of duplicate indices.
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Pick the requested 1-based indices; negative ones count from
            # the end, out-of-range ones are silently dropped.
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Entries can be a materialized list, a lazily fetched PagedList or
        # an arbitrary iterable; each needs its own slicing strategy.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                # Fetch each requested entry as its own one-element slice.
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Materialize just enough of the iterator to cover the
                # largest requested index, then select from that prefix.
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                # 1-based position within the unsliced playlist.
                # NOTE(review): with playlistreverse/playlistrandom this does
                # not follow the reordered entries — confirm if intended.
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
   1073 
   1074     @__handle_extraction_exceptions
   1075     def __process_iterable_entry(self, entry, download, extra_info):
   1076         return self.process_ie_result(
   1077             entry, download=download, extra_info=extra_info)
   1078 
   1079     def _build_format_filter(self, filter_spec):
   1080         " Returns a function to filter the formats according to the filter_spec "
   1081 
   1082         OPERATORS = {
   1083             '<': operator.lt,
   1084             '<=': operator.le,
   1085             '>': operator.gt,
   1086             '>=': operator.ge,
   1087             '=': operator.eq,
   1088             '!=': operator.ne,
   1089         }
   1090         operator_rex = re.compile(r'''(?x)\s*
   1091             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
   1092             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
   1093             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
   1094             $
   1095             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
   1096         m = operator_rex.search(filter_spec)
   1097         if m:
   1098             try:
   1099                 comparison_value = int(m.group('value'))
   1100             except ValueError:
   1101                 comparison_value = parse_filesize(m.group('value'))
   1102                 if comparison_value is None:
   1103                     comparison_value = parse_filesize(m.group('value') + 'B')
   1104                 if comparison_value is None:
   1105                     raise ValueError(
   1106                         'Invalid value %r in format specification %r' % (
   1107                             m.group('value'), filter_spec))
   1108             op = OPERATORS[m.group('op')]
   1109 
   1110         if not m:
   1111             STR_OPERATORS = {
   1112                 '=': operator.eq,
   1113                 '^=': lambda attr, value: attr.startswith(value),
   1114                 '$=': lambda attr, value: attr.endswith(value),
   1115                 '*=': lambda attr, value: value in attr,
   1116             }
   1117             str_operator_rex = re.compile(r'''(?x)
   1118                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
   1119                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
   1120                 \s*(?P<value>[a-zA-Z0-9._-]+)
   1121                 \s*$
   1122                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
   1123             m = str_operator_rex.search(filter_spec)
   1124             if m:
   1125                 comparison_value = m.group('value')
   1126                 str_op = STR_OPERATORS[m.group('op')]
   1127                 if m.group('negation'):
   1128                     op = lambda attr, value: not str_op(attr, value)
   1129                 else:
   1130                     op = str_op
   1131 
   1132         if not m:
   1133             raise ValueError('Invalid filter specification %r' % filter_spec)
   1134 
   1135         def _filter(f):
   1136             actual_value = f.get(m.group('key'))
   1137             if actual_value is None:
   1138                 return m.group('none_inclusive')
   1139             return op(actual_value, comparison_value)
   1140         return _filter
   1141 
   1142     def _default_format_spec(self, info_dict, download=True):
   1143 
   1144         def can_merge():
   1145             merger = FFmpegMergerPP(self)
   1146             return merger.available and merger.can_merge()
   1147 
   1148         def prefer_best():
   1149             if self.params.get('simulate', False):
   1150                 return False
   1151             if not download:
   1152                 return False
   1153             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
   1154                 return True
   1155             if info_dict.get('is_live'):
   1156                 return True
   1157             if not can_merge():
   1158                 return True
   1159             return False
   1160 
   1161         req_format_list = ['bestvideo+bestaudio', 'best']
   1162         if prefer_best():
   1163             req_format_list.reverse()
   1164         return '/'.join(req_format_list)
   1165 
   1166     def build_format_selector(self, format_spec):
   1167         def syntax_error(note, start):
   1168             message = (
   1169                 'Invalid format specification: '
   1170                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
   1171             return SyntaxError(message)
   1172 
   1173         PICKFIRST = 'PICKFIRST'
   1174         MERGE = 'MERGE'
   1175         SINGLE = 'SINGLE'
   1176         GROUP = 'GROUP'
   1177         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
   1178 
   1179         def _parse_filter(tokens):
   1180             filter_parts = []
   1181             for type, string, start, _, _ in tokens:
   1182                 if type == tokenize.OP and string == ']':
   1183                     return ''.join(filter_parts)
   1184                 else:
   1185                     filter_parts.append(string)
   1186 
   1187         def _remove_unused_ops(tokens):
   1188             # Remove operators that we don't use and join them with the surrounding strings
   1189             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
   1190             ALLOWED_OPS = ('/', '+', ',', '(', ')')
   1191             last_string, last_start, last_end, last_line = None, None, None, None
   1192             for type, string, start, end, line in tokens:
   1193                 if type == tokenize.OP and string == '[':
   1194                     if last_string:
   1195                         yield tokenize.NAME, last_string, last_start, last_end, last_line
   1196                         last_string = None
   1197                     yield type, string, start, end, line
   1198                     # everything inside brackets will be handled by _parse_filter
   1199                     for type, string, start, end, line in tokens:
   1200                         yield type, string, start, end, line
   1201                         if type == tokenize.OP and string == ']':
   1202                             break
   1203                 elif type == tokenize.OP and string in ALLOWED_OPS:
   1204                     if last_string:
   1205                         yield tokenize.NAME, last_string, last_start, last_end, last_line
   1206                         last_string = None
   1207                     yield type, string, start, end, line
   1208                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
   1209                     if not last_string:
   1210                         last_string = string
   1211                         last_start = start
   1212                         last_end = end
   1213                     else:
   1214                         last_string += string
   1215             if last_string:
   1216                 yield tokenize.NAME, last_string, last_start, last_end, last_line
   1217 
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser for the format-selection mini-language.
            # Returns a list of FormatSelector nodes; the inside_* flags tell
            # a recursive call which token terminates its scope:
            #   inside_merge  - parsing the right side of 'a+b'
            #   inside_choice - parsing an alternative of 'a/b'
            #   inside_group  - parsing inside '(...)'
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A bare format name/id, e.g. 'best' or '137'.
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # These close a merge operand; let the caller see them.
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter like '[height<=720]'; applies to 'best'
                        # when no selector precedes it.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # 'video+audio' merge; nesting merges is not allowed.
                        if inside_merge:
                            raise syntax_error('Unexpected "+"', start)
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
   1275 
        def _build_selector_function(selector):
            # Turn a parsed FormatSelector tree (or list of them) into a
            # function mapping a context dict ({'formats': ...,
            # 'incomplete_formats': ...}) to the formats to download.
            if isinstance(selector, list):
                # Comma-separated selectors: yield from each one in turn.
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                # 'a/b': first alternative that yields any format wins.
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    # NOTE: formats are assumed sorted worst-to-best, so the
                    # last element is the preferred one.
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is either a known extension or an
                        # exact format_id match.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one synthetic format
                    # entry carrying video metadata from the first and audio
                    # metadata from the second.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Deep-copy the context so each side of the merge filters
                    # independently.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply this selector's '[...]' filters before selecting.
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
   1403 
   1404         stream = io.BytesIO(format_spec.encode('utf-8'))
   1405         try:
   1406             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
   1407         except tokenize.TokenError:
   1408             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
   1409 
   1410         class TokenIterator(object):
   1411             def __init__(self, tokens):
   1412                 self.tokens = tokens
   1413                 self.counter = 0
   1414 
   1415             def __iter__(self):
   1416                 return self
   1417 
   1418             def __next__(self):
   1419                 if self.counter >= len(self.tokens):
   1420                     raise StopIteration()
   1421                 value = self.tokens[self.counter]
   1422                 self.counter += 1
   1423                 return value
   1424 
   1425             next = __next__
   1426 
   1427             def restore_last_token(self):
   1428                 self.counter -= 1
   1429 
   1430         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
   1431         return _build_selector_function(parsed_selector)
   1432 
   1433     def _calc_headers(self, info_dict):
   1434         res = std_headers.copy()
   1435 
   1436         add_headers = info_dict.get('http_headers')
   1437         if add_headers:
   1438             res.update(add_headers)
   1439 
   1440         cookies = self._calc_cookies(info_dict)
   1441         if cookies:
   1442             res['Cookie'] = cookies
   1443 
   1444         if 'X-Forwarded-For' not in res:
   1445             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
   1446             if x_forwarded_for_ip:
   1447                 res['X-Forwarded-For'] = x_forwarded_for_ip
   1448 
   1449         return res
   1450 
   1451     def _calc_cookies(self, info_dict):
   1452         pr = sanitized_Request(info_dict['url'])
   1453         self.cookiejar.add_cookie_header(pr)
   1454         return pr.get_header('Cookie')
   1455 
    def process_video_result(self, info_dict, download=True):
        """Sanitize an extractor's video result, pick the requested formats
        and, when download is True, hand each of them to process_info().

        Returns info_dict updated with the last format selected (kept for
        backwards compatibility).  Raises ExtractorError when a mandatory
        field is missing or no format matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a wrongly-typed field.
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str in place (with a warning).
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce all known numeric fields to int in place (with a warning).
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' string to the 'thumbnails' list.
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort worst-to-best so thumbnails[-1] is the preferred one;
            # missing preference/width/height sort first.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Last entry is the best thumbnail after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive YYYYMMDD date fields from raw timestamps when missing.
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions.
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a URL cannot be downloaded; drop it.
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self._write_string('[debug] Default format spec: %s\n' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
   1696 
   1697     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
   1698         """Select the requested subtitles and their format"""
   1699         available_subs = {}
   1700         if normal_subtitles and self.params.get('writesubtitles'):
   1701             available_subs.update(normal_subtitles)
   1702         if automatic_captions and self.params.get('writeautomaticsub'):
   1703             for lang, cap_info in automatic_captions.items():
   1704                 if lang not in available_subs:
   1705                     available_subs[lang] = cap_info
   1706 
   1707         if (not self.params.get('writesubtitles') and not
   1708                 self.params.get('writeautomaticsub') or not
   1709                 available_subs):
   1710             return None
   1711 
   1712         if self.params.get('allsubtitles', False):
   1713             requested_langs = available_subs.keys()
   1714         else:
   1715             if self.params.get('subtitleslangs', False):
   1716                 requested_langs = self.params.get('subtitleslangs')
   1717             elif 'en' in available_subs:
   1718                 requested_langs = ['en']
   1719             else:
   1720                 requested_langs = [list(available_subs.keys())[0]]
   1721 
   1722         formats_query = self.params.get('subtitlesformat', 'best')
   1723         formats_preference = formats_query.split('/') if formats_query else []
   1724         subs = {}
   1725         for lang in requested_langs:
   1726             formats = available_subs.get(lang)
   1727             if formats is None:
   1728                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
   1729                 continue
   1730             for ext in formats_preference:
   1731                 if ext == 'best':
   1732                     f = formats[-1]
   1733                     break
   1734                 matches = list(filter(lambda f: f['ext'] == ext, formats))
   1735                 if matches:
   1736                     f = matches[-1]
   1737                     break
   1738             else:
   1739                 f = formats[-1]
   1740                 self.report_warning(
   1741                     'No subtitle format found matching "%s" for language %s, '
   1742                     'using %s' % (formats_query, lang, f['ext']))
   1743             subs[lang] = f
   1744         return subs
   1745 
   1746     def __forced_printings(self, info_dict, filename, incomplete):
   1747         def print_mandatory(field):
   1748             if (self.params.get('force%s' % field, False)
   1749                     and (not incomplete or info_dict.get(field) is not None)):
   1750                 self.to_stdout(info_dict[field])
   1751 
   1752         def print_optional(field):
   1753             if (self.params.get('force%s' % field, False)
   1754                     and info_dict.get(field) is not None):
   1755                 self.to_stdout(info_dict[field])
   1756 
   1757         print_mandatory('title')
   1758         print_mandatory('id')
   1759         if self.params.get('forceurl', False) and not incomplete:
   1760             if info_dict.get('requested_formats') is not None:
   1761                 for f in info_dict['requested_formats']:
   1762                     self.to_stdout(f['url'] + f.get('play_path', ''))
   1763             else:
   1764                 # For RTMP URLs, also include the playpath
   1765                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
   1766         print_optional('thumbnail')
   1767         print_optional('description')
   1768         if self.params.get('forcefilename', False) and filename is not None:
   1769             self.to_stdout(filename)
   1770         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
   1771             self.to_stdout(formatSeconds(info_dict['duration']))
   1772         print_mandatory('format')
   1773         if self.params.get('forcejson', False):
   1774             self.to_stdout(json.dumps(info_dict))
   1775 
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Applies the match filters and forced printings, then (unless in
        simulate mode) writes the requested side files (description,
        annotations, subtitles, info JSON, thumbnails), downloads the
        media data — merging multiple requested formats when needed —
        applies fixup postprocessors and records the download in the
        archive file.
        """

        # Only fully-resolved single videos reach this point;
        # playlists/urls are handled by process_ie_result.
        assert info_dict.get('_type', 'video') == 'video'

        # Honour --max-downloads before doing any work on this entry.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # _match_entry returns a human-readable skip reason or None.
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        self.__forced_printings(info_dict, filename, incomplete=False)

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        def ensure_dir_exists(path):
            # Create the parent directory of path; report (not raise) on
            # failure so the caller can abort this entry gracefully.
            try:
                dn = os.path.dirname(path)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                # A concurrent mkdir is fine — the directory now exists.
                if isinstance(err, OSError) and err.errno == errno.EEXIST:
                    return True
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False

        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        # --write-description: sidecar text file with the video description.
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # --write-annotations: sidecar XML file with the annotations.
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    # Subtitle data may be inline ('data') or a URL to fetch.
                    if sub_info.get('data') is not None:
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        try:
                            sub_data = ie._request_webpage(
                                sub_info['url'], info_dict['id'], note=False).read()
                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                subfile.write(sub_data)
                        except (ExtractorError, IOError, OSError, ValueError) as err:
                            # A failed subtitle download is non-fatal;
                            # continue with the remaining languages.
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        # --write-info-json: dump the (filtered) info dict next to the video.
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Pick and run the appropriate FileDownloader for this
                    # format, wiring in the registered progress hooks.
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Multiple formats requested (e.g. bestvideo+bestaudio):
                    # download each part, then merge with ffmpeg/avconv.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # True if the (video, audio) pair can be merged
                        # without remuxing into mkv.
                        video, audio = formats
                        # Check extension
                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
                        if video_ext and audio_ext:
                            # NOTE(review): ('webm') is a plain string, not a
                            # 1-tuple, so the membership tests below do
                            # substring checks against 'webm' — confirm this
                            # is intended.
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            # Each part gets a 'f<format_id>' infix so the
                            # parts don't clash with the merged output name.
                            fname = prepend_extension(
                                self.prepare_filename(new_info),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                # Fixup 1: non-uniform pixel aspect ratio.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 2: DASH m4a container.
                if (info_dict.get('requested_formats') is None
                        and info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 3: HLS AAC bitstream. Note operator precedence:
                # this reads m3u8_native OR (m3u8 AND hls_prefer_native).
                if (info_dict.get('protocol') == 'm3u8_native'
                        or info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformed AAC bitstream detected. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
   2055 
   2056     def download(self, url_list):
   2057         """Download a given list of URLs."""
   2058         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
   2059         if (len(url_list) > 1
   2060                 and outtmpl != '-'
   2061                 and '%' not in outtmpl
   2062                 and self.params.get('max_downloads') != 1):
   2063             raise SameFileError(outtmpl)
   2064 
   2065         for url in url_list:
   2066             try:
   2067                 # It also downloads the videos
   2068                 res = self.extract_info(
   2069                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
   2070             except UnavailableVideoError:
   2071                 self.report_error('unable to download video')
   2072             except MaxDownloadsReached:
   2073                 self.to_screen('[info] Maximum number of downloaded files reached.')
   2074                 raise
   2075             else:
   2076                 if self.params.get('dump_single_json', False):
   2077                     self.to_stdout(json.dumps(res))
   2078 
   2079         return self._download_retcode
   2080 
   2081     def download_with_info_file(self, info_filename):
   2082         with contextlib.closing(fileinput.FileInput(
   2083                 [info_filename], mode='r',
   2084                 openhook=fileinput.hook_encoded('utf-8'))) as f:
   2085             # FileInput doesn't have a read method, we can't call json.load
   2086             info = self.filter_requested_info(json.loads('\n'.join(f)))
   2087         try:
   2088             self.process_ie_result(info, download=True)
   2089         except DownloadError:
   2090             webpage_url = info.get('webpage_url')
   2091             if webpage_url is not None:
   2092                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
   2093                 return self.download([webpage_url])
   2094             else:
   2095                 raise
   2096         return self._download_retcode
   2097 
   2098     @staticmethod
   2099     def filter_requested_info(info_dict):
   2100         return dict(
   2101             (k, v) for k, v in info_dict.items()
   2102             if k not in ['requested_formats', 'requested_subtitles'])
   2103 
   2104     def post_process(self, filename, ie_info):
   2105         """Run all the postprocessors on the given file."""
   2106         info = dict(ie_info)
   2107         info['filepath'] = filename
   2108         pps_chain = []
   2109         if ie_info.get('__postprocessors') is not None:
   2110             pps_chain.extend(ie_info['__postprocessors'])
   2111         pps_chain.extend(self._pps)
   2112         for pp in pps_chain:
   2113             files_to_delete = []
   2114             try:
   2115                 files_to_delete, info = pp.run(info)
   2116             except PostProcessingError as e:
   2117                 self.report_error(e.msg)
   2118             if files_to_delete and not self.params.get('keepvideo', False):
   2119                 for old_filename in files_to_delete:
   2120                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
   2121                     try:
   2122                         os.remove(encodeFilename(old_filename))
   2123                     except (IOError, OSError):
   2124                         self.report_warning('Unable to remove downloaded original file')
   2125 
   2126     def _make_archive_id(self, info_dict):
   2127         video_id = info_dict.get('id')
   2128         if not video_id:
   2129             return
   2130         # Future-proof against any change in case
   2131         # and backwards compatibility with prior versions
   2132         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
   2133         if extractor is None:
   2134             url = str_or_none(info_dict.get('url'))
   2135             if not url:
   2136                 return
   2137             # Try to find matching extractor for the URL and take its ie_key
   2138             for ie in self._ies:
   2139                 if ie.suitable(url):
   2140                     extractor = ie.ie_key()
   2141                     break
   2142             else:
   2143                 return
   2144         return extractor.lower() + ' ' + video_id
   2145 
   2146     def in_download_archive(self, info_dict):
   2147         fn = self.params.get('download_archive')
   2148         if fn is None:
   2149             return False
   2150 
   2151         vid_id = self._make_archive_id(info_dict)
   2152         if not vid_id:
   2153             return False  # Incomplete video information
   2154 
   2155         try:
   2156             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
   2157                 for line in archive_file:
   2158                     if line.strip() == vid_id:
   2159                         return True
   2160         except IOError as ioe:
   2161             if ioe.errno != errno.ENOENT:
   2162                 raise
   2163         return False
   2164 
   2165     def record_download_archive(self, info_dict):
   2166         fn = self.params.get('download_archive')
   2167         if fn is None:
   2168             return
   2169         vid_id = self._make_archive_id(info_dict)
   2170         assert vid_id
   2171         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
   2172             archive_file.write(vid_id + '\n')
   2173 
   2174     @staticmethod
   2175     def format_resolution(format, default='unknown'):
   2176         if format.get('vcodec') == 'none':
   2177             return 'audio only'
   2178         if format.get('resolution') is not None:
   2179             return format['resolution']
   2180         if format.get('height') is not None:
   2181             if format.get('width') is not None:
   2182                 res = '%sx%s' % (format['width'], format['height'])
   2183             else:
   2184                 res = '%sp' % format['height']
   2185         elif format.get('width') is not None:
   2186             res = '%dx?' % format['width']
   2187         else:
   2188             res = default
   2189         return res
   2190 
   2191     def _format_note(self, fdict):
   2192         res = ''
   2193         if fdict.get('ext') in ['f4f', 'f4m']:
   2194             res += '(unsupported) '
   2195         if fdict.get('language'):
   2196             if res:
   2197                 res += ' '
   2198             res += '[%s] ' % fdict['language']
   2199         if fdict.get('format_note') is not None:
   2200             res += fdict['format_note'] + ' '
   2201         if fdict.get('tbr') is not None:
   2202             res += '%4dk ' % fdict['tbr']
   2203         if fdict.get('container') is not None:
   2204             if res:
   2205                 res += ', '
   2206             res += '%s container' % fdict['container']
   2207         if (fdict.get('vcodec') is not None
   2208                 and fdict.get('vcodec') != 'none'):
   2209             if res:
   2210                 res += ', '
   2211             res += fdict['vcodec']
   2212             if fdict.get('vbr') is not None:
   2213                 res += '@'
   2214         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
   2215             res += 'video@'
   2216         if fdict.get('vbr') is not None:
   2217             res += '%4dk' % fdict['vbr']
   2218         if fdict.get('fps') is not None:
   2219             if res:
   2220                 res += ', '
   2221             res += '%sfps' % fdict['fps']
   2222         if fdict.get('acodec') is not None:
   2223             if res:
   2224                 res += ', '
   2225             if fdict['acodec'] == 'none':
   2226                 res += 'video only'
   2227             else:
   2228                 res += '%-5s' % fdict['acodec']
   2229         elif fdict.get('abr') is not None:
   2230             if res:
   2231                 res += ', '
   2232             res += 'audio'
   2233         if fdict.get('abr') is not None:
   2234             res += '@%3dk' % fdict['abr']
   2235         if fdict.get('asr') is not None:
   2236             res += ' (%5dHz)' % fdict['asr']
   2237         if fdict.get('filesize') is not None:
   2238             if res:
   2239                 res += ', '
   2240             res += format_bytes(fdict['filesize'])
   2241         elif fdict.get('filesize_approx') is not None:
   2242             if res:
   2243                 res += ', '
   2244             res += '~' + format_bytes(fdict['filesize_approx'])
   2245         return res
   2246 
   2247     def list_formats(self, info_dict):
   2248         formats = info_dict.get('formats', [info_dict])
   2249         table = [
   2250             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
   2251             for f in formats
   2252             if f.get('preference') is None or f['preference'] >= -1000]
   2253         if len(formats) > 1:
   2254             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
   2255 
   2256         header_line = ['format code', 'extension', 'resolution', 'note']
   2257         self.to_screen(
   2258             '[info] Available formats for %s:\n%s' %
   2259             (info_dict['id'], render_table(header_line, table)))
   2260 
   2261     def list_thumbnails(self, info_dict):
   2262         thumbnails = info_dict.get('thumbnails')
   2263         if not thumbnails:
   2264             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
   2265             return
   2266 
   2267         self.to_screen(
   2268             '[info] Thumbnails for %s:' % info_dict['id'])
   2269         self.to_screen(render_table(
   2270             ['ID', 'width', 'height', 'URL'],
   2271             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
   2272 
   2273     def list_subtitles(self, video_id, subtitles, name='subtitles'):
   2274         if not subtitles:
   2275             self.to_screen('%s has no %s' % (video_id, name))
   2276             return
   2277         self.to_screen(
   2278             'Available %s for %s:' % (name, video_id))
   2279         self.to_screen(render_table(
   2280             ['Language', 'formats'],
   2281             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
   2282                 for lang, formats in subtitles.items()]))
   2283 
    def urlopen(self, req):
        """ Start an HTTP download """
        # A bare URL string is wrapped in a sanitized Request first.
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        # All requests go through the opener built in _setup_opener.
        return self._opener.open(req, timeout=self._socket_timeout)
   2289 
    def print_debug_header(self):
        """Print environment diagnostics (encodings, version, git HEAD,
        Python/exe versions, proxy map) when --verbose is enabled."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute (e.g. when replaced).
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git commit when running from a checkout.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore if missing.
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Include the PyPy version when available for better bug reports.
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # External tool versions (ffmpeg/avconv, rtmpdump, phantomjs).
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxies from all opener handlers.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        # --call-home: report public IP and check for a newer release.
        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
   2365 
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Configures socket timeout, cookie jar, proxies and the custom
        handlers, and stores the result in self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when unset.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Load existing cookies only if the file is readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # --proxy '' disables proxies entirely; otherwise use the
            # given proxy for both http and https.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        # --print-traffic enables httplib-level debug output.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
   2418 
   2419     def encode(self, s):
   2420         if isinstance(s, bytes):
   2421             return s  # Already encoded
   2422 
   2423         try:
   2424             return s.encode(self.get_encoding())
   2425         except UnicodeEncodeError as err:
   2426             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
   2427             raise
   2428 
   2429     def get_encoding(self):
   2430         encoding = self.params.get('encoding')
   2431         if encoding is None:
   2432             encoding = preferredencoding()
   2433         return encoding
   2434 
   2435     def _write_thumbnails(self, info_dict, filename):
   2436         if self.params.get('writethumbnail', False):
   2437             thumbnails = info_dict.get('thumbnails')
   2438             if thumbnails:
   2439                 thumbnails = [thumbnails[-1]]
   2440         elif self.params.get('write_all_thumbnails', False):
   2441             thumbnails = info_dict.get('thumbnails')
   2442         else:
   2443             return
   2444 
   2445         if not thumbnails:
   2446             # No thumbnails present, so return immediately
   2447             return
   2448 
   2449         for t in thumbnails:
   2450             thumb_ext = determine_ext(t['url'], 'jpg')
   2451             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
   2452             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
   2453             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
   2454 
   2455             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
   2456                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
   2457                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
   2458             else:
   2459                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
   2460                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
   2461                 try:
   2462                     uf = self.urlopen(t['url'])
   2463                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
   2464                         shutil.copyfileobj(uf, thumbf)
   2465                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
   2466                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
   2467                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
   2468                     self.report_warning('Unable to download thumbnail "%s": %s' %
   2469                                         (t['url'], error_to_compat_str(err)))