This functionality is intended to eventually encompass the current format filtering.
version_tuple,
xpath_with_ns,
render_table,
+ match_str,
)
'123 4\n'
'9999 51')
def test_match_str(self):
    """Check match_str against presence, numeric, string and combined filters."""
    # An ordering operator applied to a non-numeric operand must be rejected.
    self.assertRaises(ValueError, match_str, 'xy>foobar', {})

    numeric = {'x': 1200}
    textual = {'y': 'foobar42'}
    combined = 'like_count > 100 & dislike_count <? 50 & description'

    # (expected outcome, filter expression, dictionary), evaluated in order.
    cases = [
        (False, 'xy', numeric),
        (True, '!xy', numeric),
        (True, 'x', numeric),
        (False, '!x', numeric),
        (True, 'x', {'x': 0}),
        (False, 'x>0', {'x': 0}),
        (False, 'x>0', {}),
        (True, 'x>?0', {}),
        (True, 'x>1K', numeric),
        (False, 'x>2K', numeric),
        (True, 'x>=1200 & x < 1300', numeric),
        (False, 'x>=1100 & x < 1200', numeric),
        (False, 'y=a212', textual),
        (True, 'y=foobar42', textual),
        (False, 'y!=foobar42', textual),
        (True, 'y!=foobar2', textual),
        (False, combined, {'like_count': 90, 'description': 'foo'}),
        (True, combined, {'like_count': 190, 'description': 'foo'}),
        (False, combined,
         {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}),
        (False, combined, {'like_count': 190, 'dislike_count': 10}),
    ]
    for expected, expression, fields in cases:
        check = self.assertTrue if expected else self.assertFalse
        check(match_str(expression, fields))
+
if __name__ == '__main__':
unittest.main()
external_downloader: Executable of the external downloader to call.
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example for this.
The following parameters are not used by YoutubeDL itself, they are used by
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % title
+ return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
+
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
return None
@staticmethod
)
from .utils import (
DateRange,
- DEFAULT_OUTTMPL,
decodeOption,
+ DEFAULT_OUTTMPL,
DownloadError,
+ match_filter_func,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
+ match_filter = (
+ None if opts.match_filter is None
+ else match_filter_func(opts.match_filter))
ydl_opts = {
'usenetrc': opts.usenetrc,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': match_filter,
}
with YoutubeDL(ydl_opts) as ydl:
'--max-views',
metavar='COUNT', dest='max_views', default=None, type=int,
help='Do not download any videos with more than COUNT views')
# Register the experimental generic video filter option. The help text is
# built from adjacent string literals, so every fragment that is followed by
# another sentence or clause must end with (or the next must start with) a
# space; otherwise the words run together in the rendered --help output.
selection.add_option(
    '--match-filter',
    metavar='FILTER', dest='match_filter', default=None,
    help=(
        '(Experimental) Generic video filter. '
        'Specify any key (see help for -o for a list of available keys) to'
        ' match if the key is present, '
        '!key to check if the key is not present, '
        'key > NUMBER (like "comment_count > 12", also works with '
        '>=, <, <=, !=, =) to compare against a number, and '
        '& to require multiple matches. '
        'Values which are not known are excluded unless you'
        ' put a question mark (?) after the operator. '
        'For example, to only match videos that have been liked more than '
        '100 times and disliked less than 50 times (or the dislike '
        'functionality is not available at the given service), but who '
        'also have a description, use --match-filter '
        '"like_count > 100 & dislike_count <? 50 & description" .'
    ))
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
import json
import locale
import math
+import operator
import os
import pipes
import platform
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ if m.group('strval') is not None:
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('strval')
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ actual_value = dct.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: v is not None,
+ '!': lambda v: v is None,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
def match_str(filter_str, dct):
    """Check a dictionary against an '&'-separated filter expression.

    Each '&'-separated clause is evaluated with _match_one; the result is
    True only if every clause passes, and evaluation stops at the first
    clause that does not.
    """
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
+
+
def match_filter_func(filter_str):
    """Build a filter callback for the given filter expression.

    The returned function takes an info dict and returns None when the dict
    passes filter_str (as decided by match_str), or a human-readable message
    explaining that the entry is skipped when it does not.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Fall back through title -> id -> generic label. Using "or" rather
        # than dict.get defaults also covers keys that are present but None
        # or empty, which would otherwise print as "None does not pass ...".
        video_title = info_dict.get('title') or info_dict.get('id') or 'video'
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func