mirror of https://github.com/borgbackup/borg.git
implement pattern support for --match-archives, fixes #6504
also:
- rename the --glob-archives option to --match-archives (short: -a, unchanged)
- globbing patterns now need the sh: prefix
- regex patterns need the re: prefix
- "identical" match "patterns" use an id: prefix
- the new default style is the id: pattern (--glob-archives used the sh: glob pattern)
- source code: glob -> match, GLOB -> PATTERN
This commit is contained in:
parent
f5df35b36e
commit
4493d396e6
|
@ -1656,18 +1656,18 @@ class ArchiveChecker:
|
|||
self.possibly_superseded = set()
|
||||
|
||||
def check(
|
||||
self, repository, repair=False, first=0, last=0, sort_by="", glob=None, verify_data=False, save_space=False
|
||||
self, repository, repair=False, first=0, last=0, sort_by="", match=None, verify_data=False, save_space=False
|
||||
):
|
||||
"""Perform a set of checks on 'repository'
|
||||
|
||||
:param repair: enable repair mode, write updated or corrected data into repository
|
||||
:param first/last/sort_by: only check this number of first/last archives ordered by sort_by
|
||||
:param glob: only check archives matching this glob
|
||||
:param match: only check archives matching this pattern
|
||||
:param verify_data: integrity verification of data referenced by archives
|
||||
:param save_space: Repository.commit(save_space)
|
||||
"""
|
||||
logger.info("Starting archive consistency check...")
|
||||
self.check_all = not any((first, last, glob))
|
||||
self.check_all = not any((first, last, match))
|
||||
self.repair = repair
|
||||
self.repository = repository
|
||||
self.init_chunks()
|
||||
|
@ -1690,7 +1690,7 @@ class ArchiveChecker:
|
|||
self.error_found = True
|
||||
del self.chunks[Manifest.MANIFEST_ID]
|
||||
self.manifest = self.rebuild_manifest()
|
||||
self.rebuild_refcounts(glob=glob, first=first, last=last, sort_by=sort_by)
|
||||
self.rebuild_refcounts(match=match, first=first, last=last, sort_by=sort_by)
|
||||
self.orphan_chunks_check()
|
||||
self.finish(save_space=save_space)
|
||||
if self.error_found:
|
||||
|
@ -1886,7 +1886,7 @@ class ArchiveChecker:
|
|||
logger.info("Manifest rebuild complete.")
|
||||
return manifest
|
||||
|
||||
def rebuild_refcounts(self, first=0, last=0, sort_by="", glob=None):
|
||||
def rebuild_refcounts(self, first=0, last=0, sort_by="", match=None):
|
||||
"""Rebuild object reference counts by walking the metadata
|
||||
|
||||
Missing and/or incorrect data is repaired when detected
|
||||
|
@ -2080,10 +2080,10 @@ class ArchiveChecker:
|
|||
i += 1
|
||||
|
||||
sort_by = sort_by.split(",")
|
||||
if any((first, last, glob)):
|
||||
archive_infos = self.manifest.archives.list(sort_by=sort_by, glob=glob, first=first, last=last)
|
||||
if glob and not archive_infos:
|
||||
logger.warning("--glob-archives %s does not match any archives", glob)
|
||||
if any((first, last, match)):
|
||||
archive_infos = self.manifest.archives.list(sort_by=sort_by, match=match, first=first, last=last)
|
||||
if match and not archive_infos:
|
||||
logger.warning("--match-archives %s does not match any archives", match)
|
||||
if first and len(archive_infos) < first:
|
||||
logger.warning("--first %d archives: only found %d archives", first, len(archive_infos))
|
||||
if last and len(archive_infos) < last:
|
||||
|
|
|
@ -410,7 +410,7 @@ class Archiver(
|
|||
replace_placeholders.override("now", DatetimeWrapper(args.timestamp))
|
||||
replace_placeholders.override("utcnow", DatetimeWrapper(args.timestamp.astimezone(timezone.utc)))
|
||||
args.location = args.location.with_timestamp(args.timestamp)
|
||||
for name in "name", "other_name", "newname", "glob_archives", "comment":
|
||||
for name in "name", "other_name", "newname", "match_archives", "comment":
|
||||
value = getattr(args, name, None)
|
||||
if value is not None:
|
||||
setattr(args, name, replace_placeholders(value))
|
||||
|
|
|
@ -360,11 +360,11 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
|
|||
group = filters_group.add_mutually_exclusive_group()
|
||||
group.add_argument(
|
||||
"-a",
|
||||
"--glob-archives",
|
||||
metavar="GLOB",
|
||||
dest="glob_archives",
|
||||
"--match-archives",
|
||||
metavar="PATTERN",
|
||||
dest="match_archives",
|
||||
action=Highlander,
|
||||
help="only consider archive names matching the glob. " 'sh: rules apply, see "borg help patterns".',
|
||||
help='only consider archive names matching the pattern. see "borg help match-archives".',
|
||||
)
|
||||
|
||||
if sort_by:
|
||||
|
|
|
@ -31,9 +31,9 @@ class CheckMixIn:
|
|||
env_var_override="BORG_CHECK_I_KNOW_WHAT_I_AM_DOING",
|
||||
):
|
||||
return EXIT_ERROR
|
||||
if args.repo_only and any((args.verify_data, args.first, args.last, args.glob_archives)):
|
||||
if args.repo_only and any((args.verify_data, args.first, args.last, args.match_archives)):
|
||||
self.print_error(
|
||||
"--repository-only contradicts --first, --last, -a / --glob-archives " " and --verify-data arguments."
|
||||
"--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
|
||||
)
|
||||
return EXIT_ERROR
|
||||
if args.repair and args.max_duration:
|
||||
|
@ -55,7 +55,7 @@ class CheckMixIn:
|
|||
first=args.first,
|
||||
last=args.last,
|
||||
sort_by=args.sort_by or "ts",
|
||||
glob=args.glob_archives,
|
||||
match=args.match_archives,
|
||||
verify_data=args.verify_data,
|
||||
save_space=args.save_space,
|
||||
):
|
||||
|
|
|
@ -23,9 +23,9 @@ class DeleteMixIn:
|
|||
archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
|
||||
if not archive_names:
|
||||
return self.exit_code
|
||||
if args.glob_archives is None and args.first == 0 and args.last == 0:
|
||||
if args.match_archives is None and args.first == 0 and args.last == 0:
|
||||
self.print_error(
|
||||
"Aborting: if you really want to delete all archives, please use -a '*' "
|
||||
"Aborting: if you really want to delete all archives, please use -a 'sh:*' "
|
||||
"or just delete the whole repository (might be much faster)."
|
||||
)
|
||||
return EXIT_ERROR
|
||||
|
@ -114,8 +114,8 @@ class DeleteMixIn:
|
|||
that is how much your repository will shrink.
|
||||
Please note that the "All archives" stats refer to the state after deletion.
|
||||
|
||||
You can delete multiple archives by specifying a matching shell pattern,
|
||||
using the ``--glob-archives GLOB`` option (for more info on these patterns,
|
||||
You can delete multiple archives by specifying a matching pattern,
|
||||
using the ``--match-archives PATTERN`` option (for more info on these patterns,
|
||||
see :ref:`borg_patterns`).
|
||||
|
||||
Always first use ``--dry-run --list`` to see what would be deleted.
|
||||
|
|
|
@ -244,9 +244,38 @@ class HelpMixIn:
|
|||
This allows you to share the same patterns between multiple repositories
|
||||
without needing to specify them on the command line.\n\n"""
|
||||
)
|
||||
helptext["match-archives"] = textwrap.dedent(
|
||||
"""
|
||||
The ``--match-archives`` option matches a given pattern against the list of all archive
|
||||
names in the repository.
|
||||
|
||||
It uses pattern styles similar to the ones described by ``borg help patterns``:
|
||||
|
||||
Identical match pattern, selector ``id:`` (default)
|
||||
Simple string match, must fully match exactly as given.
|
||||
|
||||
Shell-style patterns, selector ``sh:``
|
||||
Match like on the shell, wildcards like `*` and `?` work.
|
||||
|
||||
`Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:``
|
||||
Full regular expression support.
|
||||
This is very powerful, but can also get rather complicated.
|
||||
|
||||
Examples::
|
||||
# id: style
|
||||
borg delete --match-archives 'id:archive-with-crap'
|
||||
borg delete -a 'id:archive-with-crap' # same, using short option
|
||||
borg delete -a 'archive-with-crap' # same, because 'id:' is the default
|
||||
|
||||
# sh: style
|
||||
borg delete -a 'sh:home-kenny-*'
|
||||
|
||||
# re: style
|
||||
borg delete -a 're:pc[123]-home-(user1|user2)-2022-09-.*'\n\n"""
|
||||
)
|
||||
helptext["placeholders"] = textwrap.dedent(
|
||||
"""
|
||||
Repository URLs, ``--name``, ``-a`` / ``--glob-archives``, ``--comment``
|
||||
Repository URLs, ``--name``, ``-a`` / ``--match-archives``, ``--comment``
|
||||
and ``--remote-path`` values support these placeholders:
|
||||
|
||||
{hostname}
|
||||
|
@ -292,7 +321,7 @@ class HelpMixIn:
|
|||
|
||||
borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
|
||||
borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S%z} ...
|
||||
borg prune -a '{hostname}-*' ...
|
||||
borg prune -a 'sh:{hostname}-*' ...
|
||||
|
||||
.. note::
|
||||
systemd uses a difficult, non-standard syntax for command lines in unit files (refer to
|
||||
|
|
|
@ -84,7 +84,7 @@ class PruneMixIn:
|
|||
return self.exit_code
|
||||
checkpoint_re = r"\.checkpoint(\.\d+)?"
|
||||
archives_checkpoints = manifest.archives.list(
|
||||
glob=args.glob_archives,
|
||||
match=args.match_archives,
|
||||
consider_checkpoints=True,
|
||||
match_end=r"(%s)?\Z" % checkpoint_re,
|
||||
sort_by=["ts"],
|
||||
|
@ -191,7 +191,7 @@ class PruneMixIn:
|
|||
archive (and thus still needed). Checkpoint archives are not considered when
|
||||
comparing archive counts against the retention limits (``--keep-X``).
|
||||
|
||||
If you use --glob-archives (-a), then only archives that match the GLOB are
|
||||
If you use --match-archives (-a), then only archives that match the pattern are
|
||||
considered for deletion and only those archives count towards the totals
|
||||
specified by the rules.
|
||||
Otherwise, *all* archives in the repository are candidates for deletion!
|
||||
|
@ -200,7 +200,7 @@ class PruneMixIn:
|
|||
|
||||
If you have multiple sequences of archives with different data sets (e.g.
|
||||
from different machines) in one shared repository, use one prune call per
|
||||
data set that matches only the respective archives using the --glob-archives
|
||||
data set that matches only the respective archives using the --match-archives
|
||||
(-a) option.
|
||||
|
||||
The ``--keep-within`` option takes an argument of the form "<int><char>",
|
||||
|
|
|
@ -11,12 +11,12 @@ from .logger import create_logger
|
|||
|
||||
logger = create_logger()
|
||||
|
||||
from .helpers import shellpattern
|
||||
from .constants import * # NOQA
|
||||
from .helpers.datastruct import StableDict
|
||||
from .helpers.parseformat import bin_to_hex
|
||||
from .helpers.time import parse_timestamp
|
||||
from .helpers.errors import Error
|
||||
from .patterns import get_regex_from_pattern
|
||||
from .repoobj import RepoObj
|
||||
|
||||
|
||||
|
@ -74,12 +74,20 @@ class Archives(abc.MutableMapping):
|
|||
del self._archives[name]
|
||||
|
||||
def list(
|
||||
self, *, glob=None, match_end=r"\Z", sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False
|
||||
self,
|
||||
*,
|
||||
match=None,
|
||||
match_end=r"\Z",
|
||||
sort_by=(),
|
||||
consider_checkpoints=True,
|
||||
first=None,
|
||||
last=None,
|
||||
reverse=False
|
||||
):
|
||||
"""
|
||||
Return list of ArchiveInfo instances according to the parameters.
|
||||
|
||||
First match *glob* (considering *match_end*), then *sort_by*.
|
||||
First match *match* (considering *match_end*), then *sort_by*.
|
||||
Apply *first* and *last* filters, and then possibly *reverse* the list.
|
||||
|
||||
*sort_by* is a list of sort keys applied in reverse order.
|
||||
|
@ -90,7 +98,8 @@ class Archives(abc.MutableMapping):
|
|||
"""
|
||||
if isinstance(sort_by, (str, bytes)):
|
||||
raise TypeError("sort_by must be a sequence of str")
|
||||
regex = re.compile(shellpattern.translate(glob or "*", match_end=match_end))
|
||||
regex = get_regex_from_pattern(match or "re:.*")
|
||||
regex = re.compile(regex + match_end)
|
||||
archives = [x for x in self.values() if regex.match(x.name) is not None]
|
||||
if not consider_checkpoints:
|
||||
archives = [x for x in archives if ".checkpoint" not in x.name]
|
||||
|
@ -106,18 +115,18 @@ class Archives(abc.MutableMapping):
|
|||
|
||||
def list_considering(self, args):
|
||||
"""
|
||||
get a list of archives, considering --first/last/prefix/glob-archives/sort/consider-checkpoints cmdline args
|
||||
get a list of archives, considering --first/last/prefix/match-archives/sort/consider-checkpoints cmdline args
|
||||
"""
|
||||
name = getattr(args, "name", None)
|
||||
consider_checkpoints = getattr(args, "consider_checkpoints", None)
|
||||
if name is not None:
|
||||
raise Error(
|
||||
"Giving a specific name is incompatible with options --first, --last, -a / --glob-archives, and --consider-checkpoints."
|
||||
"Giving a specific name is incompatible with options --first, --last, -a / --match-archives, and --consider-checkpoints."
|
||||
)
|
||||
return self.list(
|
||||
sort_by=args.sort_by.split(","),
|
||||
consider_checkpoints=consider_checkpoints,
|
||||
glob=args.glob_archives,
|
||||
match=args.match_archives,
|
||||
first=args.first,
|
||||
last=args.last,
|
||||
)
|
||||
|
|
|
@ -388,3 +388,26 @@ def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
|
|||
val = parse_pattern(remainder_str, fallback, recurse_dir)
|
||||
|
||||
return CmdTuple(val, cmd)
|
||||
|
||||
|
||||
def get_regex_from_pattern(pattern: str) -> str:
    """
    Return a regular expression string corresponding to the given pattern string.

    The pattern may start with a two-letter style selector followed by a colon:

    - ``id:`` - "identical" match; the remainder is matched literally (it is
      passed through ``re.escape``).
    - ``sh:`` - shell-style pattern; the remainder is translated by
      ``shellpattern.translate``.
    - ``re:`` - regular expression; the remainder is returned unchanged.

    If there is no recognised selector, the whole string is treated like an
    ``id:`` pattern (this is the default style).

    The allowed pattern types are similar to the ones implemented by PatternBase
    subclasses, but here we rather do generic string matching, not specialised
    filesystem paths matching.

    :param pattern: pattern string, optionally prefixed by ``id:``, ``sh:`` or ``re:``
    :return: regular expression source text; no end-of-string anchor is appended here
    """
    # The text before the first ":" is only a selector if it is one of the known styles.
    style, colon, remainder = pattern.partition(":")
    if not colon or style not in {"sh", "re", "id"}:
        # "identical" match of the complete string is the default
        return re.escape(pattern)
    if style == "sh":
        # (?ms) (meaning re.MULTILINE and re.DOTALL) are not desired here.
        return shellpattern.translate(remainder, match_end="").removeprefix("(?ms)")
    if style == "re":
        return remainder
    # style == "id"
    return re.escape(remainder)
|
||||
|
|
|
@ -39,7 +39,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
|
|||
"check",
|
||||
"-v",
|
||||
"--archives-only",
|
||||
"--glob-archives=archive2",
|
||||
"--match-archives=archive2",
|
||||
exit_code=0,
|
||||
)
|
||||
self.assert_not_in("archive1", output)
|
||||
|
|
|
@ -19,7 +19,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd(f"--repo={self.repository_location}", "create", "another_test.2", "input")
|
||||
self.cmd(f"--repo={self.repository_location}", "extract", "test", "--dry-run")
|
||||
self.cmd(f"--repo={self.repository_location}", "extract", "test.2", "--dry-run")
|
||||
self.cmd(f"--repo={self.repository_location}", "delete", "--glob-archives", "another_*")
|
||||
self.cmd(f"--repo={self.repository_location}", "delete", "--match-archives", "sh:another_*")
|
||||
self.cmd(f"--repo={self.repository_location}", "delete", "--last", "1")
|
||||
self.cmd(f"--repo={self.repository_location}", "delete", "-a", "test")
|
||||
self.cmd(f"--repo={self.repository_location}", "extract", "test.2", "--dry-run")
|
||||
|
|
|
@ -236,13 +236,13 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch11", "arch12"]
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--last=2", "--sort=name"):
|
||||
assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch21", "arch22"]
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=arch1*"):
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=sh:arch1*"):
|
||||
assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch11", "arch12"]
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=arch2*"):
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=sh:arch2*"):
|
||||
assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch21", "arch22"]
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=arch*"):
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=sh:arch*"):
|
||||
assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch11", "arch12", "arch21", "arch22"]
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=nope"):
|
||||
with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=nope"):
|
||||
assert sorted(os.listdir(os.path.join(mountpoint))) == []
|
||||
|
||||
@unittest.skipUnless(llfuse, "llfuse not installed")
|
||||
|
|
|
@ -188,7 +188,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
"--list",
|
||||
"--dry-run",
|
||||
"--keep-daily=1",
|
||||
"--glob-archives=foo-*",
|
||||
"--match-archives=sh:foo-*",
|
||||
)
|
||||
assert re.search(r"Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00", output)
|
||||
assert re.search(r"Would prune:\s+foo-2015-08-12-10:00", output)
|
||||
|
@ -197,7 +197,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.assert_in("foo-2015-08-12-20:00", output)
|
||||
self.assert_in("bar-2015-08-12-10:00", output)
|
||||
self.assert_in("bar-2015-08-12-20:00", output)
|
||||
self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--glob-archives=foo-*")
|
||||
self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--match-archives=sh:foo-*")
|
||||
output = self.cmd(f"--repo={self.repository_location}", "rlist")
|
||||
self.assert_not_in("foo-2015-08-12-10:00", output)
|
||||
self.assert_in("foo-2015-08-12-20:00", output)
|
||||
|
@ -216,7 +216,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
"--list",
|
||||
"--dry-run",
|
||||
"--keep-daily=1",
|
||||
"--glob-archives=2015-*-foo",
|
||||
"--match-archives=sh:2015-*-foo",
|
||||
)
|
||||
assert re.search(r"Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo", output)
|
||||
assert re.search(r"Would prune:\s+2015-08-12-10:00-foo", output)
|
||||
|
@ -225,7 +225,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.assert_in("2015-08-12-20:00-foo", output)
|
||||
self.assert_in("2015-08-12-10:00-bar", output)
|
||||
self.assert_in("2015-08-12-20:00-bar", output)
|
||||
self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--glob-archives=2015-*-foo")
|
||||
self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--match-archives=sh:2015-*-foo")
|
||||
output = self.cmd(f"--repo={self.repository_location}", "rlist")
|
||||
self.assert_not_in("2015-08-12-10:00-foo", output)
|
||||
self.assert_in("2015-08-12-20:00-foo", output)
|
||||
|
|
|
@ -19,7 +19,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd(f"--repo={self.repository_location}", "create", "test-1", src_dir)
|
||||
self.cmd(f"--repo={self.repository_location}", "create", "something-else-than-test-1", src_dir)
|
||||
self.cmd(f"--repo={self.repository_location}", "create", "test-2", src_dir)
|
||||
output = self.cmd(f"--repo={self.repository_location}", "rlist", "--glob-archives=test-*")
|
||||
output = self.cmd(f"--repo={self.repository_location}", "rlist", "--match-archives=sh:test-*")
|
||||
self.assert_in("test-1", output)
|
||||
self.assert_in("test-2", output)
|
||||
self.assert_not_in("something-else", output)
|
||||
|
|
|
@ -8,6 +8,7 @@ import pytest
|
|||
from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
|
||||
from ..patterns import load_exclude_file, load_pattern_file
|
||||
from ..patterns import parse_pattern, PatternMatcher
|
||||
from ..patterns import get_regex_from_pattern
|
||||
|
||||
|
||||
def check_patterns(files, pattern, expected):
|
||||
|
@ -617,3 +618,18 @@ def test_pattern_matcher():
|
|||
assert pm.match("z") == "B"
|
||||
|
||||
assert PatternMatcher(fallback="hey!").fallback == "hey!"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"pattern, regex",
|
||||
[
|
||||
("foo.bar", r"foo\.bar"), # default is id:
|
||||
("id:foo.bar", r"foo\.bar"),
|
||||
("id:foo?", r"foo\?"),
|
||||
("re:foo.bar", r"foo.bar"),
|
||||
("re:.*(fooo?|bar|baz).*", r".*(fooo?|bar|baz).*"),
|
||||
("sh:foo.*", r"foo\.[^\/]*"),
|
||||
],
|
||||
)
|
||||
def test_regex_from_pattern(pattern, regex):
|
||||
assert get_regex_from_pattern(pattern) == regex
|
||||
|
|
Loading…
Reference in New Issue