From 4c052cd65d6f9e98ab6cae45ae15849e59dea686 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 13 Aug 2024 18:58:32 +0200 Subject: [PATCH] delete: just remove archive from manifest, let borg compact clean up later. much faster and easier now, similar to what borg delete --force --force used to do. considering that speed, no need for checkpointing anymore. --stats does not work that way, thus it was removed. borg compact now shows some stats. --- src/borg/archive.py | 83 +--------------- src/borg/archiver/delete_cmd.py | 114 +++++----------------- src/borg/archiver/prune_cmd.py | 49 ++-------- src/borg/testsuite/archiver/delete_cmd.py | 57 +---------- 4 files changed, 41 insertions(+), 262 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 31b1f0575..326b3cbf8 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1038,84 +1038,11 @@ def rename(self, name): self.set_meta("name", name) del self.manifest.archives[oldname] - def delete(self, stats, progress=False, forced=False): - class ChunksIndexError(Error): - """Chunk ID {} missing from chunks index, corrupted chunks index - aborting transaction.""" - - exception_ignored = object() - - def fetch_async_response(wait=True): - try: - return self.repository.async_response(wait=wait) - except Repository3.ObjectNotFound: - nonlocal error - # object not in repo - strange, but we wanted to delete it anyway. - if forced == 0: - raise - error = True - return exception_ignored # must not return None here - - def chunk_decref(id, size, stats): - try: - self.cache.chunk_decref(id, size, stats, wait=False) - except KeyError: - nonlocal error - if forced == 0: - cid = bin_to_hex(id) - raise ChunksIndexError(cid) - error = True - else: - fetch_async_response(wait=False) - - error = False - try: - unpacker = msgpack.Unpacker(use_list=False) - items_ids = self.metadata.items - pi = ProgressIndicatorPercent( - total=len(items_ids), msg="Decrementing references %3.0f%%", msgid="archive.delete" - ) - for i, (items_id, data) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): - if progress: - pi.show(i) - _, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM) - unpacker.feed(data) - chunk_decref(items_id, 1, stats) - try: - for item in unpacker: - item = Item(internal_dict=item) - if "chunks" in item: - for chunk_id, size in item.chunks: - chunk_decref(chunk_id, size, stats) - except (TypeError, ValueError): - # if items metadata spans multiple chunks and one chunk got dropped somehow, - # it could be that unpacker yields bad types - if forced == 0: - raise - error = True - if progress: - pi.finish() - except (msgpack.UnpackException, Repository3.ObjectNotFound): - # items metadata corrupted - if forced == 0: - raise - error = True - - # delete the blocks that store all the references that end up being loaded into metadata.items: - for id in self.metadata.item_ptrs: - chunk_decref(id, 1, stats) - - # in forced delete mode, we try hard to delete at least the manifest entry, - # if possible also the archive superblock, even if processing the items raises - # some harmless exception. - chunk_decref(self.id, 1, stats) + def delete(self): + # quick and dirty: we just nuke the archive from the archives list - that will + # potentially orphan all chunks previously referenced by the archive, except the ones also + # referenced by other archives. In the end, "borg compact" will clean up and free space. del self.manifest.archives[self.name] - while fetch_async_response(wait=True) is not None: - # we did async deletes, process outstanding results (== exceptions), - # so there is nothing pending when we return and our caller wants to commit. - pass - if error: - logger.warning("forced deletion succeeded, but the deleted archive was corrupted.") - logger.warning("borg check --repair is required to free all space.") @staticmethod def compare_archives_iter( @@ -2501,7 +2428,7 @@ def save(self, archive, target, comment=None, replace_original=True): target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata) if replace_original: - archive.delete(Statistics(), progress=self.progress) + archive.delete() target.rename(archive.name) if self.stats: target.start = _start diff --git a/src/borg/archiver/delete_cmd.py b/src/borg/archiver/delete_cmd.py index 7095cda90..275607299 100644 --- a/src/borg/archiver/delete_cmd.py +++ b/src/borg/archiver/delete_cmd.py @@ -1,11 +1,9 @@ import argparse import logging -from ._common import with_repository, Highlander -from ..archive import Archive, Statistics -from ..cache import Cache +from ._common import with_repository from ..constants import * # NOQA -from ..helpers import log_multi, format_archive, sig_int, CommandError, Error +from ..helpers import format_archive, CommandError from ..manifest import Manifest from ..logger import create_logger @@ -29,67 +27,30 @@ def do_delete(self, args, repository): "or just delete the whole repository (might be much faster)." ) - if args.forced == 2: - deleted = False - logger_list = logging.getLogger("borg.output.list") - for i, archive_name in enumerate(archive_names, 1): - try: - current_archive = manifest.archives.pop(archive_name) - except KeyError: - self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).") - else: - deleted = True - if self.output_list: - msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})" - logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names))) - if dry_run: - logger.info("Finished dry-run.") - elif deleted: - manifest.write() - # note: might crash in compact() after committing the repo - repository.commit(compact=False) - self.print_warning('Done. Run "borg check --repair" to clean up the mess.', wc=None) + deleted = False + logger_list = logging.getLogger("borg.output.list") + for i, archive_name in enumerate(archive_names, 1): + try: + # this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive + # would make the code in class Archive crash, so the user can at least get rid of such archives. + current_archive = manifest.archives.pop(archive_name) + except KeyError: + self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).") else: - self.print_warning("Aborted.", wc=None) - return + deleted = True + if self.output_list: + msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})" + logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names))) + if dry_run: + logger.info("Finished dry-run.") + elif deleted: + manifest.write() + repository.commit(compact=False) + self.print_warning('Done. Run "borg compact" to free space.', wc=None) + else: + self.print_warning("Aborted.", wc=None) + return - stats = Statistics(iec=args.iec) - with Cache(repository, manifest, progress=args.progress, lock_wait=self.lock_wait, iec=args.iec) as cache: - - def checkpoint_func(): - manifest.write() - repository.commit(compact=False) - cache.commit() - - msg_delete = "Would delete archive: {} ({}/{})" if dry_run else "Deleting archive: {} ({}/{})" - msg_not_found = "Archive {} not found ({}/{})." - logger_list = logging.getLogger("borg.output.list") - uncommitted_deletes = 0 - for i, archive_name in enumerate(archive_names, 1): - if sig_int and sig_int.action_done(): - break - try: - archive_info = manifest.archives[archive_name] - except KeyError: - self.print_warning(msg_not_found.format(archive_name, i, len(archive_names))) - else: - if self.output_list: - logger_list.info(msg_delete.format(format_archive(archive_info), i, len(archive_names))) - - if not dry_run: - archive = Archive(manifest, archive_name, cache=cache) - archive.delete(stats, progress=args.progress, forced=args.forced) - checkpointed = self.maybe_checkpoint( - checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval - ) - uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1) - if sig_int: - # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case. - raise Error("Got Ctrl-C / SIGINT.") - elif uncommitted_deletes > 0: - checkpoint_func() - if args.stats: - log_multi(str(stats), logger=logging.getLogger("borg.output.stats")) def build_parser_delete(self, subparsers, common_parser, mid_common_parser): from ._common import process_epilog, define_archive_filters_group @@ -103,16 +64,9 @@ def build_parser_delete(self, subparsers, common_parser, mid_common_parser): When in doubt, use ``--dry-run --list`` to see what would be deleted. - When using ``--stats``, you will get some statistics about how much data was - deleted - the "Deleted data" deduplicated size there is most interesting as - that is how much your repository will shrink. - Please note that the "All archives" stats refer to the state after deletion. - You can delete multiple archives by specifying a matching pattern, using the ``--match-archives PATTERN`` option (for more info on these patterns, see :ref:`borg_patterns`). - - Always first use ``--dry-run --list`` to see what would be deleted. """ ) subparser = subparsers.add_parser( @@ -135,24 +89,4 @@ def build_parser_delete(self, subparsers, common_parser, mid_common_parser): dest="consider_checkpoints", help="consider checkpoint archives for deletion (default: not considered).", ) - subparser.add_argument( - "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive" - ) - subparser.add_argument( - "--force", - dest="forced", - action="count", - default=0, - help="force deletion of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.", - ) - subparser.add_argument( - "-c", - "--checkpoint-interval", - metavar="SECONDS", - dest="checkpoint_interval", - type=int, - default=1800, - action=Highlander, - help="write checkpoint every SECONDS seconds (Default: 1800)", - ) define_archive_filters_group(subparser) diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index e8121993a..47443d079 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -7,10 +7,10 @@ import re from ._common import with_repository, Highlander -from ..archive import Archive, Statistics +from ..archive import Archive from ..cache import Cache from ..constants import * # NOQA -from ..helpers import ArchiveFormatter, interval, sig_int, log_multi, ProgressIndicatorPercent, CommandError, Error +from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error from ..manifest import Manifest from ..logger import create_logger @@ -127,14 +127,7 @@ def do_prune(self, args, repository, manifest): keep += prune_split(archives, rule, num, kept_because) to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) - stats = Statistics(iec=args.iec) with Cache(repository, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache: - - def checkpoint_func(): - manifest.write() - repository.commit(compact=False) - cache.commit() - list_logger = logging.getLogger("borg.output.list") # set up counters for the progress display to_delete_len = len(to_delete) @@ -152,11 +145,8 @@ def checkpoint_func(): archives_deleted += 1 log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len) archive = Archive(manifest, archive.name, cache) - archive.delete(stats, forced=args.forced) - checkpointed = self.maybe_checkpoint( - checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval - ) - uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1) + archive.delete() + uncommitted_deletes += 1 else: if is_checkpoint(archive.name): log_message = "Keeping checkpoint archive:" @@ -172,12 +162,11 @@ def checkpoint_func(): list_logger.info(f"{log_message:<44} {formatter.format_item(archive, jsonline=False)}") pi.finish() if sig_int: - # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case. raise Error("Got Ctrl-C / SIGINT.") elif uncommitted_deletes > 0: - checkpoint_func() - if args.stats: - log_multi(str(stats), logger=logging.getLogger("borg.output.stats")) + manifest.write() + repository.commit(compact=False) + cache.commit() def build_parser_prune(self, subparsers, common_parser, mid_common_parser): from ._common import process_epilog @@ -235,11 +224,6 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): keep the last N archives under the assumption that you do not create more than one backup archive in the same second). - When using ``--stats``, you will get some statistics about how much data was - deleted - the "Deleted data" deduplicated size there is most interesting as - that is how much your repository will shrink. - Please note that the "All archives" stats refer to the state after pruning. - You can influence how the ``--list`` output is formatted by using the ``--short`` option (less wide output) or by giving a custom format using ``--format`` (see the ``borg rlist`` description for more details about the format string). @@ -256,15 +240,6 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): ) subparser.set_defaults(func=self.do_prune) subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository") - subparser.add_argument( - "--force", - dest="forced", - action="store_true", - help="force pruning of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.", - ) - subparser.add_argument( - "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive" - ) subparser.add_argument( "--list", dest="output_list", action="store_true", help="output verbose list of archives it keeps/prunes" ) @@ -353,13 +328,3 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): help="number of yearly archives to keep", ) define_archive_filters_group(subparser, sort_by=False, first_last=False) - subparser.add_argument( - "-c", - "--checkpoint-interval", - metavar="SECONDS", - dest="checkpoint_interval", - type=int, - default=1800, - action=Highlander, - help="write checkpoint every SECONDS seconds (Default: 1800)", - ) diff --git a/src/borg/testsuite/archiver/delete_cmd.py b/src/borg/testsuite/archiver/delete_cmd.py index e931cc588..f29324b4b 100644 --- a/src/borg/testsuite/archiver/delete_cmd.py +++ b/src/borg/testsuite/archiver/delete_cmd.py @@ -1,13 +1,10 @@ -from ...archive import Archive from ...constants import * # NOQA -from ...manifest import Manifest -from ...repository3 import Repository3 -from . import cmd, create_regular_file, src_file, create_src_archive, generate_archiver_tests, RK_ENCRYPTION +from . import cmd, create_regular_file, generate_archiver_tests, RK_ENCRYPTION pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA -def test_delete(archivers, request): +def test_delete_options(archivers, request): archiver = request.getfixturevalue(archivers) create_regular_file(archiver.input_path, "file1", size=1024 * 80) create_regular_file(archiver.input_path, "dir2/file2", size=1024 * 80) @@ -17,14 +14,11 @@ def test_delete(archivers, request): cmd(archiver, "create", "test.3", "input") cmd(archiver, "create", "another_test.1", "input") cmd(archiver, "create", "another_test.2", "input") - cmd(archiver, "extract", "test", "--dry-run") - cmd(archiver, "extract", "test.2", "--dry-run") cmd(archiver, "delete", "--match-archives", "sh:another_*") - cmd(archiver, "delete", "--last", "1") + cmd(archiver, "delete", "--last", "1") # test.3 cmd(archiver, "delete", "-a", "test") - cmd(archiver, "extract", "test.2", "--dry-run") - output = cmd(archiver, "delete", "-a", "test.2", "--stats") - assert "Original size: -" in output # negative size == deleted data + cmd(archiver, "extract", "test.2", "--dry-run") # still there? + cmd(archiver, "delete", "-a", "test.2") output = cmd(archiver, "rlist") assert output == "" # no archives left! @@ -35,47 +29,6 @@ def test_delete_multiple(archivers, request): cmd(archiver, "rcreate", RK_ENCRYPTION) cmd(archiver, "create", "test1", "input") cmd(archiver, "create", "test2", "input") - cmd(archiver, "create", "test3", "input") cmd(archiver, "delete", "-a", "test1") cmd(archiver, "delete", "-a", "test2") - cmd(archiver, "extract", "test3", "--dry-run") - cmd(archiver, "delete", "-a", "test3") assert not cmd(archiver, "rlist") - - -def test_delete_force(archivers, request): - archiver = request.getfixturevalue(archivers) - cmd(archiver, "rcreate", "--encryption=none") - create_src_archive(archiver, "test") - with Repository3(archiver.repository_path, exclusive=True) as repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - archive = Archive(manifest, "test") - for item in archive.iter_items(): - if item.path.endswith(src_file): - repository.delete(item.chunks[-1].id) - break - else: - assert False # missed the file - repository.commit(compact=False) - output = cmd(archiver, "delete", "-a", "test", "--force") - assert "deleted archive was corrupted" in output - - cmd(archiver, "check", "--repair") - output = cmd(archiver, "rlist") - assert "test" not in output - - -def test_delete_double_force(archivers, request): - archiver = request.getfixturevalue(archivers) - cmd(archiver, "rcreate", "--encryption=none") - create_src_archive(archiver, "test") - with Repository3(archiver.repository_path, exclusive=True) as repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - archive = Archive(manifest, "test") - id = archive.metadata.items[0] - repository.put(id, b"corrupted items metadata stream chunk") - repository.commit(compact=False) - cmd(archiver, "delete", "-a", "test", "--force", "--force") - cmd(archiver, "check", "--repair") - output = cmd(archiver, "rlist") - assert "test" not in output