1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-29 11:16:43 +00:00

delete: just remove archive from manifest, let borg compact clean up later.

much faster and easier now, similar to what borg delete --force --force used to do.

Considering that speed, there is no need for checkpointing anymore.

--stats does not work with this approach, so it was removed; borg compact now shows some stats.
This commit is contained in:
Thomas Waldmann 2024-08-13 18:58:32 +02:00
parent 17ea118155
commit 4c052cd65d
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
4 changed files with 41 additions and 262 deletions

View file

@ -1038,84 +1038,11 @@ def rename(self, name):
self.set_meta("name", name)
del self.manifest.archives[oldname]
def delete(self):
    """Remove this archive from the manifest.

    Quick and dirty: we just nuke the archive from the archives list - that will
    potentially orphan all chunks previously referenced by the archive, except
    the ones also referenced by other archives. In the end, "borg compact" will
    clean up and free space.

    Note: this intentionally does NOT decrement chunk reference counts (the old
    implementation did); orphaned chunks are left for "borg compact" to collect.
    """
    # NOTE(review): assumes self.manifest.archives is dict-like; deleting a
    # name that is not present would raise KeyError - confirm callers check.
    del self.manifest.archives[self.name]
@staticmethod
def compare_archives_iter(
@ -2501,7 +2428,7 @@ def save(self, archive, target, comment=None, replace_original=True):
target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)
if replace_original:
archive.delete(Statistics(), progress=self.progress)
archive.delete()
target.rename(archive.name)
if self.stats:
target.start = _start

View file

@ -1,11 +1,9 @@
import argparse
import logging
from ._common import with_repository, Highlander
from ..archive import Archive, Statistics
from ..cache import Cache
from ._common import with_repository
from ..constants import * # NOQA
from ..helpers import log_multi, format_archive, sig_int, CommandError, Error
from ..helpers import format_archive, CommandError
from ..manifest import Manifest
from ..logger import create_logger
@ -29,67 +27,30 @@ def do_delete(self, args, repository):
"or just delete the whole repository (might be much faster)."
)
if args.forced == 2:
deleted = False
logger_list = logging.getLogger("borg.output.list")
for i, archive_name in enumerate(archive_names, 1):
try:
current_archive = manifest.archives.pop(archive_name)
except KeyError:
self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
else:
deleted = True
if self.output_list:
msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})"
logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names)))
if dry_run:
logger.info("Finished dry-run.")
elif deleted:
manifest.write()
# note: might crash in compact() after committing the repo
repository.commit(compact=False)
self.print_warning('Done. Run "borg check --repair" to clean up the mess.', wc=None)
deleted = False
logger_list = logging.getLogger("borg.output.list")
for i, archive_name in enumerate(archive_names, 1):
try:
# this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive
# would make the code in class Archive crash, so the user can at least get rid of such archives.
current_archive = manifest.archives.pop(archive_name)
except KeyError:
self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
else:
self.print_warning("Aborted.", wc=None)
return
deleted = True
if self.output_list:
msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})"
logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names)))
if dry_run:
logger.info("Finished dry-run.")
elif deleted:
manifest.write()
repository.commit(compact=False)
self.print_warning('Done. Run "borg compact" to free space.', wc=None)
else:
self.print_warning("Aborted.", wc=None)
return
stats = Statistics(iec=args.iec)
with Cache(repository, manifest, progress=args.progress, lock_wait=self.lock_wait, iec=args.iec) as cache:
def checkpoint_func():
manifest.write()
repository.commit(compact=False)
cache.commit()
msg_delete = "Would delete archive: {} ({}/{})" if dry_run else "Deleting archive: {} ({}/{})"
msg_not_found = "Archive {} not found ({}/{})."
logger_list = logging.getLogger("borg.output.list")
uncommitted_deletes = 0
for i, archive_name in enumerate(archive_names, 1):
if sig_int and sig_int.action_done():
break
try:
archive_info = manifest.archives[archive_name]
except KeyError:
self.print_warning(msg_not_found.format(archive_name, i, len(archive_names)))
else:
if self.output_list:
logger_list.info(msg_delete.format(format_archive(archive_info), i, len(archive_names)))
if not dry_run:
archive = Archive(manifest, archive_name, cache=cache)
archive.delete(stats, progress=args.progress, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval
)
uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1)
if sig_int:
# Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
raise Error("Got Ctrl-C / SIGINT.")
elif uncommitted_deletes > 0:
checkpoint_func()
if args.stats:
log_multi(str(stats), logger=logging.getLogger("borg.output.stats"))
def build_parser_delete(self, subparsers, common_parser, mid_common_parser):
from ._common import process_epilog, define_archive_filters_group
@ -103,16 +64,9 @@ def build_parser_delete(self, subparsers, common_parser, mid_common_parser):
When in doubt, use ``--dry-run --list`` to see what would be deleted.
When using ``--stats``, you will get some statistics about how much data was
deleted - the "Deleted data" deduplicated size there is most interesting as
that is how much your repository will shrink.
Please note that the "All archives" stats refer to the state after deletion.
You can delete multiple archives by specifying a matching pattern,
using the ``--match-archives PATTERN`` option (for more info on these patterns,
see :ref:`borg_patterns`).
Always first use ``--dry-run --list`` to see what would be deleted.
"""
)
subparser = subparsers.add_parser(
@ -135,24 +89,4 @@ def build_parser_delete(self, subparsers, common_parser, mid_common_parser):
dest="consider_checkpoints",
help="consider checkpoint archives for deletion (default: not considered).",
)
subparser.add_argument(
"-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive"
)
subparser.add_argument(
"--force",
dest="forced",
action="count",
default=0,
help="force deletion of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.",
)
subparser.add_argument(
"-c",
"--checkpoint-interval",
metavar="SECONDS",
dest="checkpoint_interval",
type=int,
default=1800,
action=Highlander,
help="write checkpoint every SECONDS seconds (Default: 1800)",
)
define_archive_filters_group(subparser)

View file

@ -7,10 +7,10 @@
import re
from ._common import with_repository, Highlander
from ..archive import Archive, Statistics
from ..archive import Archive
from ..cache import Cache
from ..constants import * # NOQA
from ..helpers import ArchiveFormatter, interval, sig_int, log_multi, ProgressIndicatorPercent, CommandError, Error
from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error
from ..manifest import Manifest
from ..logger import create_logger
@ -127,14 +127,7 @@ def do_prune(self, args, repository, manifest):
keep += prune_split(archives, rule, num, kept_because)
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
stats = Statistics(iec=args.iec)
with Cache(repository, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache:
def checkpoint_func():
manifest.write()
repository.commit(compact=False)
cache.commit()
list_logger = logging.getLogger("borg.output.list")
# set up counters for the progress display
to_delete_len = len(to_delete)
@ -152,11 +145,8 @@ def checkpoint_func():
archives_deleted += 1
log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
archive = Archive(manifest, archive.name, cache)
archive.delete(stats, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval
)
uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1)
archive.delete()
uncommitted_deletes += 1
else:
if is_checkpoint(archive.name):
log_message = "Keeping checkpoint archive:"
@ -172,12 +162,11 @@ def checkpoint_func():
list_logger.info(f"{log_message:<44} {formatter.format_item(archive, jsonline=False)}")
pi.finish()
if sig_int:
# Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
raise Error("Got Ctrl-C / SIGINT.")
elif uncommitted_deletes > 0:
checkpoint_func()
if args.stats:
log_multi(str(stats), logger=logging.getLogger("borg.output.stats"))
manifest.write()
repository.commit(compact=False)
cache.commit()
def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
from ._common import process_epilog
@ -235,11 +224,6 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
keep the last N archives under the assumption that you do not create more than one
backup archive in the same second).
When using ``--stats``, you will get some statistics about how much data was
deleted - the "Deleted data" deduplicated size there is most interesting as
that is how much your repository will shrink.
Please note that the "All archives" stats refer to the state after pruning.
You can influence how the ``--list`` output is formatted by using the ``--short``
option (less wide output) or by giving a custom format using ``--format`` (see
the ``borg rlist`` description for more details about the format string).
@ -256,15 +240,6 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
)
subparser.set_defaults(func=self.do_prune)
subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository")
subparser.add_argument(
"--force",
dest="forced",
action="store_true",
help="force pruning of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.",
)
subparser.add_argument(
"-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive"
)
subparser.add_argument(
"--list", dest="output_list", action="store_true", help="output verbose list of archives it keeps/prunes"
)
@ -353,13 +328,3 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
help="number of yearly archives to keep",
)
define_archive_filters_group(subparser, sort_by=False, first_last=False)
subparser.add_argument(
"-c",
"--checkpoint-interval",
metavar="SECONDS",
dest="checkpoint_interval",
type=int,
default=1800,
action=Highlander,
help="write checkpoint every SECONDS seconds (Default: 1800)",
)

View file

@ -1,13 +1,10 @@
from ...archive import Archive
from ...constants import * # NOQA
from ...manifest import Manifest
from ...repository3 import Repository3
from . import cmd, create_regular_file, src_file, create_src_archive, generate_archiver_tests, RK_ENCRYPTION
from . import cmd, create_regular_file, generate_archiver_tests, RK_ENCRYPTION
pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA
def test_delete(archivers, request):
def test_delete_options(archivers, request):
archiver = request.getfixturevalue(archivers)
create_regular_file(archiver.input_path, "file1", size=1024 * 80)
create_regular_file(archiver.input_path, "dir2/file2", size=1024 * 80)
@ -17,14 +14,11 @@ def test_delete(archivers, request):
cmd(archiver, "create", "test.3", "input")
cmd(archiver, "create", "another_test.1", "input")
cmd(archiver, "create", "another_test.2", "input")
cmd(archiver, "extract", "test", "--dry-run")
cmd(archiver, "extract", "test.2", "--dry-run")
cmd(archiver, "delete", "--match-archives", "sh:another_*")
cmd(archiver, "delete", "--last", "1")
cmd(archiver, "delete", "--last", "1") # test.3
cmd(archiver, "delete", "-a", "test")
cmd(archiver, "extract", "test.2", "--dry-run")
output = cmd(archiver, "delete", "-a", "test.2", "--stats")
assert "Original size: -" in output # negative size == deleted data
cmd(archiver, "extract", "test.2", "--dry-run") # still there?
cmd(archiver, "delete", "-a", "test.2")
output = cmd(archiver, "rlist")
assert output == "" # no archives left!
@ -35,47 +29,6 @@ def test_delete_multiple(archivers, request):
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test1", "input")
cmd(archiver, "create", "test2", "input")
cmd(archiver, "create", "test3", "input")
cmd(archiver, "delete", "-a", "test1")
cmd(archiver, "delete", "-a", "test2")
cmd(archiver, "extract", "test3", "--dry-run")
cmd(archiver, "delete", "-a", "test3")
assert not cmd(archiver, "rlist")
def test_delete_force(archivers, request):
    """Deleting an archive with a missing content chunk must still work with --force.

    Corrupts the archive by dropping one chunk of the known source file, then
    checks that "delete --force" succeeds with a corruption warning and that
    "check --repair" afterwards leaves no trace of the archive.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "rcreate", "--encryption=none")
    create_src_archive(archiver, "test")
    # corrupt the archive: delete the last content chunk of the known source file
    with Repository3(archiver.repository_path, exclusive=True) as repository:
        manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
        archive = Archive(manifest, "test")
        for item in archive.iter_items():
            if item.path.endswith(src_file):
                repository.delete(item.chunks[-1].id)
                break
        else:
            assert False  # missed the file
        repository.commit(compact=False)
    output = cmd(archiver, "delete", "-a", "test", "--force")
    assert "deleted archive was corrupted" in output
    cmd(archiver, "check", "--repair")
    output = cmd(archiver, "rlist")
    assert "test" not in output
def test_delete_double_force(archivers, request):
    """Deleting an archive with corrupted items metadata must work with --force --force.

    Overwrites the first items-metadata chunk with garbage, then checks that
    "delete --force --force" still removes the archive and "check --repair"
    leaves the repository without it.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "rcreate", "--encryption=none")
    create_src_archive(archiver, "test")
    # corrupt the archive's items metadata stream directly in the repository
    with Repository3(archiver.repository_path, exclusive=True) as repository:
        manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
        archive = Archive(manifest, "test")
        chunk_id = archive.metadata.items[0]  # renamed from `id`: avoid shadowing the builtin
        repository.put(chunk_id, b"corrupted items metadata stream chunk")
        repository.commit(compact=False)
    cmd(archiver, "delete", "-a", "test", "--force", "--force")
    cmd(archiver, "check", "--repair")
    output = cmd(archiver, "rlist")
    assert "test" not in output