1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-21 23:33:07 +00:00

check --find-lost-archives (was: --undelete-archives)

Consider soft-deleted archives/ directory entries, but only create a new
archives/ directory entry if:
- there is no entry for that archive ID
- there is no soft-deleted entry for that archive ID either

Support running with or without --repair.

Without --repair, it can be used to detect such inconsistencies; in that case it returns with rc != 0.

--repository-only contradicts --find-lost-archives.
This commit is contained in:
Thomas Waldmann 2024-11-02 18:10:12 +01:00
parent c35cbc9028
commit a48a8d2bea
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
4 changed files with 48 additions and 30 deletions

View file

@ -1635,7 +1635,7 @@ def check(
*,
verify_data=False,
repair=False,
undelete_archives=False,
find_lost_archives=False,
match=None,
sort_by="",
first=0,
@ -1648,7 +1648,7 @@ def check(
"""Perform a set of checks on 'repository'
:param repair: enable repair mode, write updated or corrected data into repository
:param undelete_archives: create archive directory entries that are missing
:param find_lost_archives: create archive directory entries that are missing
:param first/last/sort_by: only check this number of first/last archives ordered by sort_by
:param match: only check archives matching this pattern
:param older/newer: only check archives older/newer than timedelta from now
@ -1685,7 +1685,7 @@ def check(
rebuild_manifest = True
if rebuild_manifest:
self.manifest = self.rebuild_manifest()
if undelete_archives:
if find_lost_archives:
self.rebuild_archives_directory()
self.rebuild_archives(
match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
@ -1815,8 +1815,10 @@ def rebuild_archives_directory(self):
"""Rebuild the archives directory, undeleting archives.
Iterates through all objects in the repository looking for archive metadata blocks.
When finding some that do not have a corresponding archives directory entry, it will
create that entry (undeleting all archives).
When finding some that do not have a corresponding archives directory entry (either
a normal entry for an "existing" archive, or a soft-deleted entry for a "deleted"
archive), it will create that entry (making the archives directory consistent with
the repository).
"""
def valid_archive(obj):
@ -1862,12 +1864,18 @@ def valid_archive(obj):
archive = ArchiveItem(internal_dict=archive)
name = archive.name
archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
logger.info(f"Found archive {name} {archive_id_hex}.")
if self.manifest.archives.exists_name_and_id(name, archive_id):
logger.info("We already have an archives directory entry for this.")
if self.manifest.archives.exists_id(archive_id, deleted=False):
logger.debug(f"We already have an archives directory entry for {name} {archive_id_hex}.")
elif self.manifest.archives.exists_id(archive_id, deleted=True):
logger.debug(f"We already have a deleted archives directory entry for {name} {archive_id_hex}.")
else:
logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
self.manifest.archives.create(name, archive_id, archive.time)
self.error_found = True
if self.repair:
logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
self.manifest.archives.create(name, archive_id, archive.time)
else:
logger.warning(f"Would create archives directory entry for {name} {archive_id_hex}.")
pi.finish()
logger.info("Rebuilding missing archives directory entries completed.")

View file

@ -35,10 +35,10 @@ def do_check(self, args, repository):
raise CommandError(
"--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
)
if args.repo_only and args.find_lost_archives:
raise CommandError("--repository-only contradicts the --find-lost-archives option.")
if args.repair and args.max_duration:
raise CommandError("--repair does not allow --max-duration argument.")
if args.undelete_archives and not args.repair:
raise CommandError("--undelete-archives requires --repair argument.")
if args.max_duration and not args.repo_only:
# when doing a partial repo check, we can only check xxh64 hashes in repository files.
# archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
@ -51,7 +51,7 @@ def do_check(self, args, repository):
repository,
verify_data=args.verify_data,
repair=args.repair,
undelete_archives=args.undelete_archives,
find_lost_archives=args.find_lost_archives,
match=args.match_archives,
sort_by=args.sort_by or "ts",
first=args.first,
@ -180,11 +180,12 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
Consequently, if lost chunks were repaired earlier, it is advised to run
``--repair`` a second time after creating some new backups.
If ``--repair --undelete-archives`` is given, Borg will scan the repository
If ``--repair --find-lost-archives`` is given, Borg will scan the repository
for archive metadata and if it finds some where no corresponding archives
directory entry exists, it will create the entries. This is basically undoing
``borg delete archive`` or ``borg prune ...`` commands and only possible before
``borg compact`` would remove the archives' data completely.
directory entry exists, it will create one.
This will make archives reappear for which the directory entry was lost.
This is only possible before ``borg compact`` would remove the archives'
data completely.
"""
)
subparser = subparsers.add_parser(
@ -213,10 +214,7 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
"--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
)
subparser.add_argument(
"--undelete-archives",
dest="undelete_archives",
action="store_true",
help="attempt to undelete archives (use with --repair)",
"--find-lost-archives", dest="find_lost_archives", action="store_true", help="attempt to find lost archives"
)
subparser.add_argument(
"--max-duration",

View file

@ -222,6 +222,14 @@ def exists(self, name):
else:
return name in self._archives
def exists_id(self, id, *, deleted=False):
    """Return whether an archive with this *id* exists.

    With deleted=True, look among the soft-deleted entries instead of
    the normal ones. Only supported for non-legacy repositories.
    """
    assert isinstance(id, bytes)
    if self.legacy:
        raise NotImplementedError
    return id in self.ids(deleted=deleted)
def exists_name_and_id(self, name, id):
# check if an archive with this name AND id exists
assert isinstance(name, str)

View file

@ -1,4 +1,5 @@
from datetime import datetime, timezone, timedelta
from pathlib import Path
import shutil
from unittest.mock import patch
@ -270,18 +271,21 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
def test_check_undelete_archives(archivers, request):
archiver = request.getfixturevalue(archivers)
check_cmd_setup(archiver) # creates archive1 and archive2
# borg delete does it rather quick and dirty: it only kills the archives directory entry
cmd(archiver, "delete", "archive1")
cmd(archiver, "delete", "archive2")
output = cmd(archiver, "repo-list")
assert "archive1" not in output
assert "archive2" not in output
# borg check will re-discover archive1 and archive2 and new archives directory entries
# will be created because we requested undeleting archives.
cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
existing_archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
create_src_archive(archiver, "archive3")
archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
new_archive_id_hex = (archive_ids - existing_archive_ids).pop()
(Path(archiver.repository_path) / "archives" / new_archive_id_hex).unlink() # lose the entry for archive3
output = cmd(archiver, "repo-list")
assert "archive1" in output
assert "archive2" in output
assert "archive3" not in output
# borg check will re-discover archive3 and create a new archives directory entry.
cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0)
output = cmd(archiver, "repo-list")
assert "archive1" in output
assert "archive2" in output
assert "archive3" in output
def test_spoofed_archive(archivers, request):