1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-29 11:16:43 +00:00

Merge pull request #8468 from ThomasWaldmann/check-improvements

Check improvements
This commit is contained in:
TW 2024-10-09 19:04:47 +02:00 committed by GitHub
commit 3cae96cc99
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 24 additions and 3 deletions

View file

@@ -1660,7 +1660,9 @@ def check(
self.check_all = not any((first, last, match, older, newer, oldest, newest))
self.repair = repair
self.repository = repository
self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, cache_immediately=not repair)
# Repository.check already did a full repository-level check and has built and cached a fresh chunkindex -
# we can use that here, so we don't disable the caches (also no need to cache immediately, again):
self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=False, cache_immediately=False)
self.key = self.make_key(repository)
self.repo_objs = RepoObj(self.key)
if verify_data:

View file

@@ -41,6 +41,7 @@ def do_check(self, args, repository):
raise CommandError("--undelete-archives requires --repair argument.")
if args.max_duration and not args.repo_only:
# when doing a partial repo check, we can only check xxh64 hashes in repository files.
# archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
# also, there is no max_duration support in the archives check code anyway.
raise CommandError("--repository-only is required for --max-duration support.")
if not args.archives_only:
@@ -77,8 +78,8 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
the repository. The read data is checked by size and hash. Bit rot and other
types of accidental damage can be detected this way. Running the repository
check can be split into multiple partial checks using ``--max-duration``.
When checking a remote repository, please note that the checks run on the
server and do not cause significant network traffic.
When checking an ssh:// remote repository, please note that the checks run on
the server and do not cause significant network traffic.
2. Checking consistency and correctness of the archive metadata and optionally
archive data (requires ``--verify-data``). This includes ensuring that the

View file

@@ -8,6 +8,7 @@
from .checksums import xxh64
from .constants import * # NOQA
from .hashindex import ChunkIndex, ChunkIndexEntry
from .helpers import Error, ErrorWithTraceback, IntegrityError
from .helpers import Location
from .helpers import bin_to_hex, hex_to_bin
@@ -306,6 +307,12 @@ def check_object(obj):
t_start = time.monotonic()
t_last_checkpoint = t_start
objs_checked = objs_errors = 0
chunks = ChunkIndex()
# we don't do refcounting anymore, nor can we know here whether any archive
# is using this object, but we assume that this is the case and set refcount to
# MAX_VALUE. As we don't do garbage collection here, this is not a problem.
# We also don't know the plaintext size, so we set it to 0.
init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
infos = self.store.list("data")
try:
for info in infos:
@@ -338,6 +345,12 @@ def check_object(obj):
self.store.delete(key)
else:
log_error("reloading did help, inconsistent behaviour detected!")
if not (obj_corrupted and repair):
# add all existing objects to the index.
# borg check: the index may have corrupted objects (we did not delete them)
# borg check --repair: the index will only have non-corrupted objects.
id = hex_to_bin(info.name)
chunks[id] = init_entry
now = time.monotonic()
if now > t_last_checkpoint + 300: # checkpoint every 5 mins
t_last_checkpoint = now
@@ -353,6 +366,11 @@ def check_object(obj):
self.store.delete("config/last-key-checked")
except StoreObjectNotFound:
pass
if not partial:
# if we did a full pass in one go, we built a complete, up-to-date ChunkIndex, cache it!
from .cache import write_chunkindex_to_repo_cache
write_chunkindex_to_repo_cache(self, chunks, compact=True, clear=True, force_write=True)
except StoreObjectNotFound:
# it can be that there is no "data/" at all, then it crashes when iterating infos.
pass