Mirror of https://github.com/borgbackup/borg.git, synced 2024-12-29 11:16:43 +00:00
Merge pull request #8468 from ThomasWaldmann/check-improvements

Check improvements

Commit 3cae96cc99: 3 changed files with 24 additions and 3 deletions
@@ -1660,7 +1660,9 @@ def check(
         self.check_all = not any((first, last, match, older, newer, oldest, newest))
         self.repair = repair
         self.repository = repository
-        self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, cache_immediately=not repair)
+        # Repository.check already did a full repository-level check and has built and cached a fresh chunkindex -
+        # we can use that here, so we don't disable the caches (also no need to cache immediately, again):
+        self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=False, cache_immediately=False)
         self.key = self.make_key(repository)
         self.repo_objs = RepoObj(self.key)
         if verify_data:
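
The flag flip is the point of this hunk: with disable_caches=False, the archive checker reuses the chunk index that the preceding repository-level check just built and cached, instead of scanning the whole repository again. A self-contained toy sketch of that caching pattern follows; apart from the two flag names, every identifier is invented for illustration and is not borg's actual API:

    # Toy sketch of the disable_caches / cache_immediately pattern (illustrative only).
    _index_cache = {}  # stand-in for the chunkindex cached in the repository

    def _full_scan(repo_objects):
        # stand-in for the expensive pass over every object in the repository
        return {obj_id: "present" for obj_id in repo_objects}

    def build_chunkindex(repo_id, repo_objects, disable_caches=False, cache_immediately=False):
        if not disable_caches and repo_id in _index_cache:
            return _index_cache[repo_id]  # fast path: reuse what a prior check cached
        chunks = _full_scan(repo_objects)
        if cache_immediately:
            _index_cache[repo_id] = chunks
        return chunks

Under the old flags (disable_caches=True) the archive checker always took the slow path; with the new flags it takes the fast path whenever the repository check ran first.
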
@@ -41,6 +41,7 @@ def do_check(self, args, repository):
             raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
+            # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
             # also, there is no max_duration support in the archives check code anyway.
             raise CommandError("--repository-only is required for --max-duration support.")
         if not args.archives_only:
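
For reference, the flag combinations this guard sorts out look like this on the command line (flag spellings taken from the error messages above):

    borg check --repository-only --max-duration 3600   # time-boxed partial repo check
    borg check --repair --undelete-archives            # allowed: --repair is present
    borg check --max-duration 3600                     # rejected: needs --repository-only
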
@@ -77,8 +78,8 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
            the repository. The read data is checked by size and hash. Bit rot and other
            types of accidental damage can be detected this way. Running the repository
            check can be split into multiple partial checks using ``--max-duration``.
-           When checking a remote repository, please note that the checks run on the
-           server and do not cause significant network traffic.
+           When checking a ssh:// remote repository, please note that the checks run on
+           the server and do not cause significant network traffic.
 
         2. Checking consistency and correctness of the archive metadata and optionally
            archive data (requires ``--verify-data``). This includes ensuring that the
@@ -8,6 +8,7 @@
 
 from .checksums import xxh64
 from .constants import *  # NOQA
+from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Error, ErrorWithTraceback, IntegrityError
 from .helpers import Location
 from .helpers import bin_to_hex, hex_to_bin
@@ -306,6 +307,12 @@ def check_object(obj):
         t_start = time.monotonic()
         t_last_checkpoint = t_start
         objs_checked = objs_errors = 0
+        chunks = ChunkIndex()
+        # we don't do refcounting anymore, neither we can know here whether any archive
+        # is using this object, but we assume that this is the case and set refcount to
+        # MAX_VALUE. As we don't do garbage collection here, this is not a problem.
+        # We also don't know the plaintext size, so we set it to 0.
+        init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
         infos = self.store.list("data")
         try:
             for info in infos:
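
The comment block encodes two deliberate sentinels: refcount=MAX_VALUE means "assume the object is referenced and never collect it here", and size=0 means "plaintext size unknown". A self-contained sketch of those semantics, with stand-in types (the real ChunkIndex and ChunkIndexEntry come from borg.hashindex and are more specialized):

    from collections import namedtuple

    # stand-ins for the borg.hashindex types, simplified for illustration
    ChunkIndexEntry = namedtuple("ChunkIndexEntry", "refcount size")
    MAX_VALUE = 2**32 - 1  # illustrative "pinned, assumed referenced" marker

    chunks = {}  # the real ChunkIndex is a specialized hash table, not a dict
    init_entry = ChunkIndexEntry(refcount=MAX_VALUE, size=0)  # size unknown -> 0

    def record_surviving_object(hex_id: str) -> None:
        # every object still present after the check is assumed to be in use
        chunks[bytes.fromhex(hex_id)] = init_entry  # mirrors hex_to_bin(info.name)

    record_surviving_object("deadbeef")
    assert chunks[bytes.fromhex("deadbeef")].refcount == MAX_VALUE
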
@@ -338,6 +345,12 @@ def check_object(obj):
                             self.store.delete(key)
                         else:
                             log_error("reloading did help, inconsistent behaviour detected!")
+                if not (obj_corrupted and repair):
+                    # add all existing objects to the index.
+                    # borg check: the index may have corrupted objects (we did not delete them)
+                    # borg check --repair: the index will only have non-corrupted objects.
+                    id = hex_to_bin(info.name)
+                    chunks[id] = init_entry
                 now = time.monotonic()
                 if now > t_last_checkpoint + 300:  # checkpoint every 5 mins
                     t_last_checkpoint = now
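
The 300-second interval is a plain time-based checkpoint: a long-running check periodically persists how far it got, so a later (or time-boxed) run can resume instead of starting over. A generic, self-contained sketch of the loop shape (helper names are illustrative, not borg's):

    import time

    CHECKPOINT_INTERVAL = 300  # seconds; matches the "every 5 mins" comment above

    def checked_scan(keys, check_one, save_progress):
        """Run check_one over all keys, persisting progress every few minutes."""
        t_last_checkpoint = time.monotonic()
        for key in keys:
            check_one(key)
            now = time.monotonic()
            if now > t_last_checkpoint + CHECKPOINT_INTERVAL:
                save_progress(key)  # e.g. stored under "config/last-key-checked"
                t_last_checkpoint = now
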
@@ -353,6 +366,11 @@ def check_object(obj):
                     self.store.delete("config/last-key-checked")
                 except StoreObjectNotFound:
                     pass
+            if not partial:
+                # if we did a full pass in one go, we built a complete, uptodate ChunkIndex, cache it!
+                from .cache import write_chunkindex_to_repo_cache
+
+                write_chunkindex_to_repo_cache(self, chunks, compact=True, clear=True, force_write=True)
         except StoreObjectNotFound:
             # it can be that there is no "data/" at all, then it crashes when iterating infos.
             pass
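
The "if not partial" guard is what keeps the cache trustworthy: a time-boxed run only saw a subset of the objects, so caching its index would hand later archive checks an incomplete picture. A simplified, self-contained sketch of that rule (names invented for illustration):

    import time

    cached_index = None  # stand-in for the index cached in the repository

    def repo_scan(objects, max_duration=0):
        """Sketch: publish the index to the cache only after a complete pass."""
        global cached_index
        partial = bool(max_duration)  # max_duration > 0 => we may stop early
        t_start = time.monotonic()
        chunks = {}
        for obj_id in objects:
            chunks[obj_id] = "checked"  # real code stores a ChunkIndexEntry
            if partial and time.monotonic() - t_start > max_duration:
                break  # out of budget; a checkpoint lets the next run resume
        if not partial:
            cached_index = chunks  # complete and up to date, safe to cache
        return chunks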