diff --git a/src/borg/archive.py b/src/borg/archive.py index 4ba885939..344a46505 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1660,7 +1660,9 @@ def check( self.check_all = not any((first, last, match, older, newer, oldest, newest)) self.repair = repair self.repository = repository - self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, cache_immediately=not repair) + # Repository.check already did a full repository-level check and has built and cached a fresh chunkindex - + # we can use that here, so we don't disable the caches (also no need to cache immediately, again): + self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=False, cache_immediately=False) self.key = self.make_key(repository) self.repo_objs = RepoObj(self.key) if verify_data: diff --git a/src/borg/archiver/check_cmd.py b/src/borg/archiver/check_cmd.py index 6f075e8f4..a7d0ea990 100644 --- a/src/borg/archiver/check_cmd.py +++ b/src/borg/archiver/check_cmd.py @@ -41,6 +41,7 @@ def do_check(self, args, repository): raise CommandError("--undelete-archives requires --repair argument.") if args.max_duration and not args.repo_only: # when doing a partial repo check, we can only check xxh64 hashes in repository files. + # archives check requires that a full repo check was done before and has built/cached a ChunkIndex. # also, there is no max_duration support in the archives check code anyway. raise CommandError("--repository-only is required for --max-duration support.") if not args.archives_only: @@ -77,8 +78,8 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser): the repository. The read data is checked by size and hash. Bit rot and other types of accidental damage can be detected this way. Running the repository check can be split into multiple partial checks using ``--max-duration``. - When checking a remote repository, please note that the checks run on the - server and do not cause significant network traffic. 
+ When checking a ssh:// remote repository, please note that the checks run on + the server and do not cause significant network traffic. 2. Checking consistency and correctness of the archive metadata and optionally archive data (requires ``--verify-data``). This includes ensuring that the diff --git a/src/borg/repository.py b/src/borg/repository.py index a6a5c7c73..fc671861f 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -8,6 +8,7 @@ from .checksums import xxh64 from .constants import * # NOQA +from .hashindex import ChunkIndex, ChunkIndexEntry from .helpers import Error, ErrorWithTraceback, IntegrityError from .helpers import Location from .helpers import bin_to_hex, hex_to_bin @@ -306,6 +307,12 @@ def check_object(obj): t_start = time.monotonic() t_last_checkpoint = t_start objs_checked = objs_errors = 0 + chunks = ChunkIndex() + # we don't do refcounting anymore, nor can we know here whether any archive + # is using this object, but we assume that this is the case and set refcount to + # MAX_VALUE. As we don't do garbage collection here, this is not a problem. + # We also don't know the plaintext size, so we set it to 0. + init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0) infos = self.store.list("data") try: for info in infos: @@ -338,6 +345,12 @@ def check_object(obj): self.store.delete(key) else: log_error("reloading did help, inconsistent behaviour detected!") + if not (obj_corrupted and repair): + # add all existing objects to the index. + # borg check: the index may have corrupted objects (we did not delete them) + # borg check --repair: the index will only have non-corrupted objects. 
+ id = hex_to_bin(info.name) + chunks[id] = init_entry now = time.monotonic() if now > t_last_checkpoint + 300: # checkpoint every 5 mins t_last_checkpoint = now @@ -353,6 +366,11 @@ def check_object(obj): self.store.delete("config/last-key-checked") except StoreObjectNotFound: pass + if not partial: + # if we did a full pass in one go, we built a complete, up-to-date ChunkIndex, cache it! + from .cache import write_chunkindex_to_repo_cache + + write_chunkindex_to_repo_cache(self, chunks, compact=True, clear=True, force_write=True) except StoreObjectNotFound: # it can be that there is no "data/" at all, then it crashes when iterating infos. pass