
Merge pull request #7846 from ThomasWaldmann/files-cache-with-size

files cache with size
Commit 66b62c6fc9, authored by TW on 2024-07-18 23:33:10 +02:00, committed by GitHub.
20 changed files with 681 additions and 486 deletions

@@ -84,6 +84,18 @@ General:
- ``pyfuse3``: only try to load pyfuse3
- ``llfuse``: only try to load llfuse
- ``none``: do not try to load an implementation
BORG_CACHE_IMPL
Choose the implementation for the clientside cache, choose one of:
- ``local``: uses a persistent chunks cache and keeps it in a perfect state (precise refcounts and
sizes), requiring a potentially resource expensive cache sync in multi-client scenarios.
Also has a persistent files cache.
- ``adhoc``: builds a non-persistent chunks cache by querying the repo. Chunks cache contents
are somewhat sloppy for already existing chunks, concerning their refcount ("infinite") and
size (0). No files cache (slow, will chunk all input files). DEPRECATED.
- ``adhocwithfiles``: Like ``adhoc``, but with a persistent files cache. Default implementation.
- ``cli``: Determine the cache implementation from cli options. Without special options, will
usually end up with the ``local`` implementation.
BORG_SELFTEST
This can be used to influence borg's builtin self-tests. The default is to execute the tests
at the beginning of each borg command invocation.
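
As a quick illustration of the new switch: a minimal sketch of how BORG_CACHE_IMPL could be read. The get_cache_impl helper does exist (it is imported by the tests further down), but its body here and the fallback default are assumptions based on the documentation above:

import os

def get_cache_impl():
    # assumed sketch: return the configured cache implementation name; per the docs
    # above, AdHocWithFilesCache ("adhocwithfiles") is the default implementation.
    return os.environ.get("BORG_CACHE_IMPL", "adhocwithfiles")

# e.g., to force the persistent LocalCache for one run (illustrative usage):
# os.environ["BORG_CACHE_IMPL"] = "local"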

@@ -643,14 +643,14 @@ def write_checkpoint(self):
# so we can already remove it here, the next .save() will then commit this cleanup.
# remove its manifest entry, remove its ArchiveItem chunk, remove its item_ptrs chunks:
del self.manifest.archives[self.checkpoint_name]
self.cache.chunk_decref(self.id, self.stats)
self.cache.chunk_decref(self.id, 1, self.stats)
for id in metadata.item_ptrs:
self.cache.chunk_decref(id, self.stats)
self.cache.chunk_decref(id, 1, self.stats)
# also get rid of that part item, we do not want to have it in next checkpoint or final archive
tail_chunks = self.items_buffer.restore_chunks_state()
# tail_chunks contain the tail of the archive items metadata stream, not needed for next commit.
for id in tail_chunks:
self.cache.chunk_decref(id, self.stats)
self.cache.chunk_decref(id, 1, self.stats) # TODO can we have real size here?
def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
name = name or self.name
@@ -1024,7 +1024,7 @@ def set_meta(self, key, value):
new_id = self.key.id_hash(data)
self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
self.manifest.archives[self.name] = (new_id, metadata.time)
self.cache.chunk_decref(self.id, self.stats)
self.cache.chunk_decref(self.id, 1, self.stats)
self.id = new_id
def rename(self, name):
@@ -1052,12 +1052,15 @@ def fetch_async_response(wait=True):
error = True
return exception_ignored # must not return None here
def chunk_decref(id, stats):
def chunk_decref(id, size, stats):
try:
self.cache.chunk_decref(id, stats, wait=False)
self.cache.chunk_decref(id, size, stats, wait=False)
except KeyError:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
nonlocal error
if forced == 0:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
error = True
else:
fetch_async_response(wait=False)
@@ -1073,13 +1076,13 @@ def chunk_decref(id, stats):
pi.show(i)
_, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM)
unpacker.feed(data)
chunk_decref(items_id, stats)
chunk_decref(items_id, 1, stats)
try:
for item in unpacker:
item = Item(internal_dict=item)
if "chunks" in item:
for chunk_id, size in item.chunks:
chunk_decref(chunk_id, stats)
chunk_decref(chunk_id, size, stats)
except (TypeError, ValueError):
# if items metadata spans multiple chunks and one chunk got dropped somehow,
# it could be that unpacker yields bad types
@@ -1096,12 +1099,12 @@ def chunk_decref(id, stats):
# delete the blocks that store all the references that end up being loaded into metadata.items:
for id in self.metadata.item_ptrs:
chunk_decref(id, stats)
chunk_decref(id, 1, stats)
# in forced delete mode, we try hard to delete at least the manifest entry,
# if possible also the archive superblock, even if processing the items raises
# some harmless exception.
chunk_decref(self.id, stats)
chunk_decref(self.id, 1, stats)
del self.manifest.archives[self.name]
while fetch_async_response(wait=True) is not None:
# we did async deletes, process outstanding results (== exceptions),
@@ -1510,7 +1513,7 @@ def process_pipe(self, *, path, cache, fd, mode, user=None, group=None):
except BackupOSError:
# see comments in process_file's exception handler, same issue here.
for chunk in item.get("chunks", []):
cache.chunk_decref(chunk.id, self.stats, wait=False)
cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
raise
else:
item.get_size(memorize=True)
@@ -1544,7 +1547,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
item.chunks = []
for chunk_id, chunk_size in hl_chunks:
# process one-by-one, so we will know in item.chunks how far we got
chunk_entry = cache.chunk_incref(chunk_id, self.stats)
chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
item.chunks.append(chunk_entry)
else: # normal case, no "2nd+" hardlink
if not is_special_file:
@@ -1552,26 +1555,26 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
started_hashing = time.monotonic()
path_hash = self.key.id_hash(hashed_path)
self.stats.hashing_time += time.monotonic() - started_hashing
known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
else:
# in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past:
hashed_path = path_hash = None
known, ids = False, None
if ids is not None:
known, chunks = False, None
if chunks is not None:
# Make sure all ids are available
for id_ in ids:
if not cache.seen_chunk(id_):
for chunk in chunks:
if not cache.seen_chunk(chunk.id):
# cache said it is unmodified, but we lost a chunk: process file like modified
status = "M"
break
else:
item.chunks = []
for chunk_id in ids:
for chunk in chunks:
# process one-by-one, so we will know in item.chunks how far we got
chunk_entry = cache.chunk_incref(chunk_id, self.stats)
item.chunks.append(chunk_entry)
cache.chunk_incref(chunk.id, chunk.size, self.stats)
item.chunks.append(chunk)
status = "U" # regular file, unchanged
else:
status = "M" if known else "A" # regular file, modified or added
@@ -1606,7 +1609,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
# block or char device will change without its mtime/size/inode changing.
# also, we must not memorize a potentially inconsistent/corrupt file that
# changed while we backed it up.
cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
cache.memorize_file(hashed_path, path_hash, st, item.chunks)
self.stats.files_stats[status] += 1 # must be done late
if not changed_while_backup:
status = None # we already called print_file_status
@@ -1620,7 +1623,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
# but we will not add an item (see add_item in create_helper) and thus
# they would be orphaned chunks in case that we commit the transaction.
for chunk in item.get("chunks", []):
cache.chunk_decref(chunk.id, self.stats, wait=False)
cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
# Now that we have cleaned up the chunk references, we can re-raise the exception.
# This will skip processing of this file, but might retry or continue with the next one.
raise
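
The process_file hunks above change the files cache contract: file_known_and_unchanged now returns full chunk list entries (id and size) rather than bare chunk ids, and memorize_file stores item.chunks directly. A condensed sketch of the caller-side logic, using only names visible in this diff (the wrapper function itself is hypothetical):

def reuse_cached_file_chunks(cache, item, hashed_path, path_hash, st, stats):
    # hypothetical helper condensing the hunk above
    known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
    if chunks is not None and all(cache.seen_chunk(chunk.id) for chunk in chunks):
        item.chunks = []
        for chunk in chunks:
            # each cached entry already carries its size, so incref can account for it
            cache.chunk_incref(chunk.id, chunk.size, stats)
            item.chunks.append(chunk)
        return "U"  # regular file, unchanged
    return "M" if known else "A"  # modified (cache miss or lost chunk) or added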
@@ -1731,7 +1734,7 @@ def process_file(self, *, tarinfo, status, type, tar):
except BackupOSError:
# see comment in FilesystemObjectProcessors.process_file, same issue here.
for chunk in item.get("chunks", []):
self.cache.chunk_decref(chunk.id, self.stats, wait=False)
self.cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
raise
@@ -2328,10 +2331,10 @@ def orphan_chunks_check(self):
unused = {id_ for id_, entry in self.chunks.iteritems() if entry.refcount == 0}
orphaned = unused - self.possibly_superseded
if orphaned:
logger.error(f"{len(orphaned)} orphaned objects found!")
logger.info(f"{len(orphaned)} orphaned (unused) objects found.")
for chunk_id in orphaned:
logger.debug(f"chunk {bin_to_hex(chunk_id)} is orphaned.")
self.error_found = True
# To support working with AdHocCache or AdHocWithFilesCache, we do not set self.error_found = True.
if self.repair and unused:
logger.info(
"Deleting %d orphaned and %d superseded objects..." % (len(orphaned), len(self.possibly_superseded))
@@ -2444,7 +2447,7 @@ def process_item(self, archive, target, item):
def process_chunks(self, archive, target, item):
if not target.recreate_rechunkify:
for chunk_id, size in item.chunks:
self.cache.chunk_incref(chunk_id, target.stats)
self.cache.chunk_incref(chunk_id, size, target.stats)
return item.chunks
chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
chunk_processor = partial(self.chunk_processor, target)
@@ -2452,8 +2455,9 @@ def process_chunks(self, archive, target, item):
def chunk_processor(self, target, chunk):
chunk_id, data = cached_hash(chunk, self.key.id_hash)
size = len(data)
if chunk_id in self.seen_chunks:
return self.cache.chunk_incref(chunk_id, target.stats)
return self.cache.chunk_incref(chunk_id, size, target.stats)
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
self.cache.repository.async_response(wait=False)
self.seen_chunks.add(chunk_entry.id)
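
Throughout this file, chunk_incref and chunk_decref now take the chunk size from the caller. The reason follows from the BORG_CACHE_IMPL notes above: the ad-hoc chunks caches record already-existing chunks with size 0 and an "infinite" refcount, so the stored size is useless for statistics. A sketch of what a size-aware decref might look like; this is illustrative, not the actual borg.cache code:

def chunk_decref(self, id, size, stats, wait=True):
    # illustrative: use the caller-supplied size for accounting, since the chunks
    # index entry may store size 0 (ad-hoc cache implementations).
    count, _stored_size = self.chunks.decref(id)
    if count == 0:
        del self.chunks[id]
        self.repository.delete(id, wait=wait)
        stats.update(-size, True)
    else:
        stats.update(-size, False)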

@@ -5,7 +5,6 @@
from ..cache import Cache, assert_secure
from ..constants import * # NOQA
from ..helpers import Error, CommandError
from ..helpers import Location
from ..helpers import parse_file_size, hex_to_bin
from ..manifest import Manifest
@@ -52,11 +51,8 @@ def repo_validate(section, name, value=None, check_value=True):
def cache_validate(section, name, value=None, check_value=True):
if section not in ["cache"]:
raise ValueError("Invalid section")
if name in ["previous_location"]:
if check_value:
Location(value)
else:
raise ValueError("Invalid name")
# currently, we do not support setting anything in the cache via borg config.
raise ValueError("Invalid name")
def list_config(config):
default_values = {

@@ -224,7 +224,9 @@ def create_inner(archive, cache, fso):
manifest,
progress=args.progress,
lock_wait=self.lock_wait,
permit_adhoc_cache=args.no_cache_sync,
no_cache_sync_permitted=args.no_cache_sync,
no_cache_sync_forced=args.no_cache_sync_forced,
prefer_adhoc_cache=args.prefer_adhoc_cache,
cache_mode=args.files_cache_mode,
iec=args.iec,
) as cache:
@@ -801,7 +803,19 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
"--no-cache-sync",
dest="no_cache_sync",
action="store_true",
help="experimental: do not synchronize the cache. Implies not using the files cache.",
help="experimental: do not synchronize the chunks cache.",
)
subparser.add_argument(
"--no-cache-sync-forced",
dest="no_cache_sync_forced",
action="store_true",
help="experimental: do not synchronize the chunks cache (forced).",
)
subparser.add_argument(
"--prefer-adhoc-cache",
dest="prefer_adhoc_cache",
action="store_true",
help="experimental: prefer AdHocCache (w/o files cache) over AdHocWithFilesCache (with files cache).",
)
subparser.add_argument(
"--stdin-name",

@@ -59,16 +59,9 @@ def do_rinfo(self, args, repository, manifest, cache):
output += f" out of {format_file_size(storage_quota, iec=args.iec)}"
output += "\n"
output += (
textwrap.dedent(
"""
Cache: {cache.path}
Security dir: {security_dir}
"""
)
.strip()
.format(**info)
)
if hasattr(info["cache"], "path"):
output += "Cache: {cache.path}\n".format(**info)
output += "Security dir: {security_dir}\n".format(**info)
print(output)
print(str(cache))

@@ -143,7 +143,7 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non
transfer_size += size
else:
if not dry_run:
chunk_entry = cache.chunk_incref(chunk_id, archive.stats)
chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
chunks.append(chunk_entry)
present_size += size
if not dry_run:

File diff suppressed because it is too large.

@@ -1184,13 +1184,13 @@ def default(self, o):
from ..repository import Repository
from ..remote import RemoteRepository
from ..archive import Archive
from ..cache import LocalCache, AdHocCache
from ..cache import LocalCache, AdHocCache, AdHocWithFilesCache
if isinstance(o, Repository) or isinstance(o, RemoteRepository):
return {"id": bin_to_hex(o.id), "location": o._location.canonical_path()}
if isinstance(o, Archive):
return o.info()
if isinstance(o, LocalCache):
if isinstance(o, (LocalCache, AdHocWithFilesCache)):
return {"path": o.path, "stats": o.stats()}
if isinstance(o, AdHocCache):
return {"stats": o.stats()}

@@ -18,7 +18,7 @@
from ... import xattr, platform
from ...archive import Archive
from ...archiver import Archiver, PURE_PYTHON_MSGPACK_WARNING
from ...cache import Cache
from ...cache import Cache, LocalCache
from ...constants import * # NOQA
from ...helpers import Location, umount
from ...helpers import EXIT_SUCCESS
@@ -356,9 +356,15 @@ def check_cache(archiver):
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest, sync=False) as cache:
original_chunks = cache.chunks
# the LocalCache implementation has an on-disk chunks cache,
# but AdHocWithFilesCache and AdHocCache don't have persistent chunks cache.
persistent = isinstance(cache, LocalCache)
Cache.destroy(repository)
with Cache(repository, manifest) as cache:
correct_chunks = cache.chunks
if not persistent:
# there is no point in doing the checks
return
assert original_chunks is not correct_chunks
seen = set()
for id, (refcount, size) in correct_chunks.iteritems():

@@ -338,10 +338,11 @@ def test_extra_chunks(archivers, request):
with Repository(archiver.repository_location, exclusive=True) as repository:
repository.put(b"01234567890123456789012345678901", b"xxxx")
repository.commit(compact=False)
cmd(archiver, "check", exit_code=1)
cmd(archiver, "check", exit_code=1)
output = cmd(archiver, "check", "-v", exit_code=0) # orphans are not considered warnings anymore
assert "1 orphaned (unused) objects found." in output
cmd(archiver, "check", "--repair", exit_code=0)
cmd(archiver, "check", exit_code=0)
output = cmd(archiver, "check", "-v", exit_code=0)
assert "orphaned (unused) objects found." not in output
cmd(archiver, "extract", "archive1", "--dry-run", exit_code=0)

@@ -4,7 +4,7 @@
import pytest
from ...cache import Cache, LocalCache
from ...cache import Cache, LocalCache, get_cache_impl
from ...constants import * # NOQA
from ...helpers import Location, get_security_dir, bin_to_hex
from ...helpers import EXIT_ERROR
@@ -153,32 +153,29 @@ def test_repository_move(archivers, request, monkeypatch):
security_dir = get_security_directory(archiver.repository_path)
os.replace(archiver.repository_path, archiver.repository_path + "_new")
archiver.repository_location += "_new"
# borg should notice that the repository location changed and abort.
if archiver.FORK_DEFAULT:
cmd(archiver, "rinfo", exit_code=EXIT_ERROR)
else:
with pytest.raises(Cache.RepositoryAccessAborted):
cmd(archiver, "rinfo")
# if we explicitly allow relocated repos, it should work fine.
monkeypatch.setenv("BORG_RELOCATED_REPO_ACCESS_IS_OK", "yes")
cmd(archiver, "rinfo")
monkeypatch.delenv("BORG_RELOCATED_REPO_ACCESS_IS_OK")
with open(os.path.join(security_dir, "location")) as fd:
location = fd.read()
assert location == Location(archiver.repository_location).canonical_path()
# Needs no confirmation anymore
cmd(archiver, "rinfo")
shutil.rmtree(archiver.cache_path)
# after new repo location was confirmed once, it needs no further confirmation anymore.
cmd(archiver, "rinfo")
shutil.rmtree(security_dir)
# it also needs no confirmation if we have no knowledge about the previous location.
cmd(archiver, "rinfo")
# it will re-create security-related infos in the security dir:
for file in ("location", "key-type", "manifest-timestamp"):
assert os.path.exists(os.path.join(security_dir, file))
def test_security_dir_compat(archivers, request):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "rcreate", RK_ENCRYPTION)
with open(os.path.join(get_security_directory(archiver.repository_path), "location"), "w") as fd:
fd.write("something outdated")
# This is fine, because the cache still has the correct information. security_dir and cache can disagree
# if older versions are used to confirm a renamed repository.
cmd(archiver, "rinfo")
def test_unknown_unencrypted(archivers, request, monkeypatch):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "rcreate", "--encryption=none")
@@ -207,9 +204,12 @@ def test_unknown_feature_on_create(archivers, request):
cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
def test_unknown_feature_on_cache_sync(archivers, request):
# LocalCache.sync checks repo compat
archiver = request.getfixturevalue(archivers)
cmd(archiver, "rcreate", RK_ENCRYPTION)
# delete the cache to trigger a cache sync later in borg create
cmd(archiver, "rdelete", "--cache-only")
add_unknown_feature(archiver.repository_path, Manifest.Operation.READ)
cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
@@ -277,6 +277,7 @@ def test_unknown_mandatory_feature_in_cache(archivers, request):
repository._location = Location(archiver.repository_location)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest) as cache:
is_localcache = isinstance(cache, LocalCache)
cache.begin_txn()
cache.cache_config.mandatory_features = {"unknown-feature"}
cache.commit()
@@ -295,7 +296,8 @@ def wipe_wrapper(*args):
with patch.object(LocalCache, "wipe_cache", wipe_wrapper):
cmd(archiver, "create", "test", "input")
assert called
if is_localcache:
assert called
with Repository(archiver.repository_path, exclusive=True) as repository:
if remote_repo:
@@ -315,10 +317,14 @@ def test_check_cache(archivers, request):
cache.begin_txn()
cache.chunks.incref(list(cache.chunks.iteritems())[0][0])
cache.commit()
persistent = isinstance(cache, LocalCache)
if not persistent:
pytest.skip("check_cache is pointless if we do not have a persistent chunks cache")
with pytest.raises(AssertionError):
check_cache(archiver)
@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
def test_env_use_chunks_archive(archivers, request, monkeypatch):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)

@@ -34,7 +34,7 @@ def test_check_corrupted_repository(archiver):
def corrupt_archiver(archiver):
create_test_files(archiver.input_path)
cmd(archiver, "rcreate", RK_ENCRYPTION)
archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"]["path"]
archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"].get("path")
def corrupt(file, amount=1):
@@ -48,9 +48,16 @@ def corrupt(file, amount=1):
@pytest.mark.allow_cache_wipe
def test_cache_chunks(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
create_src_archive(archiver, "test")
chunks_path = os.path.join(archiver.cache_path, "chunks")
if not os.path.exists(chunks_path):
pytest.skip("no persistent chunks index for this kind of Cache implementation")
chunks_before_corruption = set(ChunkIndex(path=chunks_path).iteritems())
corrupt(chunks_path)
assert not archiver.FORK_DEFAULT # test does not support forking
@@ -74,6 +81,9 @@ def sync_wrapper(cache):
def test_cache_files(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
cmd(archiver, "create", "test", "input")
corrupt(os.path.join(archiver.cache_path, "files"))
out = cmd(archiver, "create", "test1", "input")
@@ -83,6 +93,9 @@ def test_cache_files(archiver):
def test_chunks_archive(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
cmd(archiver, "create", "test1", "input")
# Find ID of test1, so we can corrupt it later :)
target_id = cmd(archiver, "rlist", "--format={id}{NL}").strip()
@@ -93,6 +106,8 @@ def test_chunks_archive(archiver):
cmd(archiver, "rinfo", "--json")
chunks_archive = os.path.join(archiver.cache_path, "chunks.archive.d")
if not os.path.exists(chunks_archive):
pytest.skip("Only LocalCache has a per-archive chunks index cache.")
assert len(os.listdir(chunks_archive)) == 4 # two archives, one chunks cache and one .integrity file each
corrupt(os.path.join(chunks_archive, target_id + ".compact"))
@@ -114,6 +129,9 @@ def test_chunks_archive(archiver):
def test_old_version_interfered(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
# Modify the main manifest ID without touching the manifest ID in the integrity section.
# This happens if a version without integrity checking modifies the cache.
config_path = os.path.join(archiver.cache_path, "config")

@@ -12,6 +12,7 @@
import pytest
from ... import platform
from ...cache import get_cache_impl
from ...constants import * # NOQA
from ...manifest import Manifest
from ...platform import is_cygwin, is_win32, is_darwin
@@ -540,20 +541,21 @@ def test_create_pattern_intermediate_folders_first(archivers, request):
assert out_list.index("d x/b") < out_list.index("- x/b/foo_b")
def test_create_no_cache_sync(archivers, request):
@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "local"), reason="only works with AdHocCache")
def test_create_no_cache_sync_adhoc(archivers, request): # TODO: add test for AdHocWithFilesCache
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "rdelete", "--cache-only")
create_json = json.loads(
cmd(archiver, "create", "--no-cache-sync", "--json", "--error", "test", "input")
) # ignore experimental warning
cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "--json", "test", "input")
)
info_json = json.loads(cmd(archiver, "info", "-a", "test", "--json"))
create_stats = create_json["cache"]["stats"]
info_stats = info_json["cache"]["stats"]
assert create_stats == info_stats
cmd(archiver, "rdelete", "--cache-only")
cmd(archiver, "create", "--no-cache-sync", "test2", "input")
cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "test2", "input")
cmd(archiver, "rinfo")
cmd(archiver, "check")

@@ -168,7 +168,12 @@ def test_debug_refcount_obj(archivers, request):
create_json = json.loads(cmd(archiver, "create", "--json", "test", "input"))
archive_id = create_json["archive"]["id"]
output = cmd(archiver, "debug", "refcount-obj", archive_id).strip()
assert output == f"object {archive_id} has 1 referrers [info from chunks cache]."
# LocalCache does precise refcounting, so we'll get 1 reference for the archive.
# AdHocCache or AdHocWithFilesCache doesn't, we'll get ChunkIndex.MAX_VALUE as refcount.
assert (
output == f"object {archive_id} has 1 referrers [info from chunks cache]."
or output == f"object {archive_id} has 4294966271 referrers [info from chunks cache]."
)
# Invalid IDs do not abort or return an error
output = cmd(archiver, "debug", "refcount-obj", "124", "xyza").strip()
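
The large refcount accepted above is ChunkIndex.MAX_VALUE, which the ad-hoc chunks caches store as an "infinite" refcount for chunks that already exist in the repository. The arithmetic checks out as follows (treating the reserved-range interpretation as an assumption about the hash index internals):

# 4294966271 == 2**32 - 1025: the top of the uint32 range is presumably reserved
# for special markers, everything at or below MAX_VALUE is a usable refcount.
assert 4294966271 == 2**32 - 1025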

@@ -25,9 +25,8 @@ def test_delete(archivers, request):
cmd(archiver, "extract", "test.2", "--dry-run")
output = cmd(archiver, "delete", "-a", "test.2", "--stats")
assert "Original size: -" in output # negative size == deleted data
# Make sure all data except the manifest has been deleted
with Repository(archiver.repository_path) as repository:
assert len(repository) == 1
output = cmd(archiver, "rlist")
assert output == "" # no archives left!
def test_delete_multiple(archivers, request):

@@ -40,9 +40,9 @@ def test_list_chunk_counts(archivers, request):
fd.write(b"baab" * 2000000)
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test", "input")
output = cmd(archiver, "list", "test", "--format", "{num_chunks} {unique_chunks} {path}{NL}")
assert "0 0 input/empty_file" in output
assert "2 2 input/two_chunks" in output
output = cmd(archiver, "list", "test", "--format", "{num_chunks} {path}{NL}")
assert "0 input/empty_file" in output
assert "2 input/two_chunks" in output
def test_list_size(archivers, request):

@@ -153,15 +153,18 @@ def test_recreate_rechunkify(archivers, request):
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,128")
cmd(archiver, "create", "test2", "input", "--files-cache=disabled")
chunks_list = cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks} {unique_chunks}")
num_chunks, unique_chunks = map(int, chunks_list.split(" "))
# test1 and test2 do not deduplicate
assert num_chunks == unique_chunks
num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
# right now, the file is chunked differently
assert num_chunks1 != num_chunks2
cmd(archiver, "recreate", "--chunker-params", "default")
check_cache(archiver)
# test1 and test2 do deduplicate after recreate
assert int(cmd(archiver, "list", "test1", "input/large_file", "--format={size}"))
assert not int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{unique_chunks}"))
num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
# now the files are chunked in the same way
# TODO: this is a rather weak test, it could be improved by comparing the IDs in the chunk lists,
# to make sure that everything is completely deduplicated now (both files have identical chunks).
assert num_chunks1 == num_chunks2
def test_recreate_fixed_rechunkify(archivers, request):

@@ -189,7 +189,7 @@ def test_does_not_contain_manifest(self, cache):
def test_does_not_delete_existing_chunks(self, repository, cache):
assert cache.seen_chunk(H(1)) == ChunkIndex.MAX_VALUE
cache.chunk_decref(H(1), Statistics())
cache.chunk_decref(H(1), 1, Statistics())
assert repository.get(H(1)) == b"1234"
def test_seen_chunk_add_chunk_size(self, cache):
@@ -199,7 +199,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
"""E.g. checkpoint archives"""
cache.add_chunk(H(5), {}, b"1010", stats=Statistics())
assert cache.seen_chunk(H(5)) == 1
cache.chunk_decref(H(5), Statistics())
cache.chunk_decref(H(5), 1, Statistics())
assert not cache.seen_chunk(H(5))
with pytest.raises(Repository.ObjectNotFound):
repository.get(H(5))
@@ -220,9 +220,9 @@ def test_txn(self, cache):
def test_incref_after_add_chunk(self, cache):
assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
assert cache.chunk_incref(H(3), Statistics()) == (H(3), 4)
assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
def test_existing_incref_after_add_chunk(self, cache):
"""This case occurs with part files, see Archive.chunk_file."""
assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
assert cache.chunk_incref(H(1), Statistics()) == (H(1), 4)
assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)

@@ -127,6 +127,7 @@ def archiver(tmp_path, set_env_variables):
archiver.patterns_file_path = os.fspath(tmp_path / "patterns")
os.environ["BORG_KEYS_DIR"] = archiver.keys_path
os.environ["BORG_CACHE_DIR"] = archiver.cache_path
# os.environ["BORG_CACHE_IMPL"] = "adhocwithfiles"
os.mkdir(archiver.input_path)
os.chmod(archiver.input_path, 0o777) # avoid troubles with fakeroot / FUSE
os.mkdir(archiver.output_path)

@@ -84,8 +84,8 @@ def upgrade_item(self, *, item):
chunks, chunks_healthy = self.hlm.retrieve(id=hlid, default=(None, None))
if chunks is not None:
item.chunks = chunks
for chunk_id, _ in chunks:
self.cache.chunk_incref(chunk_id, self.archive.stats)
for chunk_id, chunk_size in chunks:
self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
if chunks_healthy is not None:
item.chunks_healthy = chunks
del item.source # not used for hardlinks any more, replaced by hlid