
Merge pull request #7846 from ThomasWaldmann/files-cache-with-size

files cache with size
Commit 66b62c6fc9, authored by TW on 2024-07-18 23:33:10 +02:00, committed by GitHub.
20 changed files with 681 additions and 486 deletions

@@ -84,6 +84,18 @@ General:
- ``pyfuse3``: only try to load pyfuse3
- ``llfuse``: only try to load llfuse
- ``none``: do not try to load an implementation
BORG_CACHE_IMPL
Choose the implementation for the clientside cache, choose one of:
- ``local``: uses a persistent chunks cache and keeps it in a perfect state (precise refcounts and
sizes), requiring a potentially resource expensive cache sync in multi-client scenarios.
Also has a persistent files cache.
- ``adhoc``: builds a non-persistent chunks cache by querying the repo. Chunks cache contents
are somewhat sloppy for already existing chunks, concerning their refcount ("infinite") and
size (0). No files cache (slow, will chunk all input files). DEPRECATED.
- ``adhocwithfiles``: Like ``adhoc``, but with a persistent files cache. Default implementation.
- ``cli``: Determine the cache implementation from cli options. Without special options, will
usually end up with the ``local`` implementation.
BORG_SELFTEST
This can be used to influence borg's builtin self-tests. The default is to execute the tests
at the beginning of each borg command invocation.
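
As a quick illustration of the new switch: a minimal sketch of how BORG_CACHE_IMPL could be read. The get_cache_impl helper does exist (it is imported by the tests further down), but its body here and the fallback default are assumptions based on the documentation above:

import os

def get_cache_impl():
    # assumed sketch: return the configured cache implementation name; per the docs
    # above, AdHocWithFilesCache ("adhocwithfiles") is the default implementation.
    return os.environ.get("BORG_CACHE_IMPL", "adhocwithfiles")

# e.g., to force the persistent LocalCache for one run (illustrative usage):
# os.environ["BORG_CACHE_IMPL"] = "local"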

@@ -643,14 +643,14 @@ def write_checkpoint(self):
# so we can already remove it here, the next .save() will then commit this cleanup.
# remove its manifest entry, remove its ArchiveItem chunk, remove its item_ptrs chunks:
del self.manifest.archives[self.checkpoint_name]
self.cache.chunk_decref(self.id, self.stats)
self.cache.chunk_decref(self.id, 1, self.stats)
for id in metadata.item_ptrs:
self.cache.chunk_decref(id, self.stats)
self.cache.chunk_decref(id, 1, self.stats)
# also get rid of that part item, we do not want to have it in next checkpoint or final archive
tail_chunks = self.items_buffer.restore_chunks_state()
# tail_chunks contain the tail of the archive items metadata stream, not needed for next commit.
for id in tail_chunks:
self.cache.chunk_decref(id, self.stats)
self.cache.chunk_decref(id, 1, self.stats) # TODO can we have real size here?
def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
name = name or self.name
@@ -1024,7 +1024,7 @@ def set_meta(self, key, value):
new_id = self.key.id_hash(data)
self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
self.manifest.archives[self.name] = (new_id, metadata.time)
self.cache.chunk_decref(self.id, self.stats)
self.cache.chunk_decref(self.id, 1, self.stats)
self.id = new_id
def rename(self, name):
@@ -1052,12 +1052,15 @@ def fetch_async_response(wait=True):
error = True
return exception_ignored # must not return None here
def chunk_decref(id, stats):
def chunk_decref(id, size, stats):
try:
self.cache.chunk_decref(id, stats, wait=False)
self.cache.chunk_decref(id, size, stats, wait=False)
except KeyError:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
nonlocal error
if forced == 0:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
error = True
else:
fetch_async_response(wait=False)
@@ -1073,13 +1076,13 @@ def chunk_decref(id, stats):
pi.show(i)
_, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM)
unpacker.feed(data)
chunk_decref(items_id, stats)
chunk_decref(items_id, 1, stats)
try:
for item in unpacker:
item = Item(internal_dict=item)
if "chunks" in item:
for chunk_id, size in item.chunks:
chunk_decref(chunk_id, stats)
chunk_decref(chunk_id, size, stats)
except (TypeError, ValueError):
# if items metadata spans multiple chunks and one chunk got dropped somehow,
# it could be that unpacker yields bad types
@@ -1096,12 +1099,12 @@ def chunk_decref(id, stats):
# delete the blocks that store all the references that end up being loaded into metadata.items:
for id in self.metadata.item_ptrs:
chunk_decref(id, stats)
chunk_decref(id, 1, stats)
# in forced delete mode, we try hard to delete at least the manifest entry,
# if possible also the archive superblock, even if processing the items raises
# some harmless exception.
chunk_decref(self.id, stats)
chunk_decref(self.id, 1, stats)
del self.manifest.archives[self.name]
while fetch_async_response(wait=True) is not None:
# we did async deletes, process outstanding results (== exceptions),
@@ -1510,7 +1513,7 @@ def process_pipe(self, *, path, cache, fd, mode, user=None, group=None):
except BackupOSError:
# see comments in process_file's exception handler, same issue here.
for chunk in item.get("chunks", []):
cache.chunk_decref(chunk.id, self.stats, wait=False)
cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
raise
else:
item.get_size(memorize=True)
@@ -1544,7 +1547,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
item.chunks = []
for chunk_id, chunk_size in hl_chunks:
# process one-by-one, so we will know in item.chunks how far we got
chunk_entry = cache.chunk_incref(chunk_id, self.stats)
chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
item.chunks.append(chunk_entry)
else: # normal case, no "2nd+" hardlink
if not is_special_file:
@@ -1552,26 +1555,26 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
started_hashing = time.monotonic()
path_hash = self.key.id_hash(hashed_path)
self.stats.hashing_time += time.monotonic() - started_hashing
known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
else:
# in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past:
hashed_path = path_hash = None
known, ids = False, None
if ids is not None:
known, chunks = False, None
if chunks is not None:
# Make sure all ids are available
for id_ in ids:
if not cache.seen_chunk(id_):
for chunk in chunks:
if not cache.seen_chunk(chunk.id):
# cache said it is unmodified, but we lost a chunk: process file like modified
status = "M"
break
else:
item.chunks = []
for chunk_id in ids:
for chunk in chunks:
# process one-by-one, so we will know in item.chunks how far we got
chunk_entry = cache.chunk_incref(chunk_id, self.stats)
item.chunks.append(chunk_entry)
cache.chunk_incref(chunk.id, chunk.size, self.stats)
item.chunks.append(chunk)
status = "U" # regular file, unchanged
else:
status = "M" if known else "A" # regular file, modified or added
@@ -1606,7 +1609,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
# block or char device will change without its mtime/size/inode changing.
# also, we must not memorize a potentially inconsistent/corrupt file that
# changed while we backed it up.
cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
cache.memorize_file(hashed_path, path_hash, st, item.chunks)
self.stats.files_stats[status] += 1 # must be done late
if not changed_while_backup:
status = None # we already called print_file_status
@@ -1620,7 +1623,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
# but we will not add an item (see add_item in create_helper) and thus
# they would be orphaned chunks in case that we commit the transaction.
for chunk in item.get("chunks", []):
cache.chunk_decref(chunk.id, self.stats, wait=False)
cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
# Now that we have cleaned up the chunk references, we can re-raise the exception.
# This will skip processing of this file, but might retry or continue with the next one.
raise
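
The process_file hunks above change the files cache contract: file_known_and_unchanged now returns full chunk list entries (id and size) rather than bare chunk ids, and memorize_file stores item.chunks directly. A condensed sketch of the caller-side logic, using only names visible in this diff (the wrapper function itself is hypothetical):

def reuse_cached_file_chunks(cache, item, hashed_path, path_hash, st, stats):
    # hypothetical helper condensing the hunk above
    known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
    if chunks is not None and all(cache.seen_chunk(chunk.id) for chunk in chunks):
        item.chunks = []
        for chunk in chunks:
            # each cached entry already carries its size, so incref can account for it
            cache.chunk_incref(chunk.id, chunk.size, stats)
            item.chunks.append(chunk)
        return "U"  # regular file, unchanged
    return "M" if known else "A"  # modified (cache miss or lost chunk) or added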
@@ -1731,7 +1734,7 @@ def process_file(self, *, tarinfo, status, type, tar):
except BackupOSError:
# see comment in FilesystemObjectProcessors.process_file, same issue here.
for chunk in item.get("chunks", []):
self.cache.chunk_decref(chunk.id, self.stats, wait=False)
self.cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
raise
@@ -2328,10 +2331,10 @@ def orphan_chunks_check(self):
unused = {id_ for id_, entry in self.chunks.iteritems() if entry.refcount == 0}
orphaned = unused - self.possibly_superseded
if orphaned:
logger.error(f"{len(orphaned)} orphaned objects found!")
logger.info(f"{len(orphaned)} orphaned (unused) objects found.")
for chunk_id in orphaned:
logger.debug(f"chunk {bin_to_hex(chunk_id)} is orphaned.")
self.error_found = True
# To support working with AdHocCache or AdHocWithFilesCache, we do not set self.error_found = True.
if self.repair and unused:
logger.info(
"Deleting %d orphaned and %d superseded objects..." % (len(orphaned), len(self.possibly_superseded))
@@ -2444,7 +2447,7 @@ def process_item(self, archive, target, item):
def process_chunks(self, archive, target, item):
if not target.recreate_rechunkify:
for chunk_id, size in item.chunks:
self.cache.chunk_incref(chunk_id, target.stats)
self.cache.chunk_incref(chunk_id, size, target.stats)
return item.chunks
chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
chunk_processor = partial(self.chunk_processor, target)
@@ -2452,8 +2455,9 @@ def process_chunks(self, archive, target, item):
def chunk_processor(self, target, chunk):
chunk_id, data = cached_hash(chunk, self.key.id_hash)
size = len(data)
if chunk_id in self.seen_chunks:
return self.cache.chunk_incref(chunk_id, target.stats)
return self.cache.chunk_incref(chunk_id, size, target.stats)
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
self.cache.repository.async_response(wait=False)
self.seen_chunks.add(chunk_entry.id)
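
Throughout this file, chunk_incref and chunk_decref now take the chunk size from the caller. The reason follows from the BORG_CACHE_IMPL notes above: the ad-hoc chunks caches record already-existing chunks with size 0 and an "infinite" refcount, so the stored size is useless for statistics. A sketch of what a size-aware decref might look like; this is illustrative, not the actual borg.cache code:

def chunk_decref(self, id, size, stats, wait=True):
    # illustrative: use the caller-supplied size for accounting, since the chunks
    # index entry may store size 0 (ad-hoc cache implementations).
    count, _stored_size = self.chunks.decref(id)
    if count == 0:
        del self.chunks[id]
        self.repository.delete(id, wait=wait)
        stats.update(-size, True)
    else:
        stats.update(-size, False)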

@@ -5,7 +5,6 @@
from ..cache import Cache, assert_secure
from ..constants import * # NOQA
from ..helpers import Error, CommandError
from ..helpers import Location
from ..helpers import parse_file_size, hex_to_bin
from ..manifest import Manifest
@@ -52,11 +51,8 @@ def repo_validate(section, name, value=None, check_value=True):
def cache_validate(section, name, value=None, check_value=True):
if section not in ["cache"]:
raise ValueError("Invalid section")
if name in ["previous_location"]:
if check_value:
Location(value)
else:
raise ValueError("Invalid name")
# currently, we do not support setting anything in the cache via borg config.
raise ValueError("Invalid name")
def list_config(config):
default_values = {

@@ -224,7 +224,9 @@ def create_inner(archive, cache, fso):
manifest,
progress=args.progress,
lock_wait=self.lock_wait,
permit_adhoc_cache=args.no_cache_sync,
no_cache_sync_permitted=args.no_cache_sync,
no_cache_sync_forced=args.no_cache_sync_forced,
prefer_adhoc_cache=args.prefer_adhoc_cache,
cache_mode=args.files_cache_mode,
iec=args.iec,
) as cache:
@@ -801,7 +803,19 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
"--no-cache-sync",
dest="no_cache_sync",
action="store_true",
help="experimental: do not synchronize the cache. Implies not using the files cache.",
help="experimental: do not synchronize the chunks cache.",
)
subparser.add_argument(
"--no-cache-sync-forced",
dest="no_cache_sync_forced",
action="store_true",
help="experimental: do not synchronize the chunks cache (forced).",
)
subparser.add_argument(
"--prefer-adhoc-cache",
dest="prefer_adhoc_cache",
action="store_true",
help="experimental: prefer AdHocCache (w/o files cache) over AdHocWithFilesCache (with files cache).",
)
subparser.add_argument(
"--stdin-name",

@@ -59,16 +59,9 @@ def do_rinfo(self, args, repository, manifest, cache):
output += f" out of {format_file_size(storage_quota, iec=args.iec)}"
output += "\n"
output += (
textwrap.dedent(
"""
Cache: {cache.path}
Security dir: {security_dir}
"""
)
.strip()
.format(**info)
)
if hasattr(info["cache"], "path"):
output += "Cache: {cache.path}\n".format(**info)
output += "Security dir: {security_dir}\n".format(**info)
print(output)
print(str(cache))

@@ -143,7 +143,7 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non
transfer_size += size
else:
if not dry_run:
chunk_entry = cache.chunk_incref(chunk_id, archive.stats)
chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
chunks.append(chunk_entry)
present_size += size
if not dry_run:

File diff suppressed because it is too large.

@@ -1184,13 +1184,13 @@ def default(self, o):
from ..repository import Repository
from ..remote import RemoteRepository
from ..archive import Archive
from ..cache import LocalCache, AdHocCache
from ..cache import LocalCache, AdHocCache, AdHocWithFilesCache
if isinstance(o, Repository) or isinstance(o, RemoteRepository):
return {"id": bin_to_hex(o.id), "location": o._location.canonical_path()}
if isinstance(o, Archive):
return o.info()
if isinstance(o, LocalCache):
if isinstance(o, (LocalCache, AdHocWithFilesCache)):
return {"path": o.path, "stats": o.stats()}
if isinstance(o, AdHocCache):
return {"stats": o.stats()}

@@ -18,7 +18,7 @@
from ... import xattr, platform
from ...archive import Archive
from ...archiver import Archiver, PURE_PYTHON_MSGPACK_WARNING
from ...cache import Cache
from ...cache import Cache, LocalCache
from ...constants import * # NOQA
from ...helpers import Location, umount
from ...helpers import EXIT_SUCCESS
@@ -356,9 +356,15 @@ def check_cache(archiver):
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest, sync=False) as cache:
original_chunks = cache.chunks
# the LocalCache implementation has an on-disk chunks cache,
# but AdHocWithFilesCache and AdHocCache don't have persistent chunks cache.
persistent = isinstance(cache, LocalCache)
Cache.destroy(repository)
with Cache(repository, manifest) as cache:
correct_chunks = cache.chunks
if not persistent:
# there is no point in doing the checks
return
assert original_chunks is not correct_chunks
seen = set()
for id, (refcount, size) in correct_chunks.iteritems():

@@ -338,10 +338,11 @@ def test_extra_chunks(archivers, request):
with Repository(archiver.repository_location, exclusive=True) as repository:
repository.put(b"01234567890123456789012345678901", b"xxxx")
repository.commit(compact=False)
cmd(archiver, "check", exit_code=1)
cmd(archiver, "check", exit_code=1)
output = cmd(archiver, "check", "-v", exit_code=0) # orphans are not considered warnings anymore
assert "1 orphaned (unused) objects found." in output
cmd(archiver, "check", "--repair", exit_code=0)
cmd(archiver, "check", exit_code=0)
output = cmd(archiver, "check", "-v", exit_code=0)
assert "orphaned (unused) objects found." not in output
cmd(archiver, "extract", "archive1", "--dry-run", exit_code=0)

@@ -4,7 +4,7 @@
import pytest
from ...cache import Cache, LocalCache
from ...cache import Cache, LocalCache, get_cache_impl
from ...constants import * # NOQA
from ...helpers import Location, get_security_dir, bin_to_hex
from ...helpers import EXIT_ERROR
@@ -153,32 +153,29 @@ def test_repository_move(archivers, request, monkeypatch):
security_dir = get_security_directory(archiver.repository_path)
os.replace(archiver.repository_path, archiver.repository_path + "_new")
archiver.repository_location += "_new"
# borg should notice that the repository location changed and abort.
if archiver.FORK_DEFAULT:
cmd(archiver, "rinfo", exit_code=EXIT_ERROR)
else:
with pytest.raises(Cache.RepositoryAccessAborted):
cmd(archiver, "rinfo")
# if we explicitly allow relocated repos, it should work fine.
monkeypatch.setenv("BORG_RELOCATED_REPO_ACCESS_IS_OK", "yes")
cmd(archiver, "rinfo")
monkeypatch.delenv("BORG_RELOCATED_REPO_ACCESS_IS_OK")
with open(os.path.join(security_dir, "location")) as fd:
location = fd.read()
assert location == Location(archiver.repository_location).canonical_path()
# Needs no confirmation anymore
cmd(archiver, "rinfo")
shutil.rmtree(archiver.cache_path)
# after new repo location was confirmed once, it needs no further confirmation anymore.
cmd(archiver, "rinfo")
shutil.rmtree(security_dir)
# it also needs no confirmation if we have no knowledge about the previous location.
cmd(archiver, "rinfo")
# it will re-create security-related infos in the security dir:
for file in ("location", "key-type", "manifest-timestamp"):
assert os.path.exists(os.path.join(security_dir, file))
def test_security_dir_compat(archivers, request):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "rcreate", RK_ENCRYPTION)
with open(os.path.join(get_security_directory(archiver.repository_path), "location"), "w") as fd:
fd.write("something outdated")
# This is fine, because the cache still has the correct information. security_dir and cache can disagree
# if older versions are used to confirm a renamed repository.
cmd(archiver, "rinfo")
def test_unknown_unencrypted(archivers, request, monkeypatch):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "rcreate", "--encryption=none")
@@ -207,9 +204,12 @@ def test_unknown_feature_on_create(archivers, request):
cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
def test_unknown_feature_on_cache_sync(archivers, request):
# LocalCache.sync checks repo compat
archiver = request.getfixturevalue(archivers)
cmd(archiver, "rcreate", RK_ENCRYPTION)
# delete the cache to trigger a cache sync later in borg create
cmd(archiver, "rdelete", "--cache-only")
add_unknown_feature(archiver.repository_path, Manifest.Operation.READ)
cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
@@ -277,6 +277,7 @@ def test_unknown_mandatory_feature_in_cache(archivers, request):
repository._location = Location(archiver.repository_location)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest) as cache:
is_localcache = isinstance(cache, LocalCache)
cache.begin_txn()
cache.cache_config.mandatory_features = {"unknown-feature"}
cache.commit()
@@ -295,7 +296,8 @@ def wipe_wrapper(*args):
with patch.object(LocalCache, "wipe_cache", wipe_wrapper):
cmd(archiver, "create", "test", "input")
assert called
if is_localcache:
assert called
with Repository(archiver.repository_path, exclusive=True) as repository:
if remote_repo:
@@ -315,10 +317,14 @@ def test_check_cache(archivers, request):
cache.begin_txn()
cache.chunks.incref(list(cache.chunks.iteritems())[0][0])
cache.commit()
persistent = isinstance(cache, LocalCache)
if not persistent:
pytest.skip("check_cache is pointless if we do not have a persistent chunks cache")
with pytest.raises(AssertionError):
check_cache(archiver)
@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
def test_env_use_chunks_archive(archivers, request, monkeypatch):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)

@@ -34,7 +34,7 @@ def test_check_corrupted_repository(archiver):
def corrupt_archiver(archiver):
create_test_files(archiver.input_path)
cmd(archiver, "rcreate", RK_ENCRYPTION)
archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"]["path"]
archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"].get("path")
def corrupt(file, amount=1):
@@ -48,9 +48,16 @@ def corrupt(file, amount=1):
@pytest.mark.allow_cache_wipe
def test_cache_chunks(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
create_src_archive(archiver, "test")
chunks_path = os.path.join(archiver.cache_path, "chunks")
if not os.path.exists(chunks_path):
pytest.skip("no persistent chunks index for this kind of Cache implementation")
chunks_before_corruption = set(ChunkIndex(path=chunks_path).iteritems())
corrupt(chunks_path)
assert not archiver.FORK_DEFAULT # test does not support forking
@@ -74,6 +81,9 @@ def sync_wrapper(cache):
def test_cache_files(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
cmd(archiver, "create", "test", "input")
corrupt(os.path.join(archiver.cache_path, "files"))
out = cmd(archiver, "create", "test1", "input")
@@ -83,6 +93,9 @@ def test_cache_files(archiver):
def test_chunks_archive(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
cmd(archiver, "create", "test1", "input")
# Find ID of test1, so we can corrupt it later :)
target_id = cmd(archiver, "rlist", "--format={id}{NL}").strip()
@@ -93,6 +106,8 @@ def test_chunks_archive(archiver):
cmd(archiver, "rinfo", "--json")
chunks_archive = os.path.join(archiver.cache_path, "chunks.archive.d")
if not os.path.exists(chunks_archive):
pytest.skip("Only LocalCache has a per-archive chunks index cache.")
assert len(os.listdir(chunks_archive)) == 4 # two archives, one chunks cache and one .integrity file each
corrupt(os.path.join(chunks_archive, target_id + ".compact"))
@@ -114,6 +129,9 @@ def test_chunks_archive(archiver):
def test_old_version_interfered(archiver):
corrupt_archiver(archiver)
if archiver.cache_path is None:
pytest.skip("no cache path for this kind of Cache implementation")
# Modify the main manifest ID without touching the manifest ID in the integrity section.
# This happens if a version without integrity checking modifies the cache.
config_path = os.path.join(archiver.cache_path, "config")

@@ -12,6 +12,7 @@
import pytest
from ... import platform
from ...cache import get_cache_impl
from ...constants import * # NOQA
from ...manifest import Manifest
from ...platform import is_cygwin, is_win32, is_darwin
@@ -540,20 +541,21 @@ def test_create_pattern_intermediate_folders_first(archivers, request):
assert out_list.index("d x/b") < out_list.index("- x/b/foo_b")
def test_create_no_cache_sync(archivers, request):
@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "local"), reason="only works with AdHocCache")
def test_create_no_cache_sync_adhoc(archivers, request): # TODO: add test for AdHocWithFilesCache
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "rdelete", "--cache-only")
create_json = json.loads(
cmd(archiver, "create", "--no-cache-sync", "--json", "--error", "test", "input")
) # ignore experimental warning
cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "--json", "test", "input")
)
info_json = json.loads(cmd(archiver, "info", "-a", "test", "--json"))
create_stats = create_json["cache"]["stats"]
info_stats = info_json["cache"]["stats"]
assert create_stats == info_stats
cmd(archiver, "rdelete", "--cache-only")
cmd(archiver, "create", "--no-cache-sync", "test2", "input")
cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "test2", "input")
cmd(archiver, "rinfo")
cmd(archiver, "check")

@@ -168,7 +168,12 @@ def test_debug_refcount_obj(archivers, request):
create_json = json.loads(cmd(archiver, "create", "--json", "test", "input"))
archive_id = create_json["archive"]["id"]
output = cmd(archiver, "debug", "refcount-obj", archive_id).strip()
assert output == f"object {archive_id} has 1 referrers [info from chunks cache]."
# LocalCache does precise refcounting, so we'll get 1 reference for the archive.
# AdHocCache or AdHocWithFilesCache doesn't, we'll get ChunkIndex.MAX_VALUE as refcount.
assert (
output == f"object {archive_id} has 1 referrers [info from chunks cache]."
or output == f"object {archive_id} has 4294966271 referrers [info from chunks cache]."
)
# Invalid IDs do not abort or return an error
output = cmd(archiver, "debug", "refcount-obj", "124", "xyza").strip()
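
The large refcount accepted above is ChunkIndex.MAX_VALUE, which the ad-hoc chunks caches store as an "infinite" refcount for chunks that already exist in the repository. The arithmetic checks out as follows (treating the reserved-range interpretation as an assumption about the hash index internals):

# 4294966271 == 2**32 - 1025: the top of the uint32 range is presumably reserved
# for special markers, everything at or below MAX_VALUE is a usable refcount.
assert 4294966271 == 2**32 - 1025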

@@ -25,9 +25,8 @@ def test_delete(archivers, request):
cmd(archiver, "extract", "test.2", "--dry-run")
output = cmd(archiver, "delete", "-a", "test.2", "--stats")
assert "Original size: -" in output # negative size == deleted data
# Make sure all data except the manifest has been deleted
with Repository(archiver.repository_path) as repository:
assert len(repository) == 1
output = cmd(archiver, "rlist")
assert output == "" # no archives left!
def test_delete_multiple(archivers, request):

@@ -40,9 +40,9 @@ def test_list_chunk_counts(archivers, request):
fd.write(b"baab" * 2000000)
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test", "input")
output = cmd(archiver, "list", "test", "--format", "{num_chunks} {unique_chunks} {path}{NL}")
assert "0 0 input/empty_file" in output
assert "2 2 input/two_chunks" in output
output = cmd(archiver, "list", "test", "--format", "{num_chunks} {path}{NL}")
assert "0 input/empty_file" in output
assert "2 input/two_chunks" in output
def test_list_size(archivers, request):

@@ -153,15 +153,18 @@ def test_recreate_rechunkify(archivers, request):
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,128")
cmd(archiver, "create", "test2", "input", "--files-cache=disabled")
chunks_list = cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks} {unique_chunks}")
num_chunks, unique_chunks = map(int, chunks_list.split(" "))
# test1 and test2 do not deduplicate
assert num_chunks == unique_chunks
num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
# right now, the file is chunked differently
assert num_chunks1 != num_chunks2
cmd(archiver, "recreate", "--chunker-params", "default")
check_cache(archiver)
# test1 and test2 do deduplicate after recreate
assert int(cmd(archiver, "list", "test1", "input/large_file", "--format={size}"))
assert not int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{unique_chunks}"))
num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
# now the files are chunked in the same way
# TODO: this is a rather weak test, it could be improved by comparing the IDs in the chunk lists,
# to make sure that everything is completely deduplicated now (both files have identical chunks).
assert num_chunks1 == num_chunks2
def test_recreate_fixed_rechunkify(archivers, request):

@@ -189,7 +189,7 @@ def test_does_not_contain_manifest(self, cache):
def test_does_not_delete_existing_chunks(self, repository, cache):
assert cache.seen_chunk(H(1)) == ChunkIndex.MAX_VALUE
cache.chunk_decref(H(1), Statistics())
cache.chunk_decref(H(1), 1, Statistics())
assert repository.get(H(1)) == b"1234"
def test_seen_chunk_add_chunk_size(self, cache):
@@ -199,7 +199,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
"""E.g. checkpoint archives"""
cache.add_chunk(H(5), {}, b"1010", stats=Statistics())
assert cache.seen_chunk(H(5)) == 1
cache.chunk_decref(H(5), Statistics())
cache.chunk_decref(H(5), 1, Statistics())
assert not cache.seen_chunk(H(5))
with pytest.raises(Repository.ObjectNotFound):
repository.get(H(5))
@@ -220,9 +220,9 @@ def test_txn(self, cache):
def test_incref_after_add_chunk(self, cache):
assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
assert cache.chunk_incref(H(3), Statistics()) == (H(3), 4)
assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
def test_existing_incref_after_add_chunk(self, cache):
"""This case occurs with part files, see Archive.chunk_file."""
assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
assert cache.chunk_incref(H(1), Statistics()) == (H(1), 4)
assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)

@@ -127,6 +127,7 @@ def archiver(tmp_path, set_env_variables):
archiver.patterns_file_path = os.fspath(tmp_path / "patterns")
os.environ["BORG_KEYS_DIR"] = archiver.keys_path
os.environ["BORG_CACHE_DIR"] = archiver.cache_path
# os.environ["BORG_CACHE_IMPL"] = "adhocwithfiles"
os.mkdir(archiver.input_path)
os.chmod(archiver.input_path, 0o777) # avoid troubles with fakeroot / FUSE
os.mkdir(archiver.output_path)

@@ -84,8 +84,8 @@ def upgrade_item(self, *, item):
chunks, chunks_healthy = self.hlm.retrieve(id=hlid, default=(None, None))
if chunks is not None:
item.chunks = chunks
for chunk_id, _ in chunks:
self.cache.chunk_incref(chunk_id, self.archive.stats)
for chunk_id, chunk_size in chunks:
self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
if chunks_healthy is not None:
item.chunks_healthy = chunks
del item.source # not used for hardlinks any more, replaced by hlid