Merge pull request #7846 from ThomasWaldmann/files-cache-with-size
files cache with size
Commit: 66b62c6fc9
20 changed files with 681 additions and 486 deletions
@@ -84,6 +84,18 @@ General:
         - ``pyfuse3``: only try to load pyfuse3
         - ``llfuse``: only try to load llfuse
         - ``none``: do not try to load an implementation
+    BORG_CACHE_IMPL
+        Choose the implementation for the clientside cache, choose one of:
+
+        - ``local``: uses a persistent chunks cache and keeps it in a perfect state (precise refcounts and
+          sizes), requiring a potentially resource expensive cache sync in multi-client scenarios.
+          Also has a persistent files cache.
+        - ``adhoc``: builds a non-persistent chunks cache by querying the repo. Chunks cache contents
+          are somewhat sloppy for already existing chunks, concerning their refcount ("infinite") and
+          size (0). No files cache (slow, will chunk all input files). DEPRECATED.
+        - ``adhocwithfiles``: Like ``adhoc``, but with a persistent files cache. Default implementation.
+        - ``cli``: Determine the cache implementation from cli options. Without special options, will
+          usually end up with the ``local`` implementation.
     BORG_SELFTEST
         This can be used to influence borg's builtin self-tests. The default is to execute the tests
         at the beginning of each borg command invocation.
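As a rough illustration of the new setting, the sketch below maps a BORG_CACHE_IMPL value to one of the implementations described above. It only restates the documentation; the pick_cache_impl() helper is hypothetical and is not borg's actual selection code:

import os

# Hypothetical helper, for illustration only: map BORG_CACHE_IMPL to an implementation name.
# Per the docs above, "adhocwithfiles" is the default and "cli" defers to the command line options.
def pick_cache_impl() -> str:
    impl = os.environ.get("BORG_CACHE_IMPL", "adhocwithfiles")
    known = {
        "local": "LocalCache",                    # persistent chunks cache + persistent files cache
        "adhoc": "AdHocCache",                    # non-persistent chunks cache, no files cache (DEPRECATED)
        "adhocwithfiles": "AdHocWithFilesCache",  # like adhoc, but with a persistent files cache
        "cli": "decided by cli options",          # usually ends up with LocalCache
    }
    if impl not in known:
        raise ValueError(f"unknown BORG_CACHE_IMPL value: {impl}")
    return known[impl]

print(pick_cache_impl())  # e.g. "AdHocWithFilesCache" when the variable is unset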
@@ -643,14 +643,14 @@ def write_checkpoint(self):
         # so we can already remove it here, the next .save() will then commit this cleanup.
         # remove its manifest entry, remove its ArchiveItem chunk, remove its item_ptrs chunks:
         del self.manifest.archives[self.checkpoint_name]
-        self.cache.chunk_decref(self.id, self.stats)
+        self.cache.chunk_decref(self.id, 1, self.stats)
         for id in metadata.item_ptrs:
-            self.cache.chunk_decref(id, self.stats)
+            self.cache.chunk_decref(id, 1, self.stats)
         # also get rid of that part item, we do not want to have it in next checkpoint or final archive
         tail_chunks = self.items_buffer.restore_chunks_state()
         # tail_chunks contain the tail of the archive items metadata stream, not needed for next commit.
         for id in tail_chunks:
-            self.cache.chunk_decref(id, self.stats)
+            self.cache.chunk_decref(id, 1, self.stats)  # TODO can we have real size here?

     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
@@ -1024,7 +1024,7 @@ def set_meta(self, key, value):
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
         self.manifest.archives[self.name] = (new_id, metadata.time)
-        self.cache.chunk_decref(self.id, self.stats)
+        self.cache.chunk_decref(self.id, 1, self.stats)
         self.id = new_id

     def rename(self, name):
@@ -1052,12 +1052,15 @@ def fetch_async_response(wait=True):
                 error = True
                 return exception_ignored  # must not return None here

-        def chunk_decref(id, stats):
+        def chunk_decref(id, size, stats):
             try:
-                self.cache.chunk_decref(id, stats, wait=False)
+                self.cache.chunk_decref(id, size, stats, wait=False)
             except KeyError:
-                cid = bin_to_hex(id)
-                raise ChunksIndexError(cid)
+                nonlocal error
+                if forced == 0:
+                    cid = bin_to_hex(id)
+                    raise ChunksIndexError(cid)
+                error = True
             else:
                 fetch_async_response(wait=False)

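The signature change above is the core of this PR: chunk_decref() (and chunk_incref(), further down) now take the chunk size from the caller, so statistics can stay correct even when the chunks cache itself only stores a sloppy size of 0 for already existing chunks, as the adhoc implementations do. The following toy model is only an illustration of that idea, not borg's Cache class:

from collections import namedtuple

ChunkIndexEntry = namedtuple("ChunkIndexEntry", "refcount size")

# Toy chunks index: an adhoc-style cache may record size 0 for chunks that already
# existed in the repository, so it cannot account sizes from its own entries.
chunks = {b"id-1": ChunkIndexEntry(refcount=2, size=0)}


def chunk_decref(id, size, stats):
    entry = chunks[id]
    chunks[id] = entry._replace(refcount=entry.refcount - 1)
    # account the size the caller knows (e.g. from item.chunks), not the stored 0
    stats["deleted_size"] = stats.get("deleted_size", 0) + size


stats = {}
chunk_decref(b"id-1", 4096, stats)
assert chunks[b"id-1"].refcount == 1
assert stats["deleted_size"] == 4096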
@@ -1073,13 +1076,13 @@ def chunk_decref(id, stats):
                 pi.show(i)
                 _, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM)
                 unpacker.feed(data)
-                chunk_decref(items_id, stats)
+                chunk_decref(items_id, 1, stats)
                 try:
                     for item in unpacker:
                         item = Item(internal_dict=item)
                         if "chunks" in item:
                             for chunk_id, size in item.chunks:
-                                chunk_decref(chunk_id, stats)
+                                chunk_decref(chunk_id, size, stats)
                 except (TypeError, ValueError):
                     # if items metadata spans multiple chunks and one chunk got dropped somehow,
                     # it could be that unpacker yields bad types
@@ -1096,12 +1099,12 @@ def chunk_decref(id, stats):

         # delete the blocks that store all the references that end up being loaded into metadata.items:
         for id in self.metadata.item_ptrs:
-            chunk_decref(id, stats)
+            chunk_decref(id, 1, stats)

         # in forced delete mode, we try hard to delete at least the manifest entry,
         # if possible also the archive superblock, even if processing the items raises
         # some harmless exception.
-        chunk_decref(self.id, stats)
+        chunk_decref(self.id, 1, stats)
         del self.manifest.archives[self.name]
         while fetch_async_response(wait=True) is not None:
             # we did async deletes, process outstanding results (== exceptions),
@@ -1510,7 +1513,7 @@ def process_pipe(self, *, path, cache, fd, mode, user=None, group=None):
         except BackupOSError:
             # see comments in process_file's exception handler, same issue here.
             for chunk in item.get("chunks", []):
-                cache.chunk_decref(chunk.id, self.stats, wait=False)
+                cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
             raise
         else:
             item.get_size(memorize=True)
@@ -1544,7 +1547,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
                 item.chunks = []
                 for chunk_id, chunk_size in hl_chunks:
                     # process one-by-one, so we will know in item.chunks how far we got
-                    chunk_entry = cache.chunk_incref(chunk_id, self.stats)
+                    chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
                     item.chunks.append(chunk_entry)
             else:  # normal case, no "2nd+" hardlink
                 if not is_special_file:
@@ -1552,26 +1555,26 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
                     started_hashing = time.monotonic()
                     path_hash = self.key.id_hash(hashed_path)
                     self.stats.hashing_time += time.monotonic() - started_hashing
-                    known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
+                    known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                 else:
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
                     # read-special mode, but we better play safe as this was wrong in the past:
                     hashed_path = path_hash = None
-                    known, ids = False, None
-                if ids is not None:
+                    known, chunks = False, None
+                if chunks is not None:
                     # Make sure all ids are available
-                    for id_ in ids:
-                        if not cache.seen_chunk(id_):
+                    for chunk in chunks:
+                        if not cache.seen_chunk(chunk.id):
                             # cache said it is unmodified, but we lost a chunk: process file like modified
                             status = "M"
                             break
                     else:
                         item.chunks = []
-                        for chunk_id in ids:
+                        for chunk in chunks:
                             # process one-by-one, so we will know in item.chunks how far we got
-                            chunk_entry = cache.chunk_incref(chunk_id, self.stats)
-                            item.chunks.append(chunk_entry)
+                            cache.chunk_incref(chunk.id, chunk.size, self.stats)
+                            item.chunks.append(chunk)
                         status = "U"  # regular file, unchanged
                 else:
                     status = "M" if known else "A"  # regular file, modified or added
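The rename from ids to chunks reflects that the files cache now hands back full (id, size) entries instead of bare chunk ids, so an unchanged file can be re-referenced via chunk_incref(chunk.id, chunk.size, ...) without re-reading it. Below is a deliberately simplified model of that idea; borg's real files cache also checks ctime/mtime/inode/size according to the configured files cache mode and stores more fields:

from collections import namedtuple
from typing import Optional

ChunkListEntry = namedtuple("ChunkListEntry", "id size")

# toy files cache: path hash -> (mtime_ns, list of (chunk id, size)) entries
files_cache: dict[bytes, tuple[int, list[ChunkListEntry]]] = {}


def file_known_and_unchanged(path_hash: bytes, st_mtime_ns: int) -> tuple[bool, Optional[list[ChunkListEntry]]]:
    entry = files_cache.get(path_hash)
    if entry is None:
        return False, None   # file not known to the cache
    mtime_ns, chunks = entry
    if mtime_ns != st_mtime_ns:
        return True, None    # known, but it changed: must be re-chunked
    return True, chunks      # known and unchanged: ids *and* sizes are available


files_cache[b"ph-1"] = (111, [ChunkListEntry(b"id-1", 4096), ChunkListEntry(b"id-2", 1234)])
known, chunks = file_known_and_unchanged(b"ph-1", 111)
assert known and [c.size for c in chunks] == [4096, 1234]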
@@ -1606,7 +1609,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
                     # block or char device will change without its mtime/size/inode changing.
                     # also, we must not memorize a potentially inconsistent/corrupt file that
                     # changed while we backed it up.
-                    cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
+                    cache.memorize_file(hashed_path, path_hash, st, item.chunks)
                 self.stats.files_stats[status] += 1  # must be done late
                 if not changed_while_backup:
                     status = None  # we already called print_file_status
@@ -1620,7 +1623,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal,
                 # but we will not add an item (see add_item in create_helper) and thus
                 # they would be orphaned chunks in case that we commit the transaction.
                 for chunk in item.get("chunks", []):
-                    cache.chunk_decref(chunk.id, self.stats, wait=False)
+                    cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
                 # Now that we have cleaned up the chunk references, we can re-raise the exception.
                 # This will skip processing of this file, but might retry or continue with the next one.
                 raise
@@ -1731,7 +1734,7 @@ def process_file(self, *, tarinfo, status, type, tar):
             except BackupOSError:
                 # see comment in FilesystemObjectProcessors.process_file, same issue here.
                 for chunk in item.get("chunks", []):
-                    self.cache.chunk_decref(chunk.id, self.stats, wait=False)
+                    self.cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
                 raise


@@ -2328,10 +2331,10 @@ def orphan_chunks_check(self):
             unused = {id_ for id_, entry in self.chunks.iteritems() if entry.refcount == 0}
             orphaned = unused - self.possibly_superseded
             if orphaned:
-                logger.error(f"{len(orphaned)} orphaned objects found!")
+                logger.info(f"{len(orphaned)} orphaned (unused) objects found.")
                 for chunk_id in orphaned:
                     logger.debug(f"chunk {bin_to_hex(chunk_id)} is orphaned.")
-                self.error_found = True
+                # To support working with AdHocCache or AdHocWithFilesCache, we do not set self.error_found = True.
             if self.repair and unused:
                 logger.info(
                     "Deleting %d orphaned and %d superseded objects..." % (len(orphaned), len(self.possibly_superseded))
@@ -2444,7 +2447,7 @@ def process_item(self, archive, target, item):
     def process_chunks(self, archive, target, item):
         if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
-                self.cache.chunk_incref(chunk_id, target.stats)
+                self.cache.chunk_incref(chunk_id, size, target.stats)
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         chunk_processor = partial(self.chunk_processor, target)
@@ -2452,8 +2455,9 @@ def process_chunks(self, archive, target, item):

     def chunk_processor(self, target, chunk):
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
+        size = len(data)
         if chunk_id in self.seen_chunks:
-            return self.cache.chunk_incref(chunk_id, target.stats)
+            return self.cache.chunk_incref(chunk_id, size, target.stats)
         chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
@@ -5,7 +5,6 @@
 from ..cache import Cache, assert_secure
 from ..constants import *  # NOQA
 from ..helpers import Error, CommandError
-from ..helpers import Location
 from ..helpers import parse_file_size, hex_to_bin
 from ..manifest import Manifest

@@ -52,11 +51,8 @@ def repo_validate(section, name, value=None, check_value=True):
 def cache_validate(section, name, value=None, check_value=True):
     if section not in ["cache"]:
         raise ValueError("Invalid section")
-    if name in ["previous_location"]:
-        if check_value:
-            Location(value)
-    else:
-        raise ValueError("Invalid name")
+    # currently, we do not support setting anything in the cache via borg config.
+    raise ValueError("Invalid name")

 def list_config(config):
     default_values = {
@@ -224,7 +224,9 @@ def create_inner(archive, cache, fso):
                 manifest,
                 progress=args.progress,
                 lock_wait=self.lock_wait,
-                permit_adhoc_cache=args.no_cache_sync,
+                no_cache_sync_permitted=args.no_cache_sync,
+                no_cache_sync_forced=args.no_cache_sync_forced,
+                prefer_adhoc_cache=args.prefer_adhoc_cache,
                 cache_mode=args.files_cache_mode,
                 iec=args.iec,
             ) as cache:
@@ -801,7 +803,19 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
             "--no-cache-sync",
             dest="no_cache_sync",
             action="store_true",
-            help="experimental: do not synchronize the cache. Implies not using the files cache.",
+            help="experimental: do not synchronize the chunks cache.",
         )
+        subparser.add_argument(
+            "--no-cache-sync-forced",
+            dest="no_cache_sync_forced",
+            action="store_true",
+            help="experimental: do not synchronize the chunks cache (forced).",
+        )
+        subparser.add_argument(
+            "--prefer-adhoc-cache",
+            dest="prefer_adhoc_cache",
+            action="store_true",
+            help="experimental: prefer AdHocCache (w/o files cache) over AdHocWithFilesCache (with files cache).",
+        )
         subparser.add_argument(
             "--stdin-name",
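These options feed the new no_cache_sync_permitted, no_cache_sync_forced and prefer_adhoc_cache keyword arguments shown in the previous hunk. The actual decision logic lives in the cache module, whose diff is suppressed further down, so the sketch below is only a plausible, hypothetical reading of how the flags could combine; it is not borg's real selection code:

# Hypothetical sketch of the flag semantics, for illustration only.
def choose_cache_impl(no_cache_sync_permitted: bool, no_cache_sync_forced: bool, prefer_adhoc_cache: bool) -> str:
    if no_cache_sync_forced or no_cache_sync_permitted:
        # skipping the chunks cache sync means using one of the adhoc implementations;
        # --prefer-adhoc-cache additionally drops the files cache.
        return "AdHocCache" if prefer_adhoc_cache else "AdHocWithFilesCache"
    return "LocalCache"


assert choose_cache_impl(False, False, False) == "LocalCache"
assert choose_cache_impl(True, False, False) == "AdHocWithFilesCache"
assert choose_cache_impl(True, False, True) == "AdHocCache"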
@@ -59,16 +59,9 @@ def do_rinfo(self, args, repository, manifest, cache):
                 output += f" out of {format_file_size(storage_quota, iec=args.iec)}"
             output += "\n"

-            output += (
-                textwrap.dedent(
-                    """
-                    Cache: {cache.path}
-                    Security dir: {security_dir}
-                    """
-                )
-                .strip()
-                .format(**info)
-            )
+            if hasattr(info["cache"], "path"):
+                output += "Cache: {cache.path}\n".format(**info)
+            output += "Security dir: {security_dir}\n".format(**info)

             print(output)
             print(str(cache))
@@ -143,7 +143,7 @@ def do_transfer(self, args, *, repository, manifest, cache, other_repository=Non
                                 transfer_size += size
                             else:
                                 if not dry_run:
-                                    chunk_entry = cache.chunk_incref(chunk_id, archive.stats)
+                                    chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
                                     chunks.append(chunk_entry)
                                 present_size += size
                         if not dry_run:
File diff suppressed because it is too large
@@ -1184,13 +1184,13 @@ def default(self, o):
         from ..repository import Repository
         from ..remote import RemoteRepository
         from ..archive import Archive
-        from ..cache import LocalCache, AdHocCache
+        from ..cache import LocalCache, AdHocCache, AdHocWithFilesCache

         if isinstance(o, Repository) or isinstance(o, RemoteRepository):
             return {"id": bin_to_hex(o.id), "location": o._location.canonical_path()}
         if isinstance(o, Archive):
             return o.info()
-        if isinstance(o, LocalCache):
+        if isinstance(o, (LocalCache, AdHocWithFilesCache)):
             return {"path": o.path, "stats": o.stats()}
         if isinstance(o, AdHocCache):
             return {"stats": o.stats()}
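With this encoder change, the JSON output only contains a cache "path" for implementations that actually have an on-disk cache directory; a plain AdHocCache reports just its stats. That is why tests further down read ["cache"].get("path") instead of ["cache"]["path"]. A tiny self-contained illustration of that consumer-side pattern (the JSON snippets are made up, not real borg output):

import json

# made-up examples of the two JSON shapes a consumer may see
with_path = json.loads('{"cache": {"path": "/home/user/.cache/borg/abc", "stats": {}}}')
without_path = json.loads('{"cache": {"stats": {}}}')

# .get("path") tolerates both shapes; ["path"] would raise KeyError for AdHocCache-style output
assert with_path["cache"].get("path") == "/home/user/.cache/borg/abc"
assert without_path["cache"].get("path") is None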
@@ -18,7 +18,7 @@
 from ... import xattr, platform
 from ...archive import Archive
 from ...archiver import Archiver, PURE_PYTHON_MSGPACK_WARNING
-from ...cache import Cache
+from ...cache import Cache, LocalCache
 from ...constants import *  # NOQA
 from ...helpers import Location, umount
 from ...helpers import EXIT_SUCCESS
@@ -356,9 +356,15 @@ def check_cache(archiver):
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
         with Cache(repository, manifest, sync=False) as cache:
             original_chunks = cache.chunks
+            # the LocalCache implementation has an on-disk chunks cache,
+            # but AdHocWithFilesCache and AdHocCache don't have persistent chunks cache.
+            persistent = isinstance(cache, LocalCache)
         Cache.destroy(repository)
         with Cache(repository, manifest) as cache:
             correct_chunks = cache.chunks
+        if not persistent:
+            # there is no point in doing the checks
+            return
         assert original_chunks is not correct_chunks
         seen = set()
         for id, (refcount, size) in correct_chunks.iteritems():
@@ -338,10 +338,11 @@ def test_extra_chunks(archivers, request):
     with Repository(archiver.repository_location, exclusive=True) as repository:
         repository.put(b"01234567890123456789012345678901", b"xxxx")
         repository.commit(compact=False)
-    cmd(archiver, "check", exit_code=1)
-    cmd(archiver, "check", exit_code=1)
+    output = cmd(archiver, "check", "-v", exit_code=0)  # orphans are not considered warnings anymore
+    assert "1 orphaned (unused) objects found." in output
     cmd(archiver, "check", "--repair", exit_code=0)
-    cmd(archiver, "check", exit_code=0)
+    output = cmd(archiver, "check", "-v", exit_code=0)
+    assert "orphaned (unused) objects found." not in output
     cmd(archiver, "extract", "archive1", "--dry-run", exit_code=0)


@@ -4,7 +4,7 @@

 import pytest

-from ...cache import Cache, LocalCache
+from ...cache import Cache, LocalCache, get_cache_impl
 from ...constants import *  # NOQA
 from ...helpers import Location, get_security_dir, bin_to_hex
 from ...helpers import EXIT_ERROR
@@ -153,32 +153,29 @@ def test_repository_move(archivers, request, monkeypatch):
    security_dir = get_security_directory(archiver.repository_path)
    os.replace(archiver.repository_path, archiver.repository_path + "_new")
    archiver.repository_location += "_new"
    # borg should notice that the repository location changed and abort.
    if archiver.FORK_DEFAULT:
        cmd(archiver, "rinfo", exit_code=EXIT_ERROR)
    else:
        with pytest.raises(Cache.RepositoryAccessAborted):
            cmd(archiver, "rinfo")
    # if we explicitly allow relocated repos, it should work fine.
    monkeypatch.setenv("BORG_RELOCATED_REPO_ACCESS_IS_OK", "yes")
    cmd(archiver, "rinfo")
    monkeypatch.delenv("BORG_RELOCATED_REPO_ACCESS_IS_OK")
    with open(os.path.join(security_dir, "location")) as fd:
        location = fd.read()
        assert location == Location(archiver.repository_location).canonical_path()
    # Needs no confirmation anymore
    cmd(archiver, "rinfo")
    shutil.rmtree(archiver.cache_path)
    # after new repo location was confirmed once, it needs no further confirmation anymore.
    cmd(archiver, "rinfo")
    shutil.rmtree(security_dir)
    # it also needs no confirmation if we have no knowledge about the previous location.
    cmd(archiver, "rinfo")
    # it will re-create security-related infos in the security dir:
    for file in ("location", "key-type", "manifest-timestamp"):
        assert os.path.exists(os.path.join(security_dir, file))


def test_security_dir_compat(archivers, request):
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "rcreate", RK_ENCRYPTION)
    with open(os.path.join(get_security_directory(archiver.repository_path), "location"), "w") as fd:
        fd.write("something outdated")
    # This is fine, because the cache still has the correct information. security_dir and cache can disagree
    # if older versions are used to confirm a renamed repository.
    cmd(archiver, "rinfo")


def test_unknown_unencrypted(archivers, request, monkeypatch):
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "rcreate", "--encryption=none")
@@ -207,9 +204,12 @@ def test_unknown_feature_on_create(archivers, request):
     cmd_raises_unknown_feature(archiver, ["create", "test", "input"])


+@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
 def test_unknown_feature_on_cache_sync(archivers, request):
+    # LocalCache.sync checks repo compat
     archiver = request.getfixturevalue(archivers)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
+    # delete the cache to trigger a cache sync later in borg create
     cmd(archiver, "rdelete", "--cache-only")
     add_unknown_feature(archiver.repository_path, Manifest.Operation.READ)
     cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
@@ -277,6 +277,7 @@ def test_unknown_mandatory_feature_in_cache(archivers, request):
         repository._location = Location(archiver.repository_location)
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
         with Cache(repository, manifest) as cache:
+            is_localcache = isinstance(cache, LocalCache)
             cache.begin_txn()
             cache.cache_config.mandatory_features = {"unknown-feature"}
             cache.commit()
@@ -295,7 +296,8 @@ def wipe_wrapper(*args):
     with patch.object(LocalCache, "wipe_cache", wipe_wrapper):
         cmd(archiver, "create", "test", "input")

-    assert called
+    if is_localcache:
+        assert called

     with Repository(archiver.repository_path, exclusive=True) as repository:
         if remote_repo:
@@ -315,10 +317,14 @@ def test_check_cache(archivers, request):
         cache.begin_txn()
         cache.chunks.incref(list(cache.chunks.iteritems())[0][0])
         cache.commit()
+        persistent = isinstance(cache, LocalCache)
+    if not persistent:
+        pytest.skip("check_cache is pointless if we do not have a persistent chunks cache")
     with pytest.raises(AssertionError):
         check_cache(archiver)


+@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
 def test_env_use_chunks_archive(archivers, request, monkeypatch):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)
@@ -34,7 +34,7 @@ def test_check_corrupted_repository(archiver):
 def corrupt_archiver(archiver):
     create_test_files(archiver.input_path)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
-    archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"]["path"]
+    archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"].get("path")


 def corrupt(file, amount=1):
@@ -48,9 +48,16 @@ def corrupt(file, amount=1):
 @pytest.mark.allow_cache_wipe
 def test_cache_chunks(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     create_src_archive(archiver, "test")
     chunks_path = os.path.join(archiver.cache_path, "chunks")
+    if not os.path.exists(chunks_path):
+        pytest.skip("no persistent chunks index for this kind of Cache implementation")
+
     chunks_before_corruption = set(ChunkIndex(path=chunks_path).iteritems())
+
     corrupt(chunks_path)

     assert not archiver.FORK_DEFAULT  # test does not support forking
@@ -74,6 +81,9 @@ def sync_wrapper(cache):

 def test_cache_files(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     cmd(archiver, "create", "test", "input")
     corrupt(os.path.join(archiver.cache_path, "files"))
     out = cmd(archiver, "create", "test1", "input")
@@ -83,6 +93,9 @@ def test_cache_files(archiver):

 def test_chunks_archive(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     cmd(archiver, "create", "test1", "input")
     # Find ID of test1, so we can corrupt it later :)
     target_id = cmd(archiver, "rlist", "--format={id}{NL}").strip()
@@ -93,6 +106,8 @@ def test_chunks_archive(archiver):
     cmd(archiver, "rinfo", "--json")

     chunks_archive = os.path.join(archiver.cache_path, "chunks.archive.d")
+    if not os.path.exists(chunks_archive):
+        pytest.skip("Only LocalCache has a per-archive chunks index cache.")
     assert len(os.listdir(chunks_archive)) == 4  # two archives, one chunks cache and one .integrity file each

     corrupt(os.path.join(chunks_archive, target_id + ".compact"))
@@ -114,6 +129,9 @@ def test_chunks_archive(archiver):

 def test_old_version_interfered(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     # Modify the main manifest ID without touching the manifest ID in the integrity section.
     # This happens if a version without integrity checking modifies the cache.
     config_path = os.path.join(archiver.cache_path, "config")
@@ -12,6 +12,7 @@
 import pytest

 from ... import platform
+from ...cache import get_cache_impl
 from ...constants import *  # NOQA
 from ...manifest import Manifest
 from ...platform import is_cygwin, is_win32, is_darwin
@@ -540,20 +541,21 @@ def test_create_pattern_intermediate_folders_first(archivers, request):
     assert out_list.index("d x/b") < out_list.index("- x/b/foo_b")


-def test_create_no_cache_sync(archivers, request):
+@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "local"), reason="only works with AdHocCache")
+def test_create_no_cache_sync_adhoc(archivers, request):  # TODO: add test for AdHocWithFilesCache
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "rdelete", "--cache-only")
     create_json = json.loads(
-        cmd(archiver, "create", "--no-cache-sync", "--json", "--error", "test", "input")
-    )  # ignore experimental warning
+        cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "--json", "test", "input")
+    )
     info_json = json.loads(cmd(archiver, "info", "-a", "test", "--json"))
     create_stats = create_json["cache"]["stats"]
     info_stats = info_json["cache"]["stats"]
     assert create_stats == info_stats
     cmd(archiver, "rdelete", "--cache-only")
-    cmd(archiver, "create", "--no-cache-sync", "test2", "input")
+    cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "test2", "input")
     cmd(archiver, "rinfo")
     cmd(archiver, "check")

@@ -168,7 +168,12 @@ def test_debug_refcount_obj(archivers, request):
     create_json = json.loads(cmd(archiver, "create", "--json", "test", "input"))
     archive_id = create_json["archive"]["id"]
     output = cmd(archiver, "debug", "refcount-obj", archive_id).strip()
-    assert output == f"object {archive_id} has 1 referrers [info from chunks cache]."
+    # LocalCache does precise refcounting, so we'll get 1 reference for the archive.
+    # AdHocCache or AdHocWithFilesCache doesn't, we'll get ChunkIndex.MAX_VALUE as refcount.
+    assert (
+        output == f"object {archive_id} has 1 referrers [info from chunks cache]."
+        or output == f"object {archive_id} has 4294966271 referrers [info from chunks cache]."
+    )

     # Invalid IDs do not abort or return an error
     output = cmd(archiver, "debug", "refcount-obj", "124", "xyza").strip()
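The magic number 4294966271 in the new assertion is ChunkIndex.MAX_VALUE, the pseudo-infinite refcount the adhoc caches report for chunks that already exist in the repository. Assuming MAX_VALUE is defined as 2**32 - 1025 (the values above it being reserved for internal markers), the arithmetic matches the literal used here:

# assuming ChunkIndex.MAX_VALUE == 2**32 - 1025, i.e. the top of the 32-bit range minus reserved values
MAX_VALUE = 2**32 - 1025
assert MAX_VALUE == 4294966271  # the refcount asserted for the adhoc caches in the test above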
@@ -25,9 +25,8 @@ def test_delete(archivers, request):
     cmd(archiver, "extract", "test.2", "--dry-run")
     output = cmd(archiver, "delete", "-a", "test.2", "--stats")
     assert "Original size: -" in output  # negative size == deleted data
-    # Make sure all data except the manifest has been deleted
-    with Repository(archiver.repository_path) as repository:
-        assert len(repository) == 1
+    output = cmd(archiver, "rlist")
+    assert output == ""  # no archives left!


 def test_delete_multiple(archivers, request):
@@ -40,9 +40,9 @@ def test_list_chunk_counts(archivers, request):
         fd.write(b"baab" * 2000000)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "create", "test", "input")
-    output = cmd(archiver, "list", "test", "--format", "{num_chunks} {unique_chunks} {path}{NL}")
-    assert "0 0 input/empty_file" in output
-    assert "2 2 input/two_chunks" in output
+    output = cmd(archiver, "list", "test", "--format", "{num_chunks} {path}{NL}")
+    assert "0 input/empty_file" in output
+    assert "2 input/two_chunks" in output


 def test_list_size(archivers, request):
@@ -153,15 +153,18 @@ def test_recreate_rechunkify(archivers, request):
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,128")
     cmd(archiver, "create", "test2", "input", "--files-cache=disabled")
-    chunks_list = cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks} {unique_chunks}")
-    num_chunks, unique_chunks = map(int, chunks_list.split(" "))
-    # test1 and test2 do not deduplicate
-    assert num_chunks == unique_chunks
+    num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
+    num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
+    # right now, the file is chunked differently
+    assert num_chunks1 != num_chunks2
     cmd(archiver, "recreate", "--chunker-params", "default")
     check_cache(archiver)
-    # test1 and test2 do deduplicate after recreate
-    assert int(cmd(archiver, "list", "test1", "input/large_file", "--format={size}"))
-    assert not int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{unique_chunks}"))
+    num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
+    num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
+    # now the files are chunked in the same way
+    # TODO: this is a rather weak test, it could be improved by comparing the IDs in the chunk lists,
+    # to make sure that everything is completely deduplicated now (both files have identical chunks).
+    assert num_chunks1 == num_chunks2


 def test_recreate_fixed_rechunkify(archivers, request):
@@ -189,7 +189,7 @@ def test_does_not_contain_manifest(self, cache):

     def test_does_not_delete_existing_chunks(self, repository, cache):
         assert cache.seen_chunk(H(1)) == ChunkIndex.MAX_VALUE
-        cache.chunk_decref(H(1), Statistics())
+        cache.chunk_decref(H(1), 1, Statistics())
         assert repository.get(H(1)) == b"1234"

     def test_seen_chunk_add_chunk_size(self, cache):
@@ -199,7 +199,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
         """E.g. checkpoint archives"""
         cache.add_chunk(H(5), {}, b"1010", stats=Statistics())
         assert cache.seen_chunk(H(5)) == 1
-        cache.chunk_decref(H(5), Statistics())
+        cache.chunk_decref(H(5), 1, Statistics())
         assert not cache.seen_chunk(H(5))
         with pytest.raises(Repository.ObjectNotFound):
             repository.get(H(5))
@@ -220,9 +220,9 @@ def test_txn(self, cache):

     def test_incref_after_add_chunk(self, cache):
         assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
-        assert cache.chunk_incref(H(3), Statistics()) == (H(3), 4)
+        assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)

     def test_existing_incref_after_add_chunk(self, cache):
         """This case occurs with part files, see Archive.chunk_file."""
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
-        assert cache.chunk_incref(H(1), Statistics()) == (H(1), 4)
+        assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)
@@ -127,6 +127,7 @@ def archiver(tmp_path, set_env_variables):
     archiver.patterns_file_path = os.fspath(tmp_path / "patterns")
     os.environ["BORG_KEYS_DIR"] = archiver.keys_path
     os.environ["BORG_CACHE_DIR"] = archiver.cache_path
+    # os.environ["BORG_CACHE_IMPL"] = "adhocwithfiles"
     os.mkdir(archiver.input_path)
     os.chmod(archiver.input_path, 0o777)  # avoid troubles with fakeroot / FUSE
     os.mkdir(archiver.output_path)
@@ -84,8 +84,8 @@ def upgrade_item(self, *, item):
             chunks, chunks_healthy = self.hlm.retrieve(id=hlid, default=(None, None))
             if chunks is not None:
                 item.chunks = chunks
-                for chunk_id, _ in chunks:
-                    self.cache.chunk_incref(chunk_id, self.archive.stats)
+                for chunk_id, chunk_size in chunks:
+                    self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
                 if chunks_healthy is not None:
                     item.chunks_healthy = chunks
             del item.source  # not used for hardlinks any more, replaced by hlid