1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-01 12:45:34 +00:00

cache: replace .stats() by a dummy

Dummy returns all-zero stats from that call.

The problem was that these values can no longer be computed from the chunks
cache: it has no correct refcounts and often no size information.

Also removed hashindex.ChunkIndex.summarize (previously used by the above-mentioned
.stats() call) and .stats_against (unused) for the same reason.
This commit is contained in:
Thomas Waldmann 2024-08-14 01:46:55 +02:00
parent 0306ba9a63
commit fc6d459875
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
4 changed files with 2 additions and 88 deletions

View file

@ -398,22 +398,7 @@ def __str__(self):
Summary = namedtuple("Summary", ["total_size", "unique_size", "total_unique_chunks", "total_chunks"])
def stats(self):
    """
    Return aggregate statistics as a dict keyed by the ``Summary`` fields
    (total_size, unique_size, total_unique_chunks, total_chunks).
    """
    # Imported locally rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from .archive import Archive
    if isinstance(self, AdHocCache) and getattr(self, "chunks", None) is None:
        self.chunks = self._load_chunks_from_repo() # AdHocCache usually only has .chunks after begin_txn.
    # XXX: this should really be moved down to `hashindex.pyx`
    total_size, unique_size, total_unique_chunks, total_chunks = self.chunks.summarize()
    # since borg 1.2 we have new archive metadata telling the total size per archive,
    # so we can just sum up all archives to get the "all archives" stats:
    # The total_size obtained from summarize() above is discarded here and
    # rebuilt from the per-archive stats.
    total_size = 0
    for archive_name in self.manifest.archives:
        archive = Archive(self.manifest, archive_name)
        stats = archive.calc_stats(self, want_unique=False)
        total_size += stats.osize
    stats = self.Summary(total_size, unique_size, total_unique_chunks, total_chunks)._asdict()
    return stats
    # NOTE(review): as written in this flattened diff view, the line below
    # follows `return stats` and is therefore unreachable. The accompanying
    # commit description says the all-zero dummy replaces the computation
    # above - confirm against the actual file.
    return self.Summary(0, 0, 0, 0)._asdict() # dummy to not cause crash with current code
def format_tuple(self):
stats = self.stats()

View file

@ -463,65 +463,6 @@ cdef class ChunkIndex(IndexBase):
iter.key = key - self.key_size
return iter
def summarize(self):
    """
    Iterate over every entry of this index and accumulate chunk statistics.

    Return a tuple ``(size, unique_size, unique_chunks, chunks)`` where:

    - size: sum over all entries of (entry size * entry refcount)
    - unique_size: sum over all entries of the entry size
    - unique_chunks: number of entries
    - chunks: sum over all entries of the refcount
    """
    cdef uint64_t size = 0, unique_size = 0, chunks = 0, unique_chunks = 0
    cdef uint32_t *values
    cdef uint32_t refcount
    # NULL key asks hashindex_next_key for the first entry.
    cdef unsigned char *key = NULL

    while True:
        key = hashindex_next_key(self.index, key)
        if not key:
            # no further entries
            break
        unique_chunks += 1
        # The value words start key_size bytes after the key pointer;
        # values[0] is used as the refcount, values[1] as the size.
        values = <uint32_t*> (key + self.key_size)
        # NOTE(review): _le32toh presumably converts little-endian to host
        # byte order - confirm against its definition.
        refcount = _le32toh(values[0])
        assert refcount <= _MAX_VALUE, "invalid reference count"
        chunks += refcount
        unique_size += _le32toh(values[1])
        # cast to 64 bit before multiplying, so the product is computed in 64 bit
        size += <uint64_t> _le32toh(values[1]) * _le32toh(values[0])

    return size, unique_size, unique_chunks, chunks
def stats_against(self, ChunkIndex master_index):
    """
    Calculate chunk statistics of this index against *master_index*.

    A chunk is counted as unique if the number of references
    in this index matches the number of references in *master_index*.

    This index must be a subset of *master_index*.

    Return the same statistics tuple as summarize:
    size, unique_size, unique_chunks, chunks.

    Raises ValueError if a key of this index is not found in *master_index*.
    """
    cdef uint64_t size = 0, unique_size = 0, chunks = 0, unique_chunks = 0
    cdef uint32_t our_refcount, chunk_size
    cdef const uint32_t *our_values
    cdef const uint32_t *master_values
    # NULL key asks hashindex_next_key for the first entry.
    cdef const unsigned char *key = NULL
    cdef HashIndex *master = master_index.index

    while True:
        key = hashindex_next_key(self.index, key)
        if not key:
            # no further entries
            break
        # our value words start key_size bytes after the key pointer
        our_values = <const uint32_t*> (key + self.key_size)
        master_values = <const uint32_t*> hashindex_get(master, key)
        if not master_values:
            raise ValueError('stats_against: key contained in self but not in master_index.')
        # the refcount comes from this index, the chunk size from the master index
        our_refcount = _le32toh(our_values[0])
        chunk_size = _le32toh(master_values[1])
        chunks += our_refcount
        # cast to 64 bit before multiplying, so the product is computed in 64 bit
        size += <uint64_t> chunk_size * our_refcount
        # the stored words are compared directly here, without _le32toh conversion
        if our_values[0] == master_values[0]:
            # our refcount equals the master's refcount, so this chunk is unique to us
            unique_chunks += 1
            unique_size += chunk_size

    return size, unique_size, unique_chunks, chunks
def add(self, key, refs, size):
assert len(key) == self.key_size
cdef uint32_t[2] data

View file

@ -33,7 +33,7 @@
ChunkerTestCase,
]
SELFTEST_COUNT = 33
SELFTEST_COUNT = 32
class SelfTestResult(TestResult):

View file

@ -141,18 +141,6 @@ def test_chunkindex_merge(self):
assert idx1[H(3)] == (3, 300)
assert idx1[H(4)] == (6, 400)
def test_chunkindex_summarize(self):
    """Check the four totals summarize() returns for three entries with refcounts 1, 2 and 3."""
    idx = ChunkIndex()
    # each value is a (refcount, size) pair
    idx[H(1)] = 1, 1000
    idx[H(2)] = 2, 2000
    idx[H(3)] = 3, 3000

    size, unique_size, unique_chunks, chunks = idx.summarize()
    # size weights every chunk size by its refcount
    assert size == 1000 + 2 * 2000 + 3 * 3000
    # unique_size counts every chunk size once
    assert unique_size == 1000 + 2000 + 3000
    # chunks is the sum of the refcounts
    assert chunks == 1 + 2 + 3
    # unique_chunks is the number of entries
    assert unique_chunks == 3
def test_flags(self):
idx = NSIndex()
key = H(0)