From fc6d459875244399f6e95166cc8aab98c302edb5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 14 Aug 2024 01:46:55 +0200 Subject: [PATCH] cache: replace .stats() by a dummy Dummy returns all-zero stats from that call. Problem was that these values can't be computed from the chunks cache anymore. No correct refcounts, often no size information. Also removed hashindex.ChunkIndex.summarize (previously used by the above mentioned .stats() call) and .stats_against (unused) for same reason. --- src/borg/cache.py | 17 +--------- src/borg/hashindex.pyx | 59 --------------------------------- src/borg/selftest.py | 2 +- src/borg/testsuite/hashindex.py | 12 ------- 4 files changed, 2 insertions(+), 88 deletions(-) diff --git a/src/borg/cache.py b/src/borg/cache.py index 0fc38283f..7018cd001 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -398,22 +398,7 @@ def __str__(self): Summary = namedtuple("Summary", ["total_size", "unique_size", "total_unique_chunks", "total_chunks"]) def stats(self): - from .archive import Archive - - if isinstance(self, AdHocCache) and getattr(self, "chunks", None) is None: - self.chunks = self._load_chunks_from_repo() # AdHocCache usually only has .chunks after begin_txn. - - # XXX: this should really be moved down to `hashindex.pyx` - total_size, unique_size, total_unique_chunks, total_chunks = self.chunks.summarize() - # since borg 1.2 we have new archive metadata telling the total size per archive, - # so we can just sum up all archives to get the "all archives" stats: - total_size = 0 - for archive_name in self.manifest.archives: - archive = Archive(self.manifest, archive_name) - stats = archive.calc_stats(self, want_unique=False) - total_size += stats.osize - stats = self.Summary(total_size, unique_size, total_unique_chunks, total_chunks)._asdict() - return stats + return self.Summary(0, 0, 0, 0)._asdict() # dummy to not cause crash with current code def format_tuple(self): stats = self.stats() diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 94149105c..3e2757fec 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -463,65 +463,6 @@ cdef class ChunkIndex(IndexBase): iter.key = key - self.key_size return iter - def summarize(self): - cdef uint64_t size = 0, unique_size = 0, chunks = 0, unique_chunks = 0 - cdef uint32_t *values - cdef uint32_t refcount - cdef unsigned char *key = NULL - - while True: - key = hashindex_next_key(self.index, key) - if not key: - break - unique_chunks += 1 - values = (key + self.key_size) - refcount = _le32toh(values[0]) - assert refcount <= _MAX_VALUE, "invalid reference count" - chunks += refcount - unique_size += _le32toh(values[1]) - size += _le32toh(values[1]) * _le32toh(values[0]) - - return size, unique_size, unique_chunks, chunks - - def stats_against(self, ChunkIndex master_index): - """ - Calculate chunk statistics of this index against *master_index*. - - A chunk is counted as unique if the number of references - in this index matches the number of references in *master_index*. - - This index must be a subset of *master_index*. - - Return the same statistics tuple as summarize: - size, unique_size, unique_chunks, chunks. - """ - cdef uint64_t size = 0, unique_size = 0, chunks = 0, unique_chunks = 0 - cdef uint32_t our_refcount, chunk_size - cdef const uint32_t *our_values - cdef const uint32_t *master_values - cdef const unsigned char *key = NULL - cdef HashIndex *master = master_index.index - - while True: - key = hashindex_next_key(self.index, key) - if not key: - break - our_values = (key + self.key_size) - master_values = hashindex_get(master, key) - if not master_values: - raise ValueError('stats_against: key contained in self but not in master_index.') - our_refcount = _le32toh(our_values[0]) - chunk_size = _le32toh(master_values[1]) - - chunks += our_refcount - size += chunk_size * our_refcount - if our_values[0] == master_values[0]: - # our refcount equals the master's refcount, so this chunk is unique to us - unique_chunks += 1 - unique_size += chunk_size - - return size, unique_size, unique_chunks, chunks - def add(self, key, refs, size): assert len(key) == self.key_size cdef uint32_t[2] data diff --git a/src/borg/selftest.py b/src/borg/selftest.py index 8f6b693bb..53415fde1 100644 --- a/src/borg/selftest.py +++ b/src/borg/selftest.py @@ -33,7 +33,7 @@ ChunkerTestCase, ] -SELFTEST_COUNT = 33 +SELFTEST_COUNT = 32 class SelfTestResult(TestResult): diff --git a/src/borg/testsuite/hashindex.py b/src/borg/testsuite/hashindex.py index 54e56e5f3..19a04b90e 100644 --- a/src/borg/testsuite/hashindex.py +++ b/src/borg/testsuite/hashindex.py @@ -141,18 +141,6 @@ def test_chunkindex_merge(self): assert idx1[H(3)] == (3, 300) assert idx1[H(4)] == (6, 400) - def test_chunkindex_summarize(self): - idx = ChunkIndex() - idx[H(1)] = 1, 1000 - idx[H(2)] = 2, 2000 - idx[H(3)] = 3, 3000 - - size, unique_size, unique_chunks, chunks = idx.summarize() - assert size == 1000 + 2 * 2000 + 3 * 3000 - assert unique_size == 1000 + 2000 + 3000 - assert chunks == 1 + 2 + 3 - assert unique_chunks == 3 - def test_flags(self): idx = NSIndex() key = H(0)