From a912c027573ea031094de91827434e64bb0a3675 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sun, 6 Sep 2015 01:10:43 +0200
Subject: [PATCH] detect inconsistency / corruption / hash collision, closes #170

added a check that compares the size of the new chunk with the stored size of the
already existing chunk in storage that has the same id_hash value.
raise an exception if there is a size mismatch.

this could happen if:

- the stored size is somehow incorrect (corruption or software bug)
- we found a hash collision for the id_hash (for sha256, this is very unlikely)
---
 borg/cache.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/borg/cache.py b/borg/cache.py
index 2391be275..def181629 100644
--- a/borg/cache.py
+++ b/borg/cache.py
@@ -347,9 +347,9 @@ class Cache:
     def add_chunk(self, id, data, stats):
         if not self.txn_active:
             self.begin_txn()
-        if self.seen_chunk(id):
-            return self.chunk_incref(id, stats)
         size = len(data)
+        if self.seen_chunk(id, size):
+            return self.chunk_incref(id, stats)
         data = self.key.encrypt(data)
         csize = len(data)
         self.repository.put(id, data, wait=False)
@@ -357,8 +357,14 @@ class Cache:
         stats.update(size, csize, True)
         return id, size, csize
 
-    def seen_chunk(self, id):
-        return self.chunks.get(id, (0, 0, 0))[0]
+    def seen_chunk(self, id, size=None):
+        refcount, stored_size, _ = self.chunks.get(id, (0, None, None))
+        if size is not None and stored_size is not None and size != stored_size:
+            # we already have a chunk with that id, but different size.
+            # this is either a hash collision (unlikely) or corruption or a bug.
+            raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
+                id, stored_size, size))
+        return refcount
 
     def chunk_incref(self, id, stats):
         if not self.txn_active: