detect inconsistency / corruption / hash collision, closes #170

added a check that compares the size of the new chunk with the stored size of the
already existing chunk in storage that has the same id_hash value.
raise an exception if there is a size mismatch.

this could happen if:

- the stored size is somehow incorrect (corruption or software bug)
- we found a hash collision for the id_hash (for sha256, this is very unlikely)
This commit is contained in:
Thomas Waldmann 2015-09-06 01:10:43 +02:00
parent 17c4394896
commit a912c02757
1 changed files with 10 additions and 4 deletions

View File

@ -347,9 +347,9 @@ class Cache:
def add_chunk(self, id, data, stats):
if not self.txn_active:
self.begin_txn()
if self.seen_chunk(id):
return self.chunk_incref(id, stats)
size = len(data)
if self.seen_chunk(id, size):
return self.chunk_incref(id, stats)
data = self.key.encrypt(data)
csize = len(data)
self.repository.put(id, data, wait=False)
@ -357,8 +357,14 @@ class Cache:
stats.update(size, csize, True)
return id, size, csize
def seen_chunk(self, id):
return self.chunks.get(id, (0, 0, 0))[0]
def seen_chunk(self, id, size=None):
refcount, stored_size, _ = self.chunks.get(id, (0, None, None))
if size is not None and stored_size is not None and size != stored_size:
# we already have a chunk with that id, but different size.
# this is either a hash collision (unlikely) or corruption or a bug.
raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
id, stored_size, size))
return refcount
def chunk_incref(self, id, stats):
if not self.txn_active: