1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-27 18:28:42 +00:00

RepositoryCache: abort on data corruption

This commit is contained in:
Marian Beermann 2017-06-03 12:27:35 +02:00
parent b544af2af1
commit 4faaa7d1fa
2 changed files with 23 additions and 38 deletions

View file

@ -1087,9 +1087,6 @@ class RepositoryCache(RepositoryNoCache):
should return the initial data (as returned by *transform*).
"""
class InvalidateCacheEntry(Exception):
pass
def __init__(self, repository, pack=None, unpack=None, transform=None):
super().__init__(repository, transform)
self.pack = pack or (lambda data: data)
@ -1104,7 +1101,6 @@ def __init__(self, repository, pack=None, unpack=None, transform=None):
self.slow_misses = 0
self.slow_lat = 0.0
self.evictions = 0
self.checksum_errors = 0
self.enospc = 0
def query_size_limit(self):
@ -1149,10 +1145,10 @@ def add_entry(self, key, data, cache):
def close(self):
logger.debug('RepositoryCache: current items %d, size %s / %s, %d hits, %d misses, %d slow misses (+%.1fs), '
'%d evictions, %d ENOSPC hit, %d checksum errors', '%d evictions, %d ENOSPC hit',
len(self.cache), format_file_size(self.size), format_file_size(self.size_limit),
self.hits, self.misses, self.slow_misses, self.slow_lat,
self.evictions, self.enospc, self.checksum_errors) self.evictions, self.enospc)
self.cache.clear()
shutil.rmtree(self.basedir)
@ -1162,31 +1158,24 @@ def get_many(self, keys, cache=True):
for key in keys:
if key in self.cache:
file = self.key_filename(key)
try: with open(file, 'rb') as fd:
with open(file, 'rb') as fd: self.hits += 1
self.hits += 1 yield self.unpack(fd.read())
yield self.unpack(fd.read())
continue # go to the next key
except self.InvalidateCacheEntry:
self.cache.remove(key)
self.size -= os.stat(file).st_size
self.checksum_errors += 1
os.unlink(file)
# fall through to fetch the object again
for key_, data in repository_iterator:
if key_ == key:
transformed = self.add_entry(key, data, cache)
self.misses += 1
yield transformed
break
else: else:
# slow path: eviction during this get_many removed this key from the cache for key_, data in repository_iterator:
t0 = time.perf_counter() if key_ == key:
data = self.repository.get(key) transformed = self.add_entry(key, data, cache)
self.slow_lat += time.perf_counter() - t0 self.misses += 1
transformed = self.add_entry(key, data, cache) yield transformed
self.slow_misses += 1 break
yield transformed else:
# slow path: eviction during this get_many removed this key from the cache
t0 = time.perf_counter()
data = self.repository.get(key)
self.slow_lat += time.perf_counter() - t0
transformed = self.add_entry(key, data, cache)
self.slow_misses += 1
yield transformed
# Consume any pending requests # Consume any pending requests
for _ in repository_iterator: for _ in repository_iterator:
pass pass
@ -1220,8 +1209,7 @@ def unpack(data):
csize, checksum = cache_struct.unpack(data[:cache_struct.size])
compressed = data[cache_struct.size:]
if checksum != xxh64(compressed):
logger.warning('Repository metadata cache: detected corrupted data in cache!') raise IntegrityError('detected corrupted data in metadata cache')
raise RepositoryCache.InvalidateCacheEntry
return csize, compressor.decompress(compressed)
def transform(id_, data):

View file

@ -10,6 +10,7 @@
from ..repository import Repository
from ..crypto.key import PlaintextKey
from ..compress import CompressionSpec
from ..helpers import IntegrityError
from .hashindex import H
from .key import TestKey
@ -193,9 +194,5 @@ def test_cache_corruption(self, decrypted_cache: RepositoryCache, H1, H2, H3):
fd.write(corrupted)
fd.truncate()
assert next(iterator) == (7, b'5678') with pytest.raises(IntegrityError):
assert decrypted_cache.checksum_errors == 1 assert next(iterator) == (7, b'5678')
assert decrypted_cache.slow_misses == 1
assert next(iterator) == (103, bytes(100))
assert decrypted_cache.hits == 3
assert decrypted_cache.misses == 3