Mirror of https://github.com/borgbackup/borg.git

heal files if missing chunks re-appear, fixes #148

also: improve logging for archive check
Author: Thomas Waldmann
Date:   2016-07-09 16:38:07 +02:00
Parent: 9bfd233ce1
Commit: 85fe7d7f4c


@@ -912,31 +912,56 @@ def add_reference(id_, size, csize, cdata=None):
                     self.repository.put(id_, cdata)
 
         def verify_file_chunks(item):
-            """Verifies that all file chunks are present
+            """Verifies that all file chunks are present.
 
-            Missing file chunks will be replaced with new chunks of the same
-            length containing all zeros.
+            Missing file chunks will be replaced with new chunks of the same length containing all zeros.
+            If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
             """
             offset = 0
             chunk_list = []
             chunks_replaced = False
-            for chunk_id, size, csize in item[b'chunks']:
+            has_chunks_healthy = b'chunks_healthy' in item
+            chunks_current = item[b'chunks']
+            chunks_healthy = item[b'chunks_healthy'] if has_chunks_healthy else chunks_current
+            assert len(chunks_current) == len(chunks_healthy)
+            for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
+                chunk_id, size, csize = chunk_healthy
                 if chunk_id not in self.chunks:
-                    # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
-                    self.error_found = chunks_replaced = True
-                    data = bytes(size)
-                    chunk_id = self.key.id_hash(data)
-                    cdata = self.key.encrypt(data)
-                    csize = len(cdata)
-                    add_reference(chunk_id, size, csize, cdata)
+                    # a chunk of the healthy list is missing
+                    if chunk_current == chunk_healthy:
+                        logger.error('{}: New missing file chunk detected (Byte {}-{}). '
+                                     'Replacing with all-zero chunk.'.format(
+                                     item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                        self.error_found = chunks_replaced = True
+                        data = bytes(size)
+                        chunk_id = self.key.id_hash(data)
+                        cdata = self.key.encrypt(data)
+                        csize = len(cdata)
+                        add_reference(chunk_id, size, csize, cdata)
+                    else:
+                        logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). '
+                                    'It has a all-zero replacement chunk already.'.format(
+                                    item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                        chunk_id, size, csize = chunk_current
+                        add_reference(chunk_id, size, csize)
                 else:
-                    add_reference(chunk_id, size, csize)
-                chunk_list.append((chunk_id, size, csize))
+                    if chunk_current == chunk_healthy:
+                        # normal case, all fine.
+                        add_reference(chunk_id, size, csize)
+                    else:
+                        logger.info('{}: Healed previously missing file chunk! (Byte {}-{}).'.format(
+                            item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                        add_reference(chunk_id, size, csize)
+                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
+                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                 offset += size
-            if chunks_replaced and b'chunks_healthy' not in item:
+            if chunks_replaced and not has_chunks_healthy:
                 # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
                 item[b'chunks_healthy'] = item[b'chunks']
+            if has_chunks_healthy and chunk_list == chunks_healthy:
+                logger.info('{}: Completely healed previously damaged file!'.format(
+                    item[b'path'].decode('utf-8', 'surrogateescape')))
+                del item[b'chunks_healthy']
             item[b'chunks'] = chunk_list
 
         def robust_iterator(archive):
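
The healing state machine is easier to follow without the repository, encryption and refcount plumbing. Below is a minimal, self-contained sketch of the same logic over toy items of the form {'chunks': [(id, size), ...]}; the names heal_item, present_ids and make_zero_chunk are illustrative only (not borg APIs), and compressed sizes, encryption and reference counting are left out.

```python
def make_zero_chunk(size):
    # stand-in for the all-zero replacement chunk (id derived from its content/size)
    return ('zero-%d' % size, size)

def heal_item(item, present_ids):
    """item: {'chunks': [(id, size), ...], optionally 'chunks_healthy': [...]}.
    present_ids: set of chunk ids currently present in the repository."""
    has_healthy = 'chunks_healthy' in item
    current = item['chunks']
    healthy = item.get('chunks_healthy', current)
    new_chunks, replaced = [], False
    for cur, good in zip(current, healthy):
        good_id, size = good
        if good_id not in present_ids:
            if cur == good:
                # newly missing chunk: substitute an all-zero chunk of the same size
                new_chunks.append(make_zero_chunk(size))
                replaced = True
            else:
                # still missing: keep the existing all-zero replacement
                new_chunks.append(cur)
        else:
            # chunk is present (again): reference the healthy chunk
            new_chunks.append(good)
    if replaced and not has_healthy:
        # first repair: remember the original chunk list so a later run can heal the file
        item['chunks_healthy'] = list(current)
    if has_healthy and new_chunks == list(healthy):
        # every healthy chunk is back: the file is fully healed
        del item['chunks_healthy']
    item['chunks'] = new_chunks
    return item

item = {'chunks': [('a', 4), ('b', 4)]}
heal_item(item, present_ids={'a'})       # 'b' missing -> zero chunk, healthy list remembered
heal_item(item, present_ids={'a', 'b'})  # 'b' re-appeared -> healed, healthy list dropped
```

The key idea mirrored from the diff: chunks_healthy pins the original chunk IDs on the first repair, so a later check/repair run can recognize re-appearing chunks and drop the all-zero placeholders; once the chunk list matches chunks_healthy again, the extra metadata is removed.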