1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-04 06:21:46 +00:00

Merge pull request #1300 from ThomasWaldmann/heal-items

heal items
This commit is contained in:
enkore 2016-07-10 00:09:42 +02:00 committed by GitHub
commit 73e46358c3
3 changed files with 81 additions and 20 deletions

View file

@ -920,31 +920,56 @@ def add_reference(id_, size, csize, cdata=None):
self.repository.put(id_, cdata)
def verify_file_chunks(item):
    """Verify that all file chunks of *item* are present, repairing or healing as needed.

    Missing file chunks will be replaced with new chunks of the same length containing all zeros.
    If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.

    ``item`` is a mapping that is read via ``b'path'`` and ``b'chunks'`` and, if present,
    ``b'chunks_healthy'``. Each chunk entry unpacks to ``(chunk_id, size, csize)``.
    ``b'chunks_healthy'`` holds the chunk list as it was before the first repair, while
    ``b'chunks'`` holds the list currently in use (possibly containing all-zero replacements).

    The item is updated in place:

    - ``item[b'chunks']`` is always rewritten with the verified/repaired list.
    - ``item[b'chunks_healthy']`` is created on the first repair and deleted again once
      every chunk of the healthy list is back.

    This function relies on names from the enclosing scope: ``self``, ``logger``,
    ``add_reference`` and ``mark_as_possibly_superseded``.
    """

    def item_path():
        # Decoded lazily: only needed when something is actually logged.
        return item[b'path'].decode('utf-8', 'surrogateescape')

    offset = 0
    chunk_list = []
    chunks_replaced = False
    has_chunks_healthy = b'chunks_healthy' in item
    chunks_current = item[b'chunks']
    # Without a recorded healthy list, the current list is the reference.
    chunks_healthy = item[b'chunks_healthy'] if has_chunks_healthy else chunks_current
    # zip() below would silently truncate to the shorter list, so the lengths must agree.
    assert len(chunks_current) == len(chunks_healthy)
    for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
        chunk_id, size, csize = chunk_healthy
        if chunk_id not in self.chunks:
            # a chunk of the healthy list is missing
            if chunk_current == chunk_healthy:
                logger.error('{}: New missing file chunk detected (Byte {}-{}). '
                             'Replacing with all-zero chunk.'.format(
                                 item_path(), offset, offset + size))
                self.error_found = chunks_replaced = True
                data = bytes(size)
                chunk_id = self.key.id_hash(data)
                cdata = self.key.encrypt(data)
                csize = len(cdata)
                add_reference(chunk_id, size, csize, cdata)
            else:
                logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). '
                            'It has a all-zero replacement chunk already.'.format(
                                item_path(), offset, offset + size))
                # keep using the replacement chunk recorded by the earlier repair
                chunk_id, size, csize = chunk_current
                add_reference(chunk_id, size, csize)
        elif chunk_current == chunk_healthy:
            # normal case, all fine.
            add_reference(chunk_id, size, csize)
        else:
            logger.info('{}: Healed previously missing file chunk! (Byte {}-{}).'.format(
                item_path(), offset, offset + size))
            add_reference(chunk_id, size, csize)
            mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
        chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
        offset += size
    if chunks_replaced and not has_chunks_healthy:
        # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
        item[b'chunks_healthy'] = item[b'chunks']
    if has_chunks_healthy and chunk_list == chunks_healthy:
        logger.info('{}: Completely healed previously damaged file!'.format(item_path()))
        del item[b'chunks_healthy']
    item[b'chunks'] = chunk_list
def robust_iterator(archive):

View file

@ -973,9 +973,12 @@ def build_parser(self, args=None, prog=None):
- Check if archive metadata chunk is present. if not, remove archive from
manifest.
- For all files (items) in the archive, for all chunks referenced by these
files, check if chunk is present (if not and we are in repair mode, replace
it with a same-size chunk of zeros). This requires reading of archive and
file metadata, but not data.
files, check if chunk is present.
If a chunk is not present and we are in repair mode, replace it with a same-size
replacement chunk of zeros.
If a previously lost chunk reappears (e.g. via a later backup) and we are in
repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
This requires reading of archive and file metadata, but not data.
- If we are in repair mode and we checked all the archives: delete orphaned
chunks from the repo.
- if you use a remote repo server via ssh:, the archive check is executed on

View file

@ -1142,12 +1142,45 @@ def test_missing_file_chunk(self):
with repository:
for item in archive.iter_items():
if item[b'path'].endswith('testsuite/archiver.py'):
repository.delete(item[b'chunks'][-1][0])
valid_chunks = item[b'chunks']
killed_chunk = valid_chunks[-1]
repository.delete(killed_chunk[0])
break
else:
self.assert_true(False) # should not happen
repository.commit()
self.cmd('check', self.repository_location, exit_code=1)
self.cmd('check', '--repair', self.repository_location, exit_code=0)
output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
self.assert_in('New missing file chunk detected', output)
self.cmd('check', self.repository_location, exit_code=0)
# check that the file in the old archives has now a different chunk list without the killed chunk
for archive_name in ('archive1', 'archive2'):
archive, repository = self.open_archive(archive_name)
with repository:
for item in archive.iter_items():
if item[b'path'].endswith('testsuite/archiver.py'):
self.assert_not_equal(valid_chunks, item[b'chunks'])
self.assert_not_in(killed_chunk, item[b'chunks'])
break
else:
self.assert_true(False) # should not happen
# do a fresh backup (that will include the killed chunk)
with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
self.create_src_archive('archive3')
# check should be able to heal the file now:
output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0)
self.assert_in('Healed previously missing file chunk', output)
self.assert_in('testsuite/archiver.py: Completely healed previously damaged file!', output)
# check that the file in the old archives has the correct chunks again
for archive_name in ('archive1', 'archive2'):
archive, repository = self.open_archive(archive_name)
with repository:
for item in archive.iter_items():
if item[b'path'].endswith('testsuite/archiver.py'):
self.assert_equal(valid_chunks, item[b'chunks'])
break
else:
self.assert_true(False) # should not happen
def test_missing_archive_item_chunk(self):
archive, repository = self.open_archive('archive1')