Mirror of https://github.com/borgbackup/borg.git
Commit 73e46358c3: 3 changed files with 81 additions and 20 deletions
borg/archive.py
@@ -920,31 +920,56 @@ def add_reference(id_, size, csize, cdata=None):
                     self.repository.put(id_, cdata)
 
         def verify_file_chunks(item):
-            """Verifies that all file chunks are present
+            """Verifies that all file chunks are present.
 
-            Missing file chunks will be replaced with new chunks of the same
-            length containing all zeros.
+            Missing file chunks will be replaced with new chunks of the same length containing all zeros.
+            If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
            """
            offset = 0
            chunk_list = []
            chunks_replaced = False
-            for chunk_id, size, csize in item[b'chunks']:
+            has_chunks_healthy = b'chunks_healthy' in item
+            chunks_current = item[b'chunks']
+            chunks_healthy = item[b'chunks_healthy'] if has_chunks_healthy else chunks_current
+            assert len(chunks_current) == len(chunks_healthy)
+            for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
+                chunk_id, size, csize = chunk_healthy
                 if chunk_id not in self.chunks:
-                    # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
-                    self.error_found = chunks_replaced = True
-                    data = bytes(size)
-                    chunk_id = self.key.id_hash(data)
-                    cdata = self.key.encrypt(data)
-                    csize = len(cdata)
-                    add_reference(chunk_id, size, csize, cdata)
+                    # a chunk of the healthy list is missing
+                    if chunk_current == chunk_healthy:
+                        logger.error('{}: New missing file chunk detected (Byte {}-{}). '
+                                     'Replacing with all-zero chunk.'.format(
+                                     item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                        self.error_found = chunks_replaced = True
+                        data = bytes(size)
+                        chunk_id = self.key.id_hash(data)
+                        cdata = self.key.encrypt(data)
+                        csize = len(cdata)
+                        add_reference(chunk_id, size, csize, cdata)
+                    else:
+                        logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). '
+                                    'It has an all-zero replacement chunk already.'.format(
+                                    item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                        chunk_id, size, csize = chunk_current
+                        add_reference(chunk_id, size, csize)
                 else:
-                    add_reference(chunk_id, size, csize)
-                chunk_list.append((chunk_id, size, csize))
+                    if chunk_current == chunk_healthy:
+                        # normal case, all fine.
+                        add_reference(chunk_id, size, csize)
+                    else:
+                        logger.info('{}: Healed previously missing file chunk! (Byte {}-{}).'.format(
+                            item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
+                        add_reference(chunk_id, size, csize)
+                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
+                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                 offset += size
-            if chunks_replaced and b'chunks_healthy' not in item:
+            if chunks_replaced and not has_chunks_healthy:
                 # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
                 item[b'chunks_healthy'] = item[b'chunks']
+            if has_chunks_healthy and chunk_list == chunks_healthy:
+                logger.info('{}: Completely healed previously damaged file!'.format(
+                    item[b'path'].decode('utf-8', 'surrogateescape')))
+                del item[b'chunks_healthy']
             item[b'chunks'] = chunk_list
 
         def robust_iterator(archive):
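The healing scheme in this hunk is easier to see outside of borg's classes. Below is a minimal standalone sketch of the same idea; it is illustrative only, not borg's API: the item dict, the (id, size) tuples, and the store dict are hypothetical stand-ins for borg's item metadata, chunk list entries, and repository, and SHA-256 stands in for the keyed chunk id hash.

import hashlib

def heal_chunks(item, store):
    # item['chunks']: current list of (id, size) tuples for one file
    # item['chunks_healthy']: the original, correct list (only present
    #                         while the file is damaged)
    # store: dict mapping chunk id -> chunk data (the "repository")
    has_healthy = 'chunks_healthy' in item
    chunks_current = item['chunks']
    chunks_healthy = item['chunks_healthy'] if has_healthy else chunks_current
    assert len(chunks_current) == len(chunks_healthy)
    chunk_list = []
    replaced = False
    for current, healthy in zip(chunks_current, chunks_healthy):
        chunk_id, size = healthy
        if chunk_id not in store:
            if current == healthy:
                # new damage: substitute an all-zero chunk of the same size
                data = bytes(size)
                chunk_id = hashlib.sha256(data).hexdigest()
                store[chunk_id] = data
                replaced = True
            else:
                # still missing: keep the existing all-zero replacement
                chunk_id, size = current
        # else: the healthy chunk is present again, so referencing it heals
        # any earlier all-zero replacement automatically
        chunk_list.append((chunk_id, size))
    if replaced and not has_healthy:
        # first repair: remember the correct ids so we can maybe heal later
        item['chunks_healthy'] = item['chunks']
    if has_healthy and chunk_list == chunks_healthy:
        del item['chunks_healthy']  # all chunks are back: fully healed
    item['chunks'] = chunk_list

The key invariant is that 'chunks_healthy' exists only while a file is damaged: it is created on the first repair and deleted again once every healthy chunk can be referenced.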
borg/archiver.py
@@ -973,9 +973,12 @@ def build_parser(self, args=None, prog=None):
         - Check if archive metadata chunk is present. if not, remove archive from
           manifest.
         - For all files (items) in the archive, for all chunks referenced by these
-          files, check if chunk is present (if not and we are in repair mode, replace
-          it with a same-size chunk of zeros). This requires reading of archive and
-          file metadata, but not data.
+          files, check if chunk is present.
+          If a chunk is not present and we are in repair mode, replace it with a same-size
+          replacement chunk of zeros.
+          If a previously lost chunk reappears (e.g. via a later backup) and we are in
+          repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+          This requires reading of archive and file metadata, but not data.
         - If we are in repair mode and we checked all the archives: delete orphaned
           chunks from the repo.
         - if you use a remote repo server via ssh:, the archive check is executed on
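The "delete orphaned chunks" step in this help text can be pictured as a toy mark-and-sweep over plain data structures. Borg itself tracks per-chunk reference counts, so this is only a sketch of the concept, with hypothetical dict/list shapes:

def delete_orphaned_chunks(repo_chunks, archives):
    # repo_chunks: dict mapping chunk id -> data
    # archives: iterable of archives, each an iterable of items, where an
    #           item is a dict with a 'chunks' list of (id, size) tuples
    referenced = set()
    for archive in archives:
        for item in archive:
            for chunk_id, _size in item.get('chunks', ()):
                referenced.add(chunk_id)
    for chunk_id in list(repo_chunks):
        if chunk_id not in referenced:
            del repo_chunks[chunk_id]  # referenced by no archive: orphaned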
borg/testsuite/archiver.py
@@ -1142,12 +1142,45 @@ def test_missing_file_chunk(self):
         with repository:
             for item in archive.iter_items():
                 if item[b'path'].endswith('testsuite/archiver.py'):
-                    repository.delete(item[b'chunks'][-1][0])
+                    valid_chunks = item[b'chunks']
+                    killed_chunk = valid_chunks[-1]
+                    repository.delete(killed_chunk[0])
                     break
             else:
                 self.assert_true(False)  # should not happen
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
-        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('New missing file chunk detected', output)
         self.cmd('check', self.repository_location, exit_code=0)
+        # check that the file in the old archives has now a different chunk list without the killed chunk
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item[b'path'].endswith('testsuite/archiver.py'):
+                        self.assert_not_equal(valid_chunks, item[b'chunks'])
+                        self.assert_not_in(killed_chunk, item[b'chunks'])
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
+        # do a fresh backup (that will include the killed chunk)
+        with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
+            self.create_src_archive('archive3')
+        # check should be able to heal the file now:
+        output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('Healed previously missing file chunk', output)
+        self.assert_in('testsuite/archiver.py: Completely healed previously damaged file!', output)
+        # check that the file in the old archives has the correct chunks again
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item[b'path'].endswith('testsuite/archiver.py'):
+                        self.assert_equal(valid_chunks, item[b'chunks'])
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
 
     def test_missing_archive_item_chunk(self):
         archive, repository = self.open_archive('archive1')
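The scenario this test drives (delete a chunk, repair, back the lost data up again, repair again) can also be replayed against the hypothetical heal_chunks sketch shown after the first hunk. The put helper is likewise hypothetical, a stand-in for a content-addressed repository insert:

import hashlib

def put(store, data):
    # hypothetical helper: content-addressed insert, returns the chunk id
    chunk_id = hashlib.sha256(data).hexdigest()
    store[chunk_id] = data
    return chunk_id

store = {}
data = b'some file content'
item = {'chunks': [(put(store, data), len(data))]}
lost_id = item['chunks'][0][0]

del store[lost_id]                   # simulate the killed chunk
heal_chunks(item, store)             # first repair: all-zero replacement
assert 'chunks_healthy' in item      # the correct ids were remembered
assert store[item['chunks'][0][0]] == bytes(len(data))

put(store, data)                     # "fresh backup" re-adds the lost chunk
heal_chunks(item, store)             # second repair: the file is healed
assert 'chunks_healthy' not in item  # completely healed
assert item['chunks'][0][0] == lost_id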