borg check --verify-data tuning

This commit is contained in:
Thomas Waldmann 2016-09-16 02:49:54 +02:00
parent 2314d8b3f4
commit 19eb75984e
3 changed files with 58 additions and 18 deletions

View File

@ -33,7 +33,7 @@ from .helpers import decode_dict, StableDict
from .helpers import int_to_bigint, bigint_to_int, bin_to_hex from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
from .helpers import ProgressIndicatorPercent, log_multi from .helpers import ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern from .helpers import PathPrefixPattern, FnmatchPattern
from .helpers import consume from .helpers import consume, chunkit
from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
from .item import Item, ArchiveItem from .item import Item, ArchiveItem
from .key import key_factory from .key import key_factory
@ -1045,23 +1045,34 @@ class ArchiveChecker:
errors = 0 errors = 0
defect_chunks = [] defect_chunks = []
pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01) pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01)
for chunk_id, (refcount, *_) in self.chunks.iteritems(): for chunk_infos in chunkit(self.chunks.iteritems(), 100):
pi.show() chunk_ids = [chunk_id for chunk_id, _ in chunk_infos]
try: chunk_data_iter = self.repository.get_many(chunk_ids)
encrypted_data = self.repository.get(chunk_id) chunk_ids_revd = list(reversed(chunk_ids))
except Repository.ObjectNotFound: while chunk_ids_revd:
self.error_found = True pi.show()
errors += 1 chunk_id = chunk_ids_revd.pop(-1) # better efficiency
logger.error('chunk %s not found', bin_to_hex(chunk_id)) try:
continue encrypted_data = next(chunk_data_iter)
try: except (Repository.ObjectNotFound, IntegrityError) as err:
_chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id self.error_found = True
_, data = self.key.decrypt(_chunk_id, encrypted_data) errors += 1
except IntegrityError as integrity_error: logger.error('chunk %s: %s', bin_to_hex(chunk_id), err)
self.error_found = True if isinstance(err, IntegrityError):
errors += 1 defect_chunks.append(chunk_id)
logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error) # as the exception killed our generator, make a new one for remaining chunks:
defect_chunks.append(chunk_id) if chunk_ids_revd:
chunk_ids = list(reversed(chunk_ids_revd))
chunk_data_iter = self.repository.get_many(chunk_ids)
else:
try:
_chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
_, data = self.key.decrypt(_chunk_id, encrypted_data)
except IntegrityError as integrity_error:
self.error_found = True
errors += 1
logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
defect_chunks.append(chunk_id)
pi.finish() pi.finish()
if defect_chunks: if defect_chunks:
if self.repair: if self.repair:

View File

@ -1494,6 +1494,17 @@ def file_status(mode):
return '?' return '?'
def chunkit(it, size):
    """
    Chunk an iterator <it> into pieces of <size>.

    Returns an iterator that yields lists holding up to <size> consecutive
    items of <it>. Only the last list may be shorter than <size>; an empty
    <it> yields no lists at all.

    >>> list(chunkit('ABCDEFG', 3))
    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
    """
    iterable = iter(it)
    # Two-argument iter(): call the lambda again and again until it returns the
    # sentinel []. islice() gives an empty list exactly when <iterable> has
    # nothing left, so that is where the chunk stream ends.
    return iter(lambda: list(islice(iterable, size)), [])
def consume(iterator, n=None): def consume(iterator, n=None):
"""Advance the iterator n-steps ahead. If n is none, consume entirely.""" """Advance the iterator n-steps ahead. If n is none, consume entirely."""
# Use functions that consume iterators at C speed. # Use functions that consume iterators at C speed.

View File

@ -25,6 +25,7 @@ from ..helpers import load_excludes
from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2 from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
from ..helpers import swidth_slice from ..helpers import swidth_slice
from ..helpers import chunkit
from . import BaseTestCase, environment_variable, FakeInputs from . import BaseTestCase, environment_variable, FakeInputs
@ -977,6 +978,23 @@ def test_chunk_file_wrapper():
assert cfw.exhausted assert cfw.exhausted
def test_chunkit():
    """chunkit yields full pieces, then the remainder, then stays exhausted."""
    pieces = chunkit('abcdefg', 3)
    for expected in (['a', 'b', 'c'], ['d', 'e', 'f'], ['g']):
        assert next(pieces) == expected
    # once exhausted, every further next() has to raise StopIteration again
    for _ in range(2):
        with pytest.raises(StopIteration):
            next(pieces)

    # input shorter than the piece size: a single, short piece
    short_input = chunkit('ab', 3)
    assert list(short_input) == [['a', 'b']]

    # empty input: no pieces at all
    empty_input = chunkit('', 3)
    assert list(empty_input) == []
def test_clean_lines(): def test_clean_lines():
conf = """\ conf = """\
#comment #comment