Merge pull request #1607 from ThomasWaldmann/tune-check-verify-data

borg check --verify-data tuning
This commit is contained in:
TW 2016-09-29 19:52:52 +02:00 committed by GitHub
commit 23ac8af7fa
3 changed files with 58 additions and 18 deletions

View File

@ -33,7 +33,7 @@ from .helpers import decode_dict, StableDict
from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
from .helpers import ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern
from .helpers import consume
from .helpers import consume, chunkit
from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
from .item import Item, ArchiveItem
from .key import key_factory
@ -1045,23 +1045,34 @@ class ArchiveChecker:
errors = 0
defect_chunks = []
pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01)
for chunk_id, (refcount, *_) in self.chunks.iteritems():
pi.show()
try:
encrypted_data = self.repository.get(chunk_id)
except Repository.ObjectNotFound:
self.error_found = True
errors += 1
logger.error('chunk %s not found', bin_to_hex(chunk_id))
continue
try:
_chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
_, data = self.key.decrypt(_chunk_id, encrypted_data)
except IntegrityError as integrity_error:
self.error_found = True
errors += 1
logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
defect_chunks.append(chunk_id)
for chunk_infos in chunkit(self.chunks.iteritems(), 100):
chunk_ids = [chunk_id for chunk_id, _ in chunk_infos]
chunk_data_iter = self.repository.get_many(chunk_ids)
chunk_ids_revd = list(reversed(chunk_ids))
while chunk_ids_revd:
pi.show()
chunk_id = chunk_ids_revd.pop(-1) # better efficiency
try:
encrypted_data = next(chunk_data_iter)
except (Repository.ObjectNotFound, IntegrityError) as err:
self.error_found = True
errors += 1
logger.error('chunk %s: %s', bin_to_hex(chunk_id), err)
if isinstance(err, IntegrityError):
defect_chunks.append(chunk_id)
# as the exception killed our generator, make a new one for remaining chunks:
if chunk_ids_revd:
chunk_ids = list(reversed(chunk_ids_revd))
chunk_data_iter = self.repository.get_many(chunk_ids)
else:
try:
_chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
_, data = self.key.decrypt(_chunk_id, encrypted_data)
except IntegrityError as integrity_error:
self.error_found = True
errors += 1
logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
defect_chunks.append(chunk_id)
pi.finish()
if defect_chunks:
if self.repair:

View File

@ -1494,6 +1494,17 @@ def file_status(mode):
return '?'
def chunkit(it, size):
    """
    Chunk an iterator <it> into pieces of <size>.

    Returns an iterator yielding lists of up to <size> consecutive items taken
    from <it>; only the last list can be shorter. Nothing is yielded for an
    empty input (or for a <size> of 0, as the first slice is already empty).

    >>> list(chunkit('ABCDEFG', 3))
    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
    """
    iterable = iter(it)
    # 2-argument iter(): call the lambda again and again until it returns the
    # sentinel [] - that is what islice gives once <iterable> is exhausted.
    return iter(lambda: list(islice(iterable, size)), [])
def consume(iterator, n=None):
"""Advance the iterator n-steps ahead. If n is none, consume entirely."""
# Use functions that consume iterators at C speed.

View File

@ -25,6 +25,7 @@ from ..helpers import load_excludes
from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
from ..helpers import swidth_slice
from ..helpers import chunkit
from . import BaseTestCase, environment_variable, FakeInputs
@ -977,6 +978,23 @@ def test_chunk_file_wrapper():
assert cfw.exhausted
def test_chunkit():
    """chunkit() yields lists of at most <size> items and stays exhausted afterwards."""
    pieces = chunkit('abcdefg', 3)
    for expected in (['a', 'b', 'c'], ['d', 'e', 'f'], ['g']):
        assert next(pieces) == expected
    # once exhausted, every further next() has to keep raising StopIteration
    for _ in range(2):
        with pytest.raises(StopIteration):
            next(pieces)

    # input shorter than <size> gives exactly one (partial) chunk
    short = chunkit('ab', 3)
    assert list(short) == [['a', 'b']]

    # empty input gives no chunks at all
    empty = chunkit('', 3)
    assert list(empty) == []
def test_clean_lines():
conf = """\
#comment