diff --git a/src/borg/archive.py b/src/borg/archive.py index dd5eba215..16ab75e31 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -33,7 +33,7 @@ from .helpers import decode_dict, StableDict from .helpers import int_to_bigint, bigint_to_int, bin_to_hex from .helpers import ProgressIndicatorPercent, log_multi from .helpers import PathPrefixPattern, FnmatchPattern -from .helpers import consume +from .helpers import consume, chunkit from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec from .item import Item, ArchiveItem from .key import key_factory @@ -1045,23 +1045,34 @@ class ArchiveChecker: errors = 0 defect_chunks = [] pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01) - for chunk_id, (refcount, *_) in self.chunks.iteritems(): - pi.show() - try: - encrypted_data = self.repository.get(chunk_id) - except Repository.ObjectNotFound: - self.error_found = True - errors += 1 - logger.error('chunk %s not found', bin_to_hex(chunk_id)) - continue - try: - _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id - _, data = self.key.decrypt(_chunk_id, encrypted_data) - except IntegrityError as integrity_error: - self.error_found = True - errors += 1 - logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error) - defect_chunks.append(chunk_id) + for chunk_infos in chunkit(self.chunks.iteritems(), 100): + chunk_ids = [chunk_id for chunk_id, _ in chunk_infos] + chunk_data_iter = self.repository.get_many(chunk_ids) + chunk_ids_revd = list(reversed(chunk_ids)) + while chunk_ids_revd: + pi.show() + chunk_id = chunk_ids_revd.pop(-1) # better efficiency + try: + encrypted_data = next(chunk_data_iter) + except (Repository.ObjectNotFound, IntegrityError) as err: + self.error_found = True + errors += 1 + logger.error('chunk %s: %s', bin_to_hex(chunk_id), err) + if isinstance(err, IntegrityError): + defect_chunks.append(chunk_id) + # as the exception killed our generator, make a 
new one for remaining chunks: + if chunk_ids_revd: + chunk_ids = list(reversed(chunk_ids_revd)) + chunk_data_iter = self.repository.get_many(chunk_ids) + else: + try: + _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id + _, data = self.key.decrypt(_chunk_id, encrypted_data) + except IntegrityError as integrity_error: + self.error_found = True + errors += 1 + logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error) + defect_chunks.append(chunk_id) pi.finish() if defect_chunks: if self.repair: diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 5c4f0feff..6d6b8c7e7 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -1494,6 +1494,17 @@ def file_status(mode): return '?' +def chunkit(it, size): + """ + Chunk an iterator `it` into pieces of `size`. + + >>> list(chunkit('ABCDEFG', 3)) + [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']] + """ + iterable = iter(it) + return iter(lambda: list(islice(iterable, size)), []) + + def consume(iterator, n=None): """Advance the iterator n-steps ahead. If n is none, consume entirely.""" # Use functions that consume iterators at C speed. diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index b2b568d7c..55569e96e 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -25,6 +25,7 @@ from ..helpers import load_excludes from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern from ..helpers import swidth_slice +from ..helpers import chunkit from . 
import BaseTestCase, environment_variable, FakeInputs @@ -977,6 +978,23 @@ def test_chunk_file_wrapper(): assert cfw.exhausted +def test_chunkit(): + it = chunkit('abcdefg', 3) + assert next(it) == ['a', 'b', 'c'] + assert next(it) == ['d', 'e', 'f'] + assert next(it) == ['g'] + with pytest.raises(StopIteration): + next(it) + with pytest.raises(StopIteration): + next(it) + + it = chunkit('ab', 3) + assert list(it) == [['a', 'b']] + + it = chunkit('', 3) + assert list(it) == [] + + def test_clean_lines(): conf = """\ #comment