diff --git a/borg/archive.py b/borg/archive.py index ef41c9900..0626acb04 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -22,7 +22,8 @@ from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \ Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \ ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \ - CompressionDecider1, CompressionDecider2, CompressionSpec + CompressionDecider1, CompressionDecider2, CompressionSpec, \ + IntegrityError from .repository import Repository from .platform import acl_get, acl_set from .chunker import Chunker @@ -698,7 +699,17 @@ class ArchiveChecker: self.error_found = False self.possibly_superseded = set() - def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False): + def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False, + save_space=False): + """Perform a set of checks on 'repository' + + :param repair: enable repair mode, write updated or corrected data into repository + :param archive: only check this archive + :param last: only check this number of recent archives + :param prefix: only check archives with this prefix + :param verify_data: integrity verification of data referenced by archives + :param save_space: Repository.commit(save_space) + """ logger.info('Starting archive consistency check...') self.check_all = archive is None and last is None and prefix is None self.repair = repair @@ -712,6 +723,8 @@ class ArchiveChecker: else: self.manifest, _ = Manifest.load(repository, key=self.key) self.rebuild_refcounts(archive=archive, last=last, prefix=prefix) + if verify_data: + self.verify_data() self.orphan_chunks_check() self.finish(save_space=save_space) if self.error_found: @@ -741,6 +754,26 @@ class ArchiveChecker: cdata = 
def verify_data(self):
    """Verify the integrity of every chunk that is referenced by an archive.

    Iterates over ``self.chunks``, skips entries whose reference count is
    zero, fetches each remaining chunk from ``self.repository`` and passes it
    to ``self.key.decrypt``.  Each ``IntegrityError`` is logged together with
    the hex chunk id, counted, and recorded by setting ``self.error_found``;
    verification then continues with the next chunk.  A summary line is
    logged at the end, at error level if at least one chunk failed.
    """
    logger.info('Starting cryptographic data integrity verification...')
    pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%", step=0.01,
                                  same_line=True)
    count = errors = 0
    for chunk_id, (refcount, *_) in self.chunks.iteritems():
        # The progress total is len(self.chunks), so advance it for every
        # index entry, including the unreferenced ones skipped below.
        pi.show()
        if not refcount:
            continue
        try:
            # The fetch is inside the try block as well: an IntegrityError
            # raised while reading the chunk is then reported like a failed
            # decryption instead of aborting the whole verification run.
            # NOTE(review): assumes the repository layer signals corrupt
            # stored data with IntegrityError too - confirm.
            encrypted_data = self.repository.get(chunk_id)
            # Only the integrity check matters here; the result is discarded.
            self.key.decrypt(chunk_id, encrypted_data)
        except IntegrityError as integrity_error:
            self.error_found = True
            errors += 1
            logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
        # Counts every referenced chunk that was examined, defective or not.
        count += 1
    pi.finish()
    log = logger.error if errors else logger.info
    log('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.',
        count, errors)
Please select one.") + return EXIT_ERROR if not args.archives_only: if not repository.check(repair=args.repair, save_space=args.save_space): return EXIT_WARNING if not args.repo_only and not ArchiveChecker().check( repository, repair=args.repair, archive=args.location.archive, - last=args.last, prefix=args.prefix, save_space=args.save_space): + last=args.last, prefix=args.prefix, verify_data=args.verify_data, + save_space=args.save_space): return EXIT_WARNING return EXIT_SUCCESS @@ -1213,6 +1217,18 @@ class Archiver: required). - The archive checks can be time consuming, they can be skipped using the --repository-only option. + + The --verify-data option will perform a full integrity verification (as opposed to + checking the CRC32 of the segment) of data, which means reading the data from the + repository, decrypting and decompressing it. This is a cryptographic verification, + which will detect (accidental) corruption. For encrypted repositories it is + tamper-resistant as well, unless the attacker has access to the keys. + + It is also very slow. + + --verify-data only verifies data used by the archives specified with --last, + --prefix or an explicitly named archive. If none of these are passed, + all data in the repository is verified. 
""") subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False, description=self.do_check.__doc__, @@ -1229,6 +1245,10 @@ class Archiver: subparser.add_argument('--archives-only', dest='archives_only', action='store_true', default=False, help='only perform archives checks') + subparser.add_argument('--verify-data', dest='verify_data', action='store_true', + default=False, + help='perform cryptographic archive data integrity verification ' + '(conflicts with --repository-only)') subparser.add_argument('--repair', dest='repair', action='store_true', default=False, help='attempt to repair any inconsistencies found') diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 1b89515c5..e03973a60 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1590,6 +1590,29 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.cmd('check', self.repository_location, exit_code=0) self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0) + def _test_verify_data(self, *init_args): + shutil.rmtree(self.repository_path) + self.cmd('init', self.repository_location, *init_args) + self.create_src_archive('archive1') + archive, repository = self.open_archive('archive1') + with repository: + for item in archive.iter_items(): + if item[b'path'].endswith('testsuite/archiver.py'): + chunk = item[b'chunks'][-1] + data = repository.get(chunk.id) + b'1234' + repository.put(chunk.id, data) + break + repository.commit() + self.cmd('check', self.repository_location, exit_code=0) + output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1) + assert bin_to_hex(chunk.id) + ', integrity error' in output + + def test_verify_data(self): + self._test_verify_data('--encryption', 'repokey') + + def test_verify_data_unencrypted(self): + self._test_verify_data('--encryption', 'none') + class RemoteArchiverTestCase(ArchiverTestCase): prefix = '__testsuite__:'