check: support integrity verification with --verify-data

This commit is contained in:
Marian Beermann 2016-05-13 22:50:34 +02:00
parent d22bbe17c5
commit 0bceaf0736
No known key found for this signature in database
GPG Key ID: 9B8450B91D1362C1
3 changed files with 81 additions and 3 deletions

View File

@ -22,7 +22,8 @@ from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
CompressionDecider1, CompressionDecider2, CompressionSpec
CompressionDecider1, CompressionDecider2, CompressionSpec, \
IntegrityError
from .repository import Repository
from .platform import acl_get, acl_set
from .chunker import Chunker
@ -698,7 +699,17 @@ class ArchiveChecker:
self.error_found = False
self.possibly_superseded = set()
def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False):
def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False,
save_space=False):
"""Perform a set of checks on 'repository'
:param repair: enable repair mode, write updated or corrected data into repository
:param archive: only check this archive
:param last: only check this number of recent archives
:param prefix: only check archives with this prefix
:param verify_data: integrity verification of data referenced by archives
:param save_space: Repository.commit(save_space)
"""
logger.info('Starting archive consistency check...')
self.check_all = archive is None and last is None and prefix is None
self.repair = repair
@ -712,6 +723,8 @@ class ArchiveChecker:
else:
self.manifest, _ = Manifest.load(repository, key=self.key)
self.rebuild_refcounts(archive=archive, last=last, prefix=prefix)
if verify_data:
self.verify_data()
self.orphan_chunks_check()
self.finish(save_space=save_space)
if self.error_found:
@ -741,6 +754,26 @@ class ArchiveChecker:
cdata = repository.get(next(self.chunks.iteritems())[0])
return key_factory(repository, cdata)
def verify_data(self):
    """Decrypt every referenced chunk and report the ones that fail the integrity check.

    Iterates over the chunk index in self.chunks, skips entries whose refcount is zero,
    fetches each remaining chunk from self.repository and passes it to self.key.decrypt().
    An IntegrityError sets self.error_found, is logged together with the chunk id, and
    verification continues with the next chunk. A summary line is logged at the end
    (at error level if at least one chunk failed, at info level otherwise).
    """
    logger.info('Starting cryptographic data integrity verification...')
    progress = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%", step=0.01,
                                        same_line=True)
    # 'verified' counts every referenced chunk that was processed, including the failing ones
    verified = 0
    failures = 0
    for chunk_id, (refcount, *_) in self.chunks.iteritems():
        # show() is called for every index entry, also for the unreferenced ones skipped below
        progress.show()
        if refcount:
            stored = self.repository.get(chunk_id)
            try:
                # the decrypted result is discarded; only an IntegrityError raised here matters
                _, _plaintext = self.key.decrypt(chunk_id, stored)
            except IntegrityError as integrity_error:
                failures += 1
                self.error_found = True
                logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
            verified += 1
    progress.finish()
    if failures:
        report = logger.error
    else:
        report = logger.info
    report('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.',
           verified, failures)
def rebuild_manifest(self):
"""Rebuild the manifest object if it is missing
@ -874,6 +907,8 @@ class ArchiveChecker:
else:
# we only want one specific archive
archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
if not archive_items:
logger.error("Archive '%s' not found.", archive)
num_archives = 1
end = 1

View File

@ -185,12 +185,16 @@ class Archiver:
if not yes(msg, false_msg="Aborting.", truish=('YES', ),
env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
return EXIT_ERROR
if args.repo_only and args.verify_data:
self.print_error("--repository-only and --verify-data contradict each other. Please select one.")
return EXIT_ERROR
if not args.archives_only:
if not repository.check(repair=args.repair, save_space=args.save_space):
return EXIT_WARNING
if not args.repo_only and not ArchiveChecker().check(
repository, repair=args.repair, archive=args.location.archive,
last=args.last, prefix=args.prefix, save_space=args.save_space):
last=args.last, prefix=args.prefix, verify_data=args.verify_data,
save_space=args.save_space):
return EXIT_WARNING
return EXIT_SUCCESS
@ -1213,6 +1217,18 @@ class Archiver:
required).
- The archive checks can be time consuming, they can be skipped using the
--repository-only option.
The --verify-data option will perform a full integrity verification (as opposed to
checking the CRC32 of the segment) of data, which means reading the data from the
repository, decrypting and decompressing it. This is a cryptographic verification,
which will detect (accidental) corruption. For encrypted repositories it is
tamper-resistant as well, unless the attacker has access to the keys.
It is also very slow.
--verify-data only verifies data used by the archives specified with --last,
--prefix or an explicitly named archive. If none of these are passed,
all data in the repository is verified.
""")
subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False,
description=self.do_check.__doc__,
@ -1229,6 +1245,10 @@ class Archiver:
subparser.add_argument('--archives-only', dest='archives_only', action='store_true',
default=False,
help='only perform archives checks')
subparser.add_argument('--verify-data', dest='verify_data', action='store_true',
default=False,
help='perform cryptographic archive data integrity verification '
'(conflicts with --repository-only)')
subparser.add_argument('--repair', dest='repair', action='store_true',
default=False,
help='attempt to repair any inconsistencies found')

View File

@ -1590,6 +1590,29 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
self.cmd('check', self.repository_location, exit_code=0)
self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
def _test_verify_data(self, *init_args):
    """Corrupt one stored chunk and expect 'check --verify-data' to report it.

    Re-creates the repository, creates 'archive1', appends garbage to the last chunk of
    the archived 'testsuite/archiver.py' item and then runs 'check' twice: without
    --verify-data (expected exit code 0) and with --verify-data (expected exit code 1 and
    an integrity error message naming the modified chunk).

    :param init_args: additional command line arguments passed to 'init'
    :raises AssertionError: if the archive contains no 'testsuite/archiver.py' item, or
        if the expected integrity error is missing from the check output
    """
    shutil.rmtree(self.repository_path)
    self.cmd('init', self.repository_location, *init_args)
    self.create_src_archive('archive1')
    archive, repository = self.open_archive('archive1')
    with repository:
        for item in archive.iter_items():
            if item[b'path'].endswith('testsuite/archiver.py'):
                chunk = item[b'chunks'][-1]
                # store the chunk again under the same id, with trailing garbage appended
                data = repository.get(chunk.id) + b'1234'
                repository.put(chunk.id, data)
                break
        else:
            # Without a match nothing was corrupted and 'chunk' would be unbound below;
            # fail here with a message that names the actual problem.
            raise AssertionError("no item ending in 'testsuite/archiver.py' found in archive1")
        repository.commit()
    # the check without --verify-data is expected to succeed despite the modification
    self.cmd('check', self.repository_location, exit_code=0)
    output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1)
    assert bin_to_hex(chunk.id) + ', integrity error' in output
def test_verify_data(self):
    """Run the --verify-data corruption scenario on a repository initialized with repokey encryption."""
    init_args = ('--encryption', 'repokey')
    self._test_verify_data(*init_args)
def test_verify_data_unencrypted(self):
    """Run the --verify-data corruption scenario on a repository initialized without encryption."""
    init_args = ('--encryption', 'none')
    self._test_verify_data(*init_args)
class RemoteArchiverTestCase(ArchiverTestCase):
prefix = '__testsuite__:'