mirror of https://github.com/borgbackup/borg.git
check: support integrity verification with --verify-data
This commit is contained in:
parent
d22bbe17c5
commit
0bceaf0736
|
@ -22,7 +22,8 @@ from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
|
|||
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
|
||||
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
|
||||
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
|
||||
CompressionDecider1, CompressionDecider2, CompressionSpec
|
||||
CompressionDecider1, CompressionDecider2, CompressionSpec, \
|
||||
IntegrityError
|
||||
from .repository import Repository
|
||||
from .platform import acl_get, acl_set
|
||||
from .chunker import Chunker
|
||||
|
@ -698,7 +699,17 @@ class ArchiveChecker:
|
|||
self.error_found = False
|
||||
self.possibly_superseded = set()
|
||||
|
||||
def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False):
|
||||
def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False,
|
||||
save_space=False):
|
||||
"""Perform a set of checks on 'repository'
|
||||
|
||||
:param repair: enable repair mode, write updated or corrected data into repository
|
||||
:param archive: only check this archive
|
||||
:param last: only check this number of recent archives
|
||||
:param prefix: only check archives with this prefix
|
||||
:param verify_data: integrity verification of data referenced by archives
|
||||
:param save_space: Repository.commit(save_space)
|
||||
"""
|
||||
logger.info('Starting archive consistency check...')
|
||||
self.check_all = archive is None and last is None and prefix is None
|
||||
self.repair = repair
|
||||
|
@ -712,6 +723,8 @@ class ArchiveChecker:
|
|||
else:
|
||||
self.manifest, _ = Manifest.load(repository, key=self.key)
|
||||
self.rebuild_refcounts(archive=archive, last=last, prefix=prefix)
|
||||
if verify_data:
|
||||
self.verify_data()
|
||||
self.orphan_chunks_check()
|
||||
self.finish(save_space=save_space)
|
||||
if self.error_found:
|
||||
|
@ -741,6 +754,26 @@ class ArchiveChecker:
|
|||
cdata = repository.get(next(self.chunks.iteritems())[0])
|
||||
return key_factory(repository, cdata)
|
||||
|
||||
def verify_data(self):
    """Read and decrypt every referenced chunk, reporting those that fail.

    Iterates over self.chunks, skips entries whose refcount is zero, and for
    every other chunk fetches the stored data from self.repository and hands
    it to self.key.decrypt(). A failing chunk is logged with its hex id,
    counted, and flagged via self.error_found; iteration then moves on to the
    next chunk instead of stopping. A summary is logged at the end, at error
    level when at least one chunk failed.
    """
    logger.info('Starting cryptographic data integrity verification...')
    pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%", step=0.01, same_line=True)
    count = errors = 0
    for chunk_id, (refcount, *_) in self.chunks.iteritems():
        pi.show()
        if not refcount:
            # Nothing references this chunk, so there is nothing to verify.
            continue
        try:
            # The fetch sits inside the try block so that a failure while
            # reading the chunk is reported like a failed decrypt instead of
            # aborting the whole run.
            # NOTE(review): assumes Repository.get() signals a missing object
            # with Repository.ObjectNotFound and damaged stored data with
            # IntegrityError - confirm against the repository module.
            encrypted_data = self.repository.get(chunk_id)
            # Only the side effect (raising on bad data) matters here; the
            # decrypted result is not used.
            self.key.decrypt(chunk_id, encrypted_data)
        except (Repository.ObjectNotFound, IntegrityError) as err:
            self.error_found = True
            errors += 1
            logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), err)
        # Failed chunks are included in the number of chunks looked at.
        count += 1
    pi.finish()
    log = logger.error if errors else logger.info
    log('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.', count, errors)
|
||||
|
||||
def rebuild_manifest(self):
|
||||
"""Rebuild the manifest object if it is missing
|
||||
|
||||
|
@ -874,6 +907,8 @@ class ArchiveChecker:
|
|||
else:
|
||||
# we only want one specific archive
|
||||
archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
|
||||
if not archive_items:
|
||||
logger.error("Archive '%s' not found.", archive)
|
||||
num_archives = 1
|
||||
end = 1
|
||||
|
||||
|
|
|
@ -185,12 +185,16 @@ class Archiver:
|
|||
if not yes(msg, false_msg="Aborting.", truish=('YES', ),
|
||||
env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
|
||||
return EXIT_ERROR
|
||||
if args.repo_only and args.verify_data:
|
||||
self.print_error("--repository-only and --verify-data contradict each other. Please select one.")
|
||||
return EXIT_ERROR
|
||||
if not args.archives_only:
|
||||
if not repository.check(repair=args.repair, save_space=args.save_space):
|
||||
return EXIT_WARNING
|
||||
if not args.repo_only and not ArchiveChecker().check(
|
||||
repository, repair=args.repair, archive=args.location.archive,
|
||||
last=args.last, prefix=args.prefix, save_space=args.save_space):
|
||||
last=args.last, prefix=args.prefix, verify_data=args.verify_data,
|
||||
save_space=args.save_space):
|
||||
return EXIT_WARNING
|
||||
return EXIT_SUCCESS
|
||||
|
||||
|
@ -1213,6 +1217,18 @@ class Archiver:
|
|||
required).
|
||||
- The archive checks can be time consuming, they can be skipped using the
|
||||
--repository-only option.
|
||||
|
||||
The --verify-data option will perform a full integrity verification (as opposed to
|
||||
checking the CRC32 of the segment) of data, which means reading the data from the
|
||||
repository, decrypting and decompressing it. This is a cryptographic verification,
|
||||
which will detect (accidental) corruption. For encrypted repositories it is
|
||||
tamper-resistant as well, unless the attacker has access to the keys.
|
||||
|
||||
It is also very slow.
|
||||
|
||||
--verify-data only verifies data used by the archives specified with --last,
|
||||
--prefix or an explicitly named archive. If none of these are passed,
|
||||
all data in the repository is verified.
|
||||
""")
|
||||
subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False,
|
||||
description=self.do_check.__doc__,
|
||||
|
@ -1229,6 +1245,10 @@ class Archiver:
|
|||
subparser.add_argument('--archives-only', dest='archives_only', action='store_true',
|
||||
default=False,
|
||||
help='only perform archives checks')
|
||||
subparser.add_argument('--verify-data', dest='verify_data', action='store_true',
|
||||
default=False,
|
||||
help='perform cryptographic archive data integrity verification '
|
||||
'(conflicts with --repository-only)')
|
||||
subparser.add_argument('--repair', dest='repair', action='store_true',
|
||||
default=False,
|
||||
help='attempt to repair any inconsistencies found')
|
||||
|
|
|
@ -1590,6 +1590,29 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('check', self.repository_location, exit_code=0)
|
||||
self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
|
||||
|
||||
def _test_verify_data(self, *init_args):
    """Corrupt one stored chunk and check that only --verify-data notices.

    Re-creates the repository with the given ``init`` arguments, stores
    'archive1', then appends four bytes to the stored form of the last chunk
    of the archived ``testsuite/archiver.py``. A plain ``check`` is expected
    to exit with code 0, while ``check --verify-data`` is expected to exit
    with code 1 and mention the damaged chunk id in its output.

    :param init_args: extra command line arguments passed to ``init``
    """
    shutil.rmtree(self.repository_path)
    self.cmd('init', self.repository_location, *init_args)
    self.create_src_archive('archive1')
    archive, repository = self.open_archive('archive1')
    with repository:
        for item in archive.iter_items():
            if not item[b'path'].endswith('testsuite/archiver.py'):
                continue
            chunk = item[b'chunks'][-1]
            # Trailing garbage invalidates the stored chunk.
            repository.put(chunk.id, repository.get(chunk.id) + b'1234')
            break
        repository.commit()
    self.cmd('check', self.repository_location, exit_code=0)
    output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1)
    expected_message = bin_to_hex(chunk.id) + ', integrity error'
    assert expected_message in output
|
||||
|
||||
def test_verify_data(self):
    """Run the --verify-data corruption scenario on a repokey repository."""
    init_args = ('--encryption', 'repokey')
    self._test_verify_data(*init_args)
|
||||
|
||||
def test_verify_data_unencrypted(self):
    """Run the --verify-data corruption scenario with encryption 'none'."""
    init_args = ('--encryption', 'none')
    self._test_verify_data(*init_args)
|
||||
|
||||
|
||||
class RemoteArchiverTestCase(ArchiverTestCase):
|
||||
prefix = '__testsuite__:'
|
||||
|
|
Loading…
Reference in New Issue