mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-01 12:45:34 +00:00
check: support integrity verification with --verify-data
This commit is contained in:
parent
d22bbe17c5
commit
0bceaf0736
3 changed files with 81 additions and 3 deletions
|
@ -22,7 +22,8 @@
|
||||||
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
|
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
|
||||||
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
|
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
|
||||||
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
|
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
|
||||||
CompressionDecider1, CompressionDecider2, CompressionSpec
|
CompressionDecider1, CompressionDecider2, CompressionSpec, \
|
||||||
|
IntegrityError
|
||||||
from .repository import Repository
|
from .repository import Repository
|
||||||
from .platform import acl_get, acl_set
|
from .platform import acl_get, acl_set
|
||||||
from .chunker import Chunker
|
from .chunker import Chunker
|
||||||
|
@ -698,7 +699,17 @@ def __init__(self):
|
||||||
self.error_found = False
|
self.error_found = False
|
||||||
self.possibly_superseded = set()
|
self.possibly_superseded = set()
|
||||||
|
|
||||||
def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False):
|
def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False,
|
||||||
|
save_space=False):
|
||||||
|
"""Perform a set of checks on 'repository'
|
||||||
|
|
||||||
|
:param repair: enable repair mode, write updated or corrected data into repository
|
||||||
|
:param archive: only check this archive
|
||||||
|
:param last: only check this number of recent archives
|
||||||
|
:param prefix: only check archives with this prefix
|
||||||
|
:param verify_data: integrity verification of data referenced by archives
|
||||||
|
:param save_space: Repository.commit(save_space)
|
||||||
|
"""
|
||||||
logger.info('Starting archive consistency check...')
|
logger.info('Starting archive consistency check...')
|
||||||
self.check_all = archive is None and last is None and prefix is None
|
self.check_all = archive is None and last is None and prefix is None
|
||||||
self.repair = repair
|
self.repair = repair
|
||||||
|
@ -712,6 +723,8 @@ def check(self, repository, repair=False, archive=None, last=None, prefix=None,
|
||||||
else:
|
else:
|
||||||
self.manifest, _ = Manifest.load(repository, key=self.key)
|
self.manifest, _ = Manifest.load(repository, key=self.key)
|
||||||
self.rebuild_refcounts(archive=archive, last=last, prefix=prefix)
|
self.rebuild_refcounts(archive=archive, last=last, prefix=prefix)
|
||||||
|
if verify_data:
|
||||||
|
self.verify_data()
|
||||||
self.orphan_chunks_check()
|
self.orphan_chunks_check()
|
||||||
self.finish(save_space=save_space)
|
self.finish(save_space=save_space)
|
||||||
if self.error_found:
|
if self.error_found:
|
||||||
|
@ -741,6 +754,26 @@ def identify_key(self, repository):
|
||||||
cdata = repository.get(next(self.chunks.iteritems())[0])
|
cdata = repository.get(next(self.chunks.iteritems())[0])
|
||||||
return key_factory(repository, cdata)
|
return key_factory(repository, cdata)
|
||||||
|
|
||||||
|
def verify_data(self):
    """Run every referenced chunk through the key's decrypt() to verify it.

    Iterates over self.chunks, skips entries whose refcount is zero, reads
    each remaining chunk from self.repository and hands it to
    self.key.decrypt().  An IntegrityError raised by decrypt() does not stop
    the run: it is logged, counted, and recorded in self.error_found.
    A summary is logged at the end (at error level if anything failed).
    """
    logger.info('Starting cryptographic data integrity verification...')
    progress = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%",
                                        step=0.01, same_line=True)
    verified = 0
    failed = 0
    for chunk_id, (refcount, *_) in self.chunks.iteritems():
        # progress advances for every index entry, including the skipped ones
        progress.show()
        if refcount:
            ciphertext = self.repository.get(chunk_id)
            try:
                # only the side effect (raising on bad data) matters; the result is discarded
                _, _plaintext = self.key.decrypt(chunk_id, ciphertext)
            except IntegrityError as exc:
                self.error_found = True
                failed += 1
                logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), exc)
            # counts every chunk that was read and checked, whether it passed or not
            verified += 1
    progress.finish()
    if failed:
        report = logger.error
    else:
        report = logger.info
    report('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.',
           verified, failed)
|
||||||
|
|
||||||
def rebuild_manifest(self):
|
def rebuild_manifest(self):
|
||||||
"""Rebuild the manifest object if it is missing
|
"""Rebuild the manifest object if it is missing
|
||||||
|
|
||||||
|
@ -874,6 +907,8 @@ def report(msg, chunk_id, chunk_no):
|
||||||
else:
|
else:
|
||||||
# we only want one specific archive
|
# we only want one specific archive
|
||||||
archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
|
archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
|
||||||
|
if not archive_items:
|
||||||
|
logger.error("Archive '%s' not found.", archive)
|
||||||
num_archives = 1
|
num_archives = 1
|
||||||
end = 1
|
end = 1
|
||||||
|
|
||||||
|
|
|
@ -185,12 +185,16 @@ def do_check(self, args, repository):
|
||||||
if not yes(msg, false_msg="Aborting.", truish=('YES', ),
|
if not yes(msg, false_msg="Aborting.", truish=('YES', ),
|
||||||
env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
|
env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
|
||||||
return EXIT_ERROR
|
return EXIT_ERROR
|
||||||
|
if args.repo_only and args.verify_data:
|
||||||
|
self.print_error("--repository-only and --verify-data contradict each other. Please select one.")
|
||||||
|
return EXIT_ERROR
|
||||||
if not args.archives_only:
|
if not args.archives_only:
|
||||||
if not repository.check(repair=args.repair, save_space=args.save_space):
|
if not repository.check(repair=args.repair, save_space=args.save_space):
|
||||||
return EXIT_WARNING
|
return EXIT_WARNING
|
||||||
if not args.repo_only and not ArchiveChecker().check(
|
if not args.repo_only and not ArchiveChecker().check(
|
||||||
repository, repair=args.repair, archive=args.location.archive,
|
repository, repair=args.repair, archive=args.location.archive,
|
||||||
last=args.last, prefix=args.prefix, save_space=args.save_space):
|
last=args.last, prefix=args.prefix, verify_data=args.verify_data,
|
||||||
|
save_space=args.save_space):
|
||||||
return EXIT_WARNING
|
return EXIT_WARNING
|
||||||
return EXIT_SUCCESS
|
return EXIT_SUCCESS
|
||||||
|
|
||||||
|
@ -1213,6 +1217,18 @@ def build_parser(self, args=None, prog=None):
|
||||||
required).
|
required).
|
||||||
- The archive checks can be time consuming, they can be skipped using the
|
- The archive checks can be time consuming, they can be skipped using the
|
||||||
--repository-only option.
|
--repository-only option.
|
||||||
|
|
||||||
|
The --verify-data option will perform a full integrity verification (as opposed to
|
||||||
|
checking the CRC32 of the segment) of data, which means reading the data from the
|
||||||
|
repository, decrypting and decompressing it. This is a cryptographic verification,
|
||||||
|
which will detect (accidental) corruption. For encrypted repositories it is
|
||||||
|
tamper-resistant as well, unless the attacker has access to the keys.
|
||||||
|
|
||||||
|
It is also very slow.
|
||||||
|
|
||||||
|
--verify-data only verifies data used by the archives specified with --last,
|
||||||
|
--prefix or an explicitly named archive. If none of these are passed,
|
||||||
|
all data in the repository is verified.
|
||||||
""")
|
""")
|
||||||
subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False,
|
subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False,
|
||||||
description=self.do_check.__doc__,
|
description=self.do_check.__doc__,
|
||||||
|
@ -1229,6 +1245,10 @@ def build_parser(self, args=None, prog=None):
|
||||||
subparser.add_argument('--archives-only', dest='archives_only', action='store_true',
|
subparser.add_argument('--archives-only', dest='archives_only', action='store_true',
|
||||||
default=False,
|
default=False,
|
||||||
help='only perform archives checks')
|
help='only perform archives checks')
|
||||||
|
subparser.add_argument('--verify-data', dest='verify_data', action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='perform cryptographic archive data integrity verification '
|
||||||
|
'(conflicts with --repository-only)')
|
||||||
subparser.add_argument('--repair', dest='repair', action='store_true',
|
subparser.add_argument('--repair', dest='repair', action='store_true',
|
||||||
default=False,
|
default=False,
|
||||||
help='attempt to repair any inconsistencies found')
|
help='attempt to repair any inconsistencies found')
|
||||||
|
|
|
@ -1590,6 +1590,29 @@ def test_extra_chunks(self):
|
||||||
self.cmd('check', self.repository_location, exit_code=0)
|
self.cmd('check', self.repository_location, exit_code=0)
|
||||||
self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
|
self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
|
||||||
|
|
||||||
|
def _test_verify_data(self, *init_args):
    """Corrupt one stored chunk and check that only ``check --verify-data`` reports it.

    The last chunk of the archived ``testsuite/archiver.py`` item gets extra
    bytes appended in the repository.  A plain ``check`` is then expected to
    exit with 0, while ``check --verify-data`` is expected to exit with 1 and
    to mention the corrupted chunk id in its output.

    :param init_args: additional command line arguments passed to ``init``
    """
    # remove the existing repository directory, then re-create it via init with init_args
    shutil.rmtree(self.repository_path)
    self.cmd('init', self.repository_location, *init_args)
    self.create_src_archive('archive1')
    archive, repository = self.open_archive('archive1')
    with repository:
        for item in archive.iter_items():
            if item[b'path'].endswith('testsuite/archiver.py'):
                chunk = item[b'chunks'][-1]
                # append junk bytes to the stored chunk data
                data = repository.get(chunk.id) + b'1234'
                repository.put(chunk.id, data)
                break
        else:
            # without this, nothing would be corrupted and the test would fail
            # later with a misleading exit code mismatch / unbound 'chunk'
            raise AssertionError('testsuite/archiver.py not found in archive1, no chunk was corrupted')
        repository.commit()
    self.cmd('check', self.repository_location, exit_code=0)
    output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1)
    assert bin_to_hex(chunk.id) + ', integrity error' in output
|
||||||
|
|
||||||
|
def test_verify_data(self):
    """Run the --verify-data corruption scenario on a repository initialised with repokey encryption."""
    init_args = ('--encryption', 'repokey')
    self._test_verify_data(*init_args)
|
||||||
|
|
||||||
|
def test_verify_data_unencrypted(self):
    """Run the --verify-data corruption scenario on a repository initialised with encryption 'none'."""
    init_args = ('--encryption', 'none')
    self._test_verify_data(*init_args)
|
||||||
|
|
||||||
|
|
||||||
class RemoteArchiverTestCase(ArchiverTestCase):
|
class RemoteArchiverTestCase(ArchiverTestCase):
|
||||||
prefix = '__testsuite__:'
|
prefix = '__testsuite__:'
|
||||||
|
|
Loading…
Reference in a new issue