diff --git a/borg/archive.py b/borg/archive.py
index a5db1d696..e9a9a7d90 100644
--- a/borg/archive.py
+++ b/borg/archive.py
@@ -799,21 +799,35 @@ class ArchiveChecker:
                     _state += 1
                 return _state
 
+            def report(msg, chunk_id, chunk_no):
+                """Report msg as an error, tagged with the chunk's number and hex id."""
+                cid = hexlify(chunk_id).decode('ascii')
+                msg += ' [chunk: %06d_%s]' % (chunk_no, cid)  # see debug-dump-archive-items
+                self.report_progress(msg, error=True)
+
+            # i: running 0-based index of the current chunk id within archive[b'items']
+            i = 0
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
                 if state % 2:
-                    self.report_progress('Archive metadata damage detected', error=True)
+                    for chunk_id in items:
+                        report('item metadata chunk missing', chunk_id, i)
+                        i += 1
                     continue
                 if state > 0:
                     unpacker.resync()
                 for chunk_id, cdata in zip(items, repository.get_many(items)):
                     unpacker.feed(self.key.decrypt(chunk_id, cdata))
-                    for item in unpacker:
-                        if not isinstance(item, dict):
-                            self.report_progress('Did not get expected metadata dict - archive corrupted!',
-                                                 error=True)
-                            continue
-                        yield item
+                    try:
+                        for item in unpacker:
+                            if isinstance(item, dict):
+                                yield item
+                            else:
+                                report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
+                    except Exception:
+                        report('Exception while unpacking item metadata', chunk_id, i)
+                        raise
+                    i += 1
 
         repository = cache_if_remote(self.repository)
         if archive is None:
diff --git a/borg/archiver.py b/borg/archiver.py
index 8432a78de..acb705ff6 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -3,6 +3,7 @@ from .support import argparse # see support/__init__.py docstring
 
 from binascii import hexlify
 from datetime import datetime
+from hashlib import sha256
 from operator import attrgetter
 import functools
 import inspect
@@ -17,7 +18,7 @@ import traceback
 from . import __version__
 from .helpers import Error, location_validator, format_time, format_file_size, \
     format_file_mode, ExcludePattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
-    get_cache_dir, get_keys_dir, prune_within, prune_split, \
+    get_cache_dir, get_keys_dir, prune_within, prune_split, unhexlify, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
     is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec, have_cython, is_slow_msgpack, yes, \
     EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
@@ -502,6 +503,78 @@ class Archiver:
             print("warning: %s" % e)
         return self.exit_code
 
+    def do_debug_dump_archive_items(self, args):
+        """dump (decrypted, decompressed) archive items metadata (not: data)"""
+        repository = self.open_repository(args.archive)
+        manifest, key = Manifest.load(repository)
+        archive = Archive(repository, key, manifest, args.archive.archive)
+        for i, item_id in enumerate(archive.metadata[b'items']):
+            data = key.decrypt(item_id, repository.get(item_id))
+            # one file per item metadata chunk, written relative to the current directory
+            filename = '%06d_%s.items' % (i, hexlify(item_id).decode('ascii'))
+            print('Dumping', filename)
+            with open(filename, 'wb') as fd:
+                fd.write(data)
+        print('Done.')
+        return EXIT_SUCCESS
+
+    def do_debug_get_obj(self, args):
+        """get object contents from the repository and write it into file"""
+        repository = self.open_repository(args.repository)
+        manifest, key = Manifest.load(repository)
+        hex_id = args.id
+        try:
+            obj_id = unhexlify(hex_id)
+        except ValueError:
+            print("object id %s is invalid." % hex_id)
+        else:
+            try:
+                data = repository.get(obj_id)
+            except repository.ObjectNotFound:
+                print("object %s not found." % hex_id)
+            else:
+                with open(args.path, "wb") as f:
+                    f.write(data)
+                print("object %s fetched." % hex_id)
+        return EXIT_SUCCESS
+
+    def do_debug_put_obj(self, args):
+        """put file(s) contents into the repository"""
+        repository = self.open_repository(args.repository)
+        manifest, key = Manifest.load(repository)
+        for path in args.paths:
+            with open(path, "rb") as f:
+                data = f.read()
+            # the object is stored under the sha256 digest of the raw file contents
+            h = sha256(data)  # XXX hardcoded
+            repository.put(h.digest(), data)
+            print("object %s put." % h.hexdigest())
+        repository.commit()
+        return EXIT_SUCCESS
+
+    def do_debug_delete_obj(self, args):
+        """delete the objects with the given IDs from the repo"""
+        repository = self.open_repository(args.repository)
+        manifest, key = Manifest.load(repository)
+        modified = False
+        for hex_id in args.ids:
+            try:
+                obj_id = unhexlify(hex_id)
+            except ValueError:
+                print("object id %s is invalid." % hex_id)
+            else:
+                try:
+                    repository.delete(obj_id)
+                    modified = True
+                    print("object %s deleted." % hex_id)
+                except repository.ObjectNotFound:
+                    print("object %s not found." % hex_id)
+        # commit only when at least one delete actually succeeded
+        if modified:
+            repository.commit()
+        print('Done.')
+        return EXIT_SUCCESS
+
     helptext = {}
     helptext['patterns'] = '''
         Exclude patterns use a variant of shell pattern syntax, with '*' matching any
@@ -990,6 +1063,62 @@ class Archiver:
         subparser.set_defaults(func=functools.partial(self.do_help, parser, subparsers.choices))
         subparser.add_argument('topic', metavar='TOPIC', type=str, nargs='?',
                                help='additional help on TOPIC')
+
+        debug_dump_archive_items_epilog = textwrap.dedent("""
+        This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files.
+        """)
+        subparser = subparsers.add_parser('debug-dump-archive-items', parents=[common_parser],
+                                          description=self.do_debug_dump_archive_items.__doc__,
+                                          epilog=debug_dump_archive_items_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_debug_dump_archive_items)
+        subparser.add_argument('archive', metavar='ARCHIVE',
+                               type=location_validator(archive=True),
+                               help='archive to dump')
+
+        debug_get_obj_epilog = textwrap.dedent("""
+        This command gets an object from the repository.
+        """)
+        subparser = subparsers.add_parser('debug-get-obj', parents=[common_parser],
+                                          description=self.do_debug_get_obj.__doc__,
+                                          epilog=debug_get_obj_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_debug_get_obj)
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
+                               type=location_validator(archive=False),
+                               help='repository to use')
+        subparser.add_argument('id', metavar='ID', type=str,
+                               help='hex object ID to get from the repo')
+        subparser.add_argument('path', metavar='PATH', type=str,
+                               help='file to write object data into')
+
+        debug_put_obj_epilog = textwrap.dedent("""
+        This command puts objects into the repository.
+        """)
+        subparser = subparsers.add_parser('debug-put-obj', parents=[common_parser],
+                                          description=self.do_debug_put_obj.__doc__,
+                                          epilog=debug_put_obj_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_debug_put_obj)
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
+                               type=location_validator(archive=False),
+                               help='repository to use')
+        subparser.add_argument('paths', metavar='PATH', nargs='+', type=str,
+                               help='file(s) to read and create object(s) from')
+
+        debug_delete_obj_epilog = textwrap.dedent("""
+        This command deletes objects from the repository.
+        """)
+        subparser = subparsers.add_parser('debug-delete-obj', parents=[common_parser],
+                                          description=self.do_debug_delete_obj.__doc__,
+                                          epilog=debug_delete_obj_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_debug_delete_obj)
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
+                               type=location_validator(archive=False),
+                               help='repository to use')
+        subparser.add_argument('ids', metavar='IDs', nargs='+', type=str,
+                               help='hex object ID(s) to delete from the repo')
         return parser
 
     def parse_args(self, args=None):
diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py
index 780a976ef..a70cd7eaa 100644
--- a/borg/testsuite/archiver.py
+++ b/borg/testsuite/archiver.py
@@ -777,6 +777,36 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def test_aes_counter_uniqueness_passphrase(self):
         self.verify_aes_counter_uniqueness('passphrase')
 
+    def test_debug_dump_archive_items(self):
+        self.create_test_files()
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        # the dump files are created in the current directory, so run the command from within 'output'
+        with changedir('output'):
+            output = self.cmd('debug-dump-archive-items', self.repository_location + '::test')
+        output_dir = sorted(os.listdir('output'))
+        assert len(output_dir) > 0 and output_dir[0].startswith('000000_')
+        assert 'Done.' in output
+
+    def test_debug_put_get_delete_obj(self):
+        self.cmd('init', self.repository_location)
+        data = b'some data'
+        hexkey = sha256(data).hexdigest()
+        self.create_regular_file('file', contents=data)
+        output = self.cmd('debug-put-obj', self.repository_location, 'input/file')
+        assert hexkey in output
+        output = self.cmd('debug-get-obj', self.repository_location, hexkey, 'output/file')
+        assert hexkey in output
+        with open('output/file', 'rb') as f:
+            data_read = f.read()
+        assert data == data_read
+        output = self.cmd('debug-delete-obj', self.repository_location, hexkey)
+        assert "deleted" in output
+        output = self.cmd('debug-delete-obj', self.repository_location, hexkey)
+        assert "not found" in output
+        output = self.cmd('debug-delete-obj', self.repository_location, 'invalid')
+        assert "is invalid" in output
+
 
 @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
 class ArchiverTestCaseBinary(ArchiverTestCase):
@@ -885,3 +915,7 @@ class RemoteArchiverTestCase(ArchiverTestCase):
     @unittest.skip('deadlock issues')
     def test_fuse_mount_archive(self):
         pass
+
+    @unittest.skip('only works locally')
+    def test_debug_put_get_delete_obj(self):
+        pass