@with_repository()
def do_debug_dump_archive(self, args, repository, manifest, key):
    """dump decoded archive metadata (not: data)"""
    # NOTE: the docstring above is also used as the argparse description of
    # the 'debug dump-archive' subcommand, so keep it a short one-liner.

    # Look the archive up in the raw manifest dict first, so a missing archive
    # is reported before any output file is created.
    try:
        archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
    except KeyError:
        raise Archive.DoesNotExist(args.location.archive)

    indent = 4
    # one nesting level of whitespace; every hand-written line below uses it
    # so the manual output always agrees with what json.dumps() produces.
    pad = ' ' * indent

    def do_indent(d):
        # serialize d and shift it one level right, so it nests below the
        # hand-written top-level object.
        return textwrap.indent(json.dumps(d, indent=indent), prefix=pad)

    def output(fd):
        # this outputs megabytes of data for a modest sized archive, so some manual streaming json output
        fd.write('{\n')
        fd.write(pad + '"_name": ' + json.dumps(args.location.archive) + ",\n")
        fd.write(pad + '"_manifest_entry":\n')
        fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
        fd.write(',\n')

        # the manifest entry only references the archive metadata object by id
        archive_id = archive_meta_orig[b'id']
        _, data = key.decrypt(archive_id, repository.get(archive_id))
        archive_org_dict = msgpack.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')

        fd.write(pad + '"_meta":\n')
        fd.write(do_indent(prepare_dump_dict(archive_org_dict)))
        fd.write(',\n')
        fd.write(pad + '"_items": [\n')

        # the item stream is fed chunk-wise into one unpacker and every item
        # is written as soon as it has been decoded.
        unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
        first = True
        for item_id in archive_org_dict[b'items']:
            _, data = key.decrypt(item_id, repository.get(item_id))
            unpacker.feed(data)
            for item in unpacker:
                item = prepare_dump_dict(item)
                # JSON does not allow a trailing comma, so the separator is
                # written *before* every item except the first one.
                if first:
                    first = False
                else:
                    fd.write(',\n')
                fd.write(do_indent(item))

        fd.write('\n')
        fd.write(pad + ']\n}\n')

    # '-' as PATH selects stdout instead of a file
    if args.path == '-':
        output(sys.stdout)
    else:
        with open(args.path, 'w') as fd:
            output(fd)
    return EXIT_SUCCESS


@with_repository()
def do_debug_dump_manifest(self, args, repository, manifest, key):
    """dump decoded repository manifest"""
    # NOTE: the docstring above is also used as the argparse description of
    # the 'debug dump-manifest' subcommand.

    _, data = key.decrypt(None, repository.get(manifest.MANIFEST_ID))

    # NOTE(review): this uses msgpack.fallback.unpackb while dump-archive uses
    # msgpack.unpackb - confirm whether the pure-python unpacker is required here.
    meta = prepare_dump_dict(msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape'))

    # '-' as PATH selects stdout instead of a file
    if args.path == '-':
        json.dump(meta, sys.stdout, indent=4)
    else:
        with open(args.path, 'w') as fd:
            json.dump(meta, fd, indent=4)
    return EXIT_SUCCESS
do_debug_dump_repo_objs(self, args, repository, manifest, key): """dump (decrypted, decompressed) repo objects""" @@ -2716,6 +2788,36 @@ class Archiver: type=location_validator(archive=True), help='archive to dump') + debug_dump_archive_epilog = textwrap.dedent(""" + This command dumps all metadata of an archive in a decoded form to a file. + """) + subparser = debug_parsers.add_parser('dump-archive', parents=[common_parser], add_help=False, + description=self.do_debug_dump_archive.__doc__, + epilog=debug_dump_archive_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='dump decoded archive metadata (debug)') + subparser.set_defaults(func=self.do_debug_dump_archive) + subparser.add_argument('location', metavar='ARCHIVE', + type=location_validator(archive=True), + help='archive to dump') + subparser.add_argument('path', metavar='PATH', type=str, + help='file to dump data into') + + debug_dump_manifest_epilog = textwrap.dedent(""" + This command dumps manifest metadata of a repository in a decoded form to a file. + """) + subparser = debug_parsers.add_parser('dump-manifest', parents=[common_parser], add_help=False, + description=self.do_debug_dump_manifest.__doc__, + epilog=debug_dump_manifest_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='dump decoded repository metadata (debug)') + subparser.set_defaults(func=self.do_debug_dump_manifest) + subparser.add_argument('location', metavar='REPOSITORY', + type=location_validator(archive=False), + help='repository to dump') + subparser.add_argument('path', metavar='PATH', type=str, + help='file to dump data into') + debug_dump_repo_objs_epilog = textwrap.dedent(""" This command dumps raw (but decrypted and decompressed) repo objects to files. 
def prepare_dump_dict(d):
    """Return an ordered copy of the (possibly nested) dict *d* with all bytes replaced by str.

    - bytes keys and values are converted to str (see ``decode_bytes``),
    - tuples and lists are converted to lists (recursively),
    - nested dicts are converted to ``collections.OrderedDict`` (recursively),
    - every other value is passed through unchanged.

    The input is not modified.
    """
    def decode_bytes(value):
        # this should somehow be reversible later, but usual strings should
        # look nice and chunk ids should mostly show in hex. Use a special
        # inband signaling character (ASCII DEL) to distinguish between
        # decoded and hex mode.
        if not value.startswith(b'\x7f'):
            try:
                return value.decode()
            except UnicodeDecodeError:
                pass
        return '\u007f' + bin_to_hex(value)

    def decode_value(value):
        # single place that decides how a value is converted; used for dict
        # values as well as for sequence elements.
        if isinstance(value, dict):
            return decode_dict(value)
        if isinstance(value, (tuple, list)):
            return [decode_value(element) for element in value]
        if isinstance(value, bytes):
            return decode_bytes(value)
        return value

    def decode_dict(mapping):
        res = collections.OrderedDict()
        for key, value in mapping.items():
            if isinstance(key, bytes):
                # keys go through the same conversion as values, so a key
                # that is not valid UTF-8 is shown in hex instead of aborting
                # the whole dump with a UnicodeDecodeError.
                key = decode_bytes(key)
            res[key] = decode_value(value)
        return res

    return decode_dict(d)
def test_debug_dump_manifest(self):
    """'debug dump-manifest' writes a JSON file containing the top-level manifest keys."""
    self.create_regular_file('file1', size=1024 * 80)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.cmd('create', self.repository_location + '::test', 'input')
    dump_file = os.path.join(self.output_path, 'dump')
    output = self.cmd('debug', 'dump-manifest', self.repository_location, dump_file)
    # the dump goes to the file, nothing must be printed
    assert output == ""
    with open(dump_file, "r") as f:
        result = json.load(f)
    for expected_key in ('archives', 'config', 'item_keys', 'timestamp', 'version'):
        assert expected_key in result


def test_debug_dump_archive(self):
    """'debug dump-archive' writes a JSON file with name, manifest entry, metadata and items."""
    self.create_regular_file('file1', size=1024 * 80)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.cmd('create', self.repository_location + '::test', 'input')
    dump_file = os.path.join(self.output_path, 'dump')
    output = self.cmd('debug', 'dump-archive', self.repository_location + "::test", dump_file)
    # the dump goes to the file, nothing must be printed
    assert output == ""
    with open(dump_file, "r") as f:
        # json.load() also proves that the manually streamed output is valid JSON
        result = json.load(f)
    for expected_key in ('_name', '_manifest_entry', '_meta', '_items'):
        assert expected_key in result
    assert result['_name'] == 'test'
    assert isinstance(result['_items'], list)