archiver: Add 'debug dump-manifest' and 'debug dump-archive' commands.

This commit is contained in:
Martin Hostettler 2017-01-29 21:31:35 +01:00
parent bb94ab7118
commit e8335dba0f
3 changed files with 177 additions and 1 deletions

View File

@ -4,6 +4,7 @@ import faulthandler
import functools
import hashlib
import inspect
import json
import logging
import os
import re
@ -22,6 +23,8 @@ from itertools import zip_longest
from .logger import create_logger, setup_logging
logger = create_logger()
import msgpack
from . import __version__
from . import helpers
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
@ -34,11 +37,12 @@ from .helpers import Error, NoManifestError
from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
from .helpers import PrefixSpec, SortBySpec, HUMAN_SORT_KEYS
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
from .helpers import prune_within, prune_split
from .helpers import to_localtime, timestamp
from .helpers import get_cache_dir
from .helpers import Manifest
from .helpers import StableDict
from .helpers import update_excludes, check_extension_modules
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi
@ -1226,6 +1230,74 @@ class Archiver:
print('Done.')
return EXIT_SUCCESS
@with_repository()
def do_debug_dump_archive(self, args, repository, manifest, key):
    """dump decoded archive metadata (not: data)"""
    # NOTE: the docstring above doubles as the argparse description of the
    # 'debug dump-archive' subcommand, so it is kept terse on purpose.
    #
    # Writes one JSON object with the keys "_name", "_manifest_entry",
    # "_meta" and "_items" to args.path ('-' selects stdout) and returns
    # EXIT_SUCCESS. Raises Archive.DoesNotExist when the requested archive
    # has no entry in the manifest.
    try:
        archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
    except KeyError:
        raise Archive.DoesNotExist(args.location.archive)

    indent = 4
    padding = ' ' * indent

    def do_indent(obj):
        # Serialize obj and shift the whole JSON text one nesting level right,
        # so it can be embedded into the hand-written outer object.
        return textwrap.indent(json.dumps(obj, indent=indent), prefix=padding)

    def iter_items(item_ids):
        # Decrypt the item metadata chunks one at a time and yield every item
        # the streaming unpacker can produce after each chunk is fed.
        unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
        for item_id in item_ids:
            _, chunk = key.decrypt(item_id, repository.get(item_id))
            unpacker.feed(chunk)
            yield from unpacker

    def output(fd):
        # The dump is megabytes even for a modest archive, so the outer JSON
        # structure is streamed by hand instead of building it in memory.
        fd.write('{\n')
        fd.write(' "_name": ' + json.dumps(args.location.archive) + ",\n")
        fd.write(' "_manifest_entry":\n')
        fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
        fd.write(',\n')

        archive_id = archive_meta_orig[b'id']
        _, data = key.decrypt(archive_id, repository.get(archive_id))
        archive_org_dict = msgpack.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')

        fd.write(' "_meta":\n')
        fd.write(do_indent(prepare_dump_dict(archive_org_dict)))
        fd.write(',\n')
        fd.write(' "_items": [\n')

        for position, raw_item in enumerate(iter_items(archive_org_dict[b'items'])):
            item = prepare_dump_dict(raw_item)
            if position:
                # every element except the first is preceded by a separator
                fd.write(',\n')
            fd.write(do_indent(item))

        fd.write('\n')
        fd.write(' ]\n}\n')

    if args.path != '-':
        with open(args.path, 'w') as fd:
            output(fd)
    else:
        output(sys.stdout)
    return EXIT_SUCCESS
@with_repository()
def do_debug_dump_manifest(self, args, repository, manifest, key):
    """dump decoded repository manifest"""
    # NOTE: the docstring above doubles as the argparse description of the
    # 'debug dump-manifest' subcommand.
    #
    # Fetches the manifest object from the repository, decrypts and unpacks
    # it, and writes it as indented JSON to args.path ('-' selects stdout).
    # Returns EXIT_SUCCESS.
    _, raw = key.decrypt(None, repository.get(manifest.MANIFEST_ID))
    unpacked = msgpack.fallback.unpackb(raw, object_hook=StableDict, unicode_errors='surrogateescape')
    meta = prepare_dump_dict(unpacked)

    if args.path != '-':
        with open(args.path, 'w') as fd:
            json.dump(meta, fd, indent=4)
        return EXIT_SUCCESS

    json.dump(meta, sys.stdout, indent=4)
    return EXIT_SUCCESS
@with_repository()
def do_debug_dump_repo_objs(self, args, repository, manifest, key):
"""dump (decrypted, decompressed) repo objects"""
@ -2716,6 +2788,36 @@ class Archiver:
type=location_validator(archive=True),
help='archive to dump')
# 'debug dump-archive': register the subcommand that writes the decoded
# metadata of a single archive to a file.
debug_dump_archive_epilog = textwrap.dedent("""
This command dumps all metadata of an archive in a decoded form to a file.
""")
subparser = debug_parsers.add_parser(
    'dump-archive',
    parents=[common_parser],
    add_help=False,
    description=self.do_debug_dump_archive.__doc__,
    epilog=debug_dump_archive_epilog,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    help='dump decoded archive metadata (debug)',
)
# Positional arguments: the archive location first, then the output path.
subparser.add_argument(
    'location',
    metavar='ARCHIVE',
    type=location_validator(archive=True),
    help='archive to dump',
)
subparser.add_argument(
    'path',
    metavar='PATH',
    type=str,
    help='file to dump data into',
)
subparser.set_defaults(func=self.do_debug_dump_archive)
# 'debug dump-manifest': register the subcommand that writes the decoded
# repository manifest to a file.
debug_dump_manifest_epilog = textwrap.dedent("""
This command dumps manifest metadata of a repository in a decoded form to a file.
""")
subparser = debug_parsers.add_parser(
    'dump-manifest',
    parents=[common_parser],
    add_help=False,
    description=self.do_debug_dump_manifest.__doc__,
    epilog=debug_dump_manifest_epilog,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    help='dump decoded repository metadata (debug)',
)
# Positional arguments: the repository location first, then the output path.
subparser.add_argument(
    'location',
    metavar='REPOSITORY',
    type=location_validator(archive=False),
    help='repository to dump',
)
subparser.add_argument(
    'path',
    metavar='PATH',
    type=str,
    help='file to dump data into',
)
subparser.set_defaults(func=self.do_debug_dump_manifest)
debug_dump_repo_objs_epilog = textwrap.dedent("""
This command dumps raw (but decrypted and decompressed) repo objects to files.
""")

View File

@ -1,5 +1,6 @@
import argparse
import contextlib
import collections
import grp
import hashlib
import logging
@ -1093,6 +1094,49 @@ def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
return d
def prepare_dump_dict(d):
    """Return a copy of mapping *d* in which bytes have been turned into str.

    Nested dicts become ``collections.OrderedDict`` instances (insertion
    order preserved), tuples and lists become lists, bytes values become
    strings and bytes keys are decoded. Any other value is passed through
    unchanged. The input is not modified.
    """
    hex_marker = '\u007f'

    def render_bytes(raw):
        # The encoding is meant to be reversible later on: ordinary text is
        # shown decoded so it reads nicely, everything else (typically chunk
        # ids) goes through bin_to_hex. ASCII DEL serves as an in-band flag
        # for the second form, so data that itself starts with DEL must never
        # take the decoded path.
        if not raw.startswith(b'\x7f'):
            try:
                return raw.decode()
            except UnicodeDecodeError:
                pass
        return hex_marker + bin_to_hex(raw)

    def convert(value):
        # Dispatch on the value type; the dict check comes first, sequences
        # second, bytes last, anything else is returned as-is.
        if isinstance(value, dict):
            return render_mapping(value)
        if isinstance(value, (tuple, list)):
            return [convert(element) for element in value]
        if isinstance(value, bytes):
            return render_bytes(value)
        return value

    def render_mapping(mapping):
        rendered = collections.OrderedDict()
        for name, value in mapping.items():
            converted = convert(value)
            if isinstance(name, bytes):
                # keys are decoded strictly; there is no hex fallback here
                name = name.decode()
            rendered[name] = converted
        return rendered

    return render_mapping(d)
def remove_surrogates(s, errors='replace'):
"""Replace surrogates generated by fsdecode with '?'
"""

View File

@ -3,6 +3,7 @@ from configparser import ConfigParser
import errno
import os
import inspect
import json
from datetime import datetime
from datetime import timedelta
from io import StringIO
@ -2020,6 +2021,35 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
2: 737475 - 88
"""
def test_debug_dump_manifest(self):
    # 'debug dump-manifest' must print nothing and leave a JSON file that
    # contains the top-level manifest keys.
    self.create_regular_file('file1', size=1024 * 80)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.cmd('create', self.repository_location + '::test', 'input')
    dump_path = self.output_path + '/dump'
    stdout_text = self.cmd('debug', 'dump-manifest', self.repository_location, dump_path)
    assert stdout_text == ""
    with open(dump_path, "r") as fh:
        dumped = json.load(fh)
    for expected_key in ('archives', 'config', 'item_keys', 'timestamp', 'version'):
        assert expected_key in dumped
def test_debug_dump_archive(self):
    # 'debug dump-archive' must print nothing and leave a JSON file that
    # contains the four top-level sections written by the command.
    self.create_regular_file('file1', size=1024 * 80)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.cmd('create', self.repository_location + '::test', 'input')
    dump_path = self.output_path + '/dump'
    stdout_text = self.cmd('debug', 'dump-archive', self.repository_location + "::test", dump_path)
    assert stdout_text == ""
    with open(dump_path, "r") as fh:
        dumped = json.load(fh)
    for expected_key in ('_name', '_manifest_entry', '_meta', '_items'):
        assert expected_key in dumped
@unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
class ArchiverTestCaseBinary(ArchiverTestCase):