Merge pull request #2114 from textshell/feature/debug-dump-json

Add 'debug dump-manifest' and 'debug dump-archive' commands.
This commit is contained in:
enkore 2017-02-11 20:19:04 +01:00 committed by GitHub
commit f6732b62fd
3 changed files with 177 additions and 1 deletions

View File

@ -4,6 +4,7 @@ import faulthandler
import functools import functools
import hashlib import hashlib
import inspect import inspect
import json
import logging import logging
import os import os
import re import re
@ -22,6 +23,8 @@ from itertools import zip_longest
from .logger import create_logger, setup_logging from .logger import create_logger, setup_logging
logger = create_logger() logger = create_logger()
import msgpack
from . import __version__ from . import __version__
from . import helpers from . import helpers
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
@ -34,11 +37,12 @@ from .helpers import Error, NoManifestError
from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
from .helpers import PrefixSpec, SortBySpec, HUMAN_SORT_KEYS from .helpers import PrefixSpec, SortBySpec, HUMAN_SORT_KEYS
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
from .helpers import prune_within, prune_split from .helpers import prune_within, prune_split
from .helpers import to_localtime, timestamp from .helpers import to_localtime, timestamp
from .helpers import get_cache_dir from .helpers import get_cache_dir
from .helpers import Manifest from .helpers import Manifest
from .helpers import StableDict
from .helpers import update_excludes, check_extension_modules from .helpers import update_excludes, check_extension_modules
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi from .helpers import log_multi
@ -1226,6 +1230,74 @@ class Archiver:
print('Done.') print('Done.')
return EXIT_SUCCESS return EXIT_SUCCESS
@with_repository()
def do_debug_dump_archive(self, args, repository, manifest, key):
    """dump decoded archive metadata (not: data)"""
    # The raw manifest dict is keyed by the encoded archive name.
    raw_archives = manifest.archives.get_raw_dict()
    archive_name = safe_encode(args.location.archive)
    if archive_name not in raw_archives:
        raise Archive.DoesNotExist(args.location.archive)
    archive_meta_orig = raw_archives[archive_name]
    indent = 4

    def indented(d):
        # render a dict as an indented JSON fragment
        return textwrap.indent(json.dumps(d, indent=indent), prefix=' ' * indent)

    def write_dump(stream):
        # this outputs megabytes of data for a modest sized archive, so some manual streaming json output
        stream.write('{\n')
        stream.write('    "_name": ' + json.dumps(args.location.archive) + ",\n")
        stream.write('    "_manifest_entry":\n')
        stream.write(indented(prepare_dump_dict(archive_meta_orig)))
        stream.write(',\n')
        # decrypt + decode the archive metadata object itself
        _, payload = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
        archive_org_dict = msgpack.unpackb(payload, object_hook=StableDict, unicode_errors='surrogateescape')
        stream.write('    "_meta":\n')
        stream.write(indented(prepare_dump_dict(archive_org_dict)))
        stream.write(',\n')
        stream.write('    "_items": [\n')
        unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
        # items are stored as a stream of msgpack objects spread over chunks;
        # feed each decrypted chunk and drain whatever complete items appear
        separator = ''
        for item_id in archive_org_dict[b'items']:
            _, payload = key.decrypt(item_id, repository.get(item_id))
            unpacker.feed(payload)
            for item in unpacker:
                stream.write(separator)
                stream.write(indented(prepare_dump_dict(item)))
                separator = ',\n'
        stream.write('\n')
        stream.write('    ]\n}\n')

    if args.path == '-':
        write_dump(sys.stdout)
    else:
        with open(args.path, 'w') as fd:
            write_dump(fd)
    return EXIT_SUCCESS
@with_repository()
def do_debug_dump_manifest(self, args, repository, manifest, key):
    """dump decoded repository manifest"""
    # fetch and decrypt the raw manifest object, then decode it with the
    # pure-python msgpack unpacker (fallback module), keeping key order
    _, data = key.decrypt(None, repository.get(manifest.MANIFEST_ID))
    unpacked = msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')
    meta = prepare_dump_dict(unpacked)
    if args.path == '-':
        json.dump(meta, sys.stdout, indent=4)
        return EXIT_SUCCESS
    with open(args.path, 'w') as fd:
        json.dump(meta, fd, indent=4)
    return EXIT_SUCCESS
@with_repository() @with_repository()
def do_debug_dump_repo_objs(self, args, repository, manifest, key): def do_debug_dump_repo_objs(self, args, repository, manifest, key):
"""dump (decrypted, decompressed) repo objects""" """dump (decrypted, decompressed) repo objects"""
@ -2716,6 +2788,36 @@ class Archiver:
type=location_validator(archive=True), type=location_validator(archive=True),
help='archive to dump') help='archive to dump')
# --- borg debug dump-archive -------------------------------------------
# Dumps all archive metadata (manifest entry, archive metadata object and
# all item dicts) as decoded JSON into PATH (streamed, see handler).
debug_dump_archive_epilog = textwrap.dedent("""
This command dumps all metadata of an archive in a decoded form to a file.
""")
subparser = debug_parsers.add_parser('dump-archive', parents=[common_parser], add_help=False,
description=self.do_debug_dump_archive.__doc__,
epilog=debug_dump_archive_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help='dump decoded archive metadata (debug)')
subparser.set_defaults(func=self.do_debug_dump_archive)
# positional 1: the archive to dump (REPO::ARCHIVE form)
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to dump')
# positional 2: output file; the handler accepts '-' for stdout
subparser.add_argument('path', metavar='PATH', type=str,
help='file to dump data into')
# --- borg debug dump-manifest ------------------------------------------
# Dumps the decoded repository manifest as JSON into PATH.
debug_dump_manifest_epilog = textwrap.dedent("""
This command dumps manifest metadata of a repository in a decoded form to a file.
""")
subparser = debug_parsers.add_parser('dump-manifest', parents=[common_parser], add_help=False,
description=self.do_debug_dump_manifest.__doc__,
epilog=debug_dump_manifest_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help='dump decoded repository metadata (debug)')
subparser.set_defaults(func=self.do_debug_dump_manifest)
# positional 1: repository only (no archive part allowed)
subparser.add_argument('location', metavar='REPOSITORY',
type=location_validator(archive=False),
help='repository to dump')
# positional 2: output file; the handler accepts '-' for stdout
subparser.add_argument('path', metavar='PATH', type=str,
help='file to dump data into')
debug_dump_repo_objs_epilog = textwrap.dedent(""" debug_dump_repo_objs_epilog = textwrap.dedent("""
This command dumps raw (but decrypted and decompressed) repo objects to files. This command dumps raw (but decrypted and decompressed) repo objects to files.
""") """)

View File

@ -1,5 +1,6 @@
import argparse import argparse
import contextlib import contextlib
import collections
import grp import grp
import hashlib import hashlib
import logging import logging
@ -1093,6 +1094,49 @@ def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
return d return d
def prepare_dump_dict(d):
    """Return a JSON-serializable deep copy of a msgpack-decoded dict *d*.

    bytes keys are decoded to str. bytes values are decoded to str when
    possible; undecodable values (and values that already start with the
    escape byte 0x7f) are rendered as '\\u007f' + hex digest so the two
    representations stay distinguishable. Nested dicts, tuples and lists
    are converted recursively; dicts become OrderedDicts preserving the
    input iteration order, tuples become lists.
    """
    def decode_bytes(value):
        # this should somehow be reversible later, but usual strings should
        # look nice and chunk ids should mostly show in hex. Use a special
        # inband signaling character (ASCII DEL) to distinguish between
        # decoded and hex mode.
        if not value.startswith(b'\x7f'):
            try:
                return value.decode()
            except UnicodeDecodeError:
                pass
        return '\u007f' + bin_to_hex(value)

    def decode_tuple(t):
        res = []
        for value in t:
            if isinstance(value, dict):
                value = decode(value)
            elif isinstance(value, (tuple, list)):
                value = decode_tuple(value)
            elif isinstance(value, bytes):
                value = decode_bytes(value)
            res.append(value)
        return res

    def decode(d):
        res = collections.OrderedDict()
        for key, value in d.items():
            if isinstance(value, dict):
                value = decode(value)
            elif isinstance(value, (tuple, list)):
                value = decode_tuple(value)
            elif isinstance(value, bytes):
                value = decode_bytes(value)
            if isinstance(key, bytes):
                key = key.decode()
            res[key] = value
        return res

    return decode(d)
def remove_surrogates(s, errors='replace'): def remove_surrogates(s, errors='replace'):
"""Replace surrogates generated by fsdecode with '?' """Replace surrogates generated by fsdecode with '?'
""" """

View File

@ -3,6 +3,7 @@ from configparser import ConfigParser
import errno import errno
import os import os
import inspect import inspect
import json
from datetime import datetime from datetime import datetime
from datetime import timedelta from datetime import timedelta
from io import StringIO from io import StringIO
@ -2020,6 +2021,35 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
2: 737475 - 88 2: 737475 - 88
""" """
def test_debug_dump_manifest(self):
    """dump-manifest writes valid JSON containing all top-level manifest keys"""
    self.create_regular_file('file1', size=1024 * 80)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.cmd('create', self.repository_location + '::test', 'input')
    dump_file = self.output_path + '/dump'
    # the command itself must be silent; everything goes into the file
    assert self.cmd('debug', 'dump-manifest', self.repository_location, dump_file) == ""
    with open(dump_file, "r") as f:
        dumped = json.load(f)
    for expected_key in ('archives', 'config', 'item_keys', 'timestamp', 'version'):
        assert expected_key in dumped
def test_debug_dump_archive(self):
    """dump-archive writes valid JSON with all expected '_'-prefixed sections"""
    self.create_regular_file('file1', size=1024 * 80)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.cmd('create', self.repository_location + '::test', 'input')
    dump_file = self.output_path + '/dump'
    # the command itself must be silent; everything goes into the file
    assert self.cmd('debug', 'dump-archive', self.repository_location + "::test", dump_file) == ""
    with open(dump_file, "r") as f:
        dumped = json.load(f)
    for expected_key in ('_name', '_manifest_entry', '_meta', '_items'):
        assert expected_key in dumped
@unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available') @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
class ArchiverTestCaseBinary(ArchiverTestCase): class ArchiverTestCaseBinary(ArchiverTestCase):