Merge pull request #2203 from enkore/f/jsono

JSON output for major commands
enkore, 2017-02-25 23:05:35 +01:00, committed by GitHub
commit 0f9b7a270d
5 changed files with 319 additions and 58 deletions

src/borg/archive.py

@@ -28,7 +28,7 @@ from .helpers import Chunk, ChunkIteratorFileWrapper, open_item
from .helpers import Error, IntegrityError
from .helpers import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, to_localtime
-from .helpers import format_time, format_timedelta, format_file_size, file_status
+from .helpers import format_time, format_timedelta, format_file_size, file_status, FileSize
from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
from .helpers import StableDict
from .helpers import bin_to_hex
@@ -68,6 +68,14 @@ class Statistics:
return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(
cls=type(self).__name__, hash=id(self), self=self)
def as_dict(self):
return {
'original_size': FileSize(self.osize),
'compressed_size': FileSize(self.csize),
'deduplicated_size': FileSize(self.usize),
'nfiles': self.nfiles,
}
@property
def osize_fmt(self):
return format_file_size(self.osize)
@@ -282,7 +290,8 @@ class Archive:
self.end = end
self.consider_part_files = consider_part_files
self.pipeline = DownloadPipeline(self.repository, self.key)
-if create:
+self.create = create
+if self.create:
self.file_compression_logger = create_logger('borg.debug.file-compression')
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
@@ -343,6 +352,37 @@ class Archive:
def duration_from_meta(self):
return format_timedelta(self.ts_end - self.ts)
def info(self):
if self.create:
stats = self.stats
start = self.start.replace(tzinfo=timezone.utc)
end = self.end.replace(tzinfo=timezone.utc)
else:
stats = self.calc_stats(self.cache)
start = self.ts
end = self.ts_end
info = {
'name': self.name,
'id': self.fpr,
'start': format_time(to_localtime(start)),
'end': format_time(to_localtime(end)),
'duration': (end - start).total_seconds(),
'stats': stats.as_dict(),
'limits': {
'max_archive_size': self.cache.chunks[self.id].csize / MAX_DATA_SIZE,
},
}
if self.create:
info['command_line'] = sys.argv
else:
info.update({
'command_line': self.metadata.cmdline,
'hostname': self.metadata.hostname,
'username': self.metadata.username,
'comment': self.metadata.get('comment', ''),
})
return info
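For orientation, a sketch of the dict info() builds for a freshly created archive (values illustrative, not from a real run; 'id' is the hex fingerprint and 'start'/'end' are rendered by format_time()):

# hypothetical result of Archive.info() during 'borg create --json'
{
    'name': 'test',
    'id': '<64 hex digits>',
    'start': 'Sat, 2017-02-25 23:05:35',
    'end': 'Sat, 2017-02-25 23:05:36',
    'duration': 1.0,
    'stats': {'original_size': 81920, 'compressed_size': 4161,
              'deduplicated_size': 4161, 'nfiles': 1},
    'limits': {'max_archive_size': 0.00002},
    'command_line': ['borg', 'create', '--json', 'repo::test', 'input'],
}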
def __str__(self):
return '''\
Archive name: {0.name}

src/borg/archiver.py

@@ -17,7 +17,7 @@ import textwrap
import time
import traceback
from binascii import unhexlify
-from datetime import datetime
+from datetime import datetime, timedelta
from itertools import zip_longest
from .logger import create_logger, setup_logging
@@ -37,7 +37,8 @@ from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from .helpers import Error, NoManifestError
from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
from .helpers import PrefixSpec, SortBySpec, HUMAN_SORT_KEYS
-from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive
+from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
+from .helpers import format_time, format_timedelta, format_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
from .helpers import prune_within, prune_split
from .helpers import to_localtime, timestamp
@@ -52,6 +53,7 @@ from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
from .helpers import ErrorIgnoringTextIOWrapper
from .helpers import ProgressIndicatorPercent
from .helpers import basic_json_data, json_print
from .item import Item
from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
from .keymanager import KeyManager
@@ -364,14 +366,20 @@ class Archiver:
archive.save(comment=args.comment, timestamp=args.timestamp)
if args.progress:
archive.stats.show_progress(final=True)
+args.stats |= args.json
if args.stats:
-log_multi(DASHES,
-str(archive),
-DASHES,
-STATS_HEADER,
-str(archive.stats),
-str(cache),
-DASHES, logger=logging.getLogger('borg.output.stats'))
+if args.json:
+json_print(basic_json_data(manifest, cache=cache, extra={
+'archive': archive,
+}))
+else:
+log_multi(DASHES,
+str(archive),
+DASHES,
+STATS_HEADER,
+str(archive.stats),
+str(cache),
+DASHES, logger=logging.getLogger('borg.output.stats'))
self.output_filter = args.output_filter
self.output_list = args.output_list
@@ -934,10 +942,12 @@ class Archiver:
format = "{path}{NL}"
else:
format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
-formatter = ItemFormatter(archive, format)
+formatter = ItemFormatter(archive, format, json=args.json)
write(safe_encode(formatter.begin()))
for item in archive.iter_items(lambda item: matcher.match(item.path)):
write(safe_encode(formatter.format_item(item)))
+write(safe_encode(formatter.end()))
return self.exit_code
def _list_repository(self, args, manifest, write):
@@ -949,8 +959,18 @@ class Archiver:
format = "{archive:<36} {time} [{id}]{NL}"
formatter = ArchiveFormatter(format)
+output_data = []
for archive_info in manifest.archives.list_considering(args):
-write(safe_encode(formatter.format_item(archive_info)))
+if args.json:
+output_data.append(formatter.get_item_data(archive_info))
+else:
+write(safe_encode(formatter.format_item(archive_info)))
+if args.json:
+json_print(basic_json_data(manifest, extra={
+'archives': output_data
+}))
return self.exit_code
@@ -960,7 +980,7 @@ class Archiver:
if any((args.location.archive, args.first, args.last, args.prefix)):
return self._info_archives(args, repository, manifest, key, cache)
else:
-return self._info_repository(repository, key, cache)
+return self._info_repository(args, repository, manifest, key, cache)
def _info_archives(self, args, repository, manifest, key, cache):
def format_cmdline(cmdline):
@@ -973,45 +993,75 @@ class Archiver:
if not archive_names:
return self.exit_code
+output_data = []
for i, archive_name in enumerate(archive_names, 1):
archive = Archive(repository, key, manifest, archive_name, cache=cache,
consider_part_files=args.consider_part_files)
-stats = archive.calc_stats(cache)
-print('Archive name: %s' % archive.name)
-print('Archive fingerprint: %s' % archive.fpr)
-print('Comment: %s' % archive.metadata.get('comment', ''))
-print('Hostname: %s' % archive.metadata.hostname)
-print('Username: %s' % archive.metadata.username)
-print('Time (start): %s' % format_time(to_localtime(archive.ts)))
-print('Time (end): %s' % format_time(to_localtime(archive.ts_end)))
-print('Duration: %s' % archive.duration_from_meta)
-print('Number of files: %d' % stats.nfiles)
-print('Command line: %s' % format_cmdline(archive.metadata.cmdline))
-print('Utilization of max. archive size: %d%%' % (100 * cache.chunks[archive.id].csize / MAX_DATA_SIZE))
-print(DASHES)
-print(STATS_HEADER)
-print(str(stats))
-print(str(cache))
+info = archive.info()
+if args.json:
+output_data.append(info)
+else:
+info['duration'] = format_timedelta(timedelta(seconds=info['duration']))
+info['command_line'] = format_cmdline(info['command_line'])
+print(textwrap.dedent("""
+Archive name: {name}
+Archive fingerprint: {id}
+Comment: {comment}
+Hostname: {hostname}
+Username: {username}
+Time (start): {start}
+Time (end): {end}
+Duration: {duration}
+Number of files: {stats[nfiles]}
+Command line: {command_line}
+Utilization of max. archive size: {limits[max_archive_size]:.0%}
+------------------------------------------------------------------------------
+Original size Compressed size Deduplicated size
+This archive: {stats[original_size]:>20s} {stats[compressed_size]:>20s} {stats[deduplicated_size]:>20s}
+{cache}
+""").strip().format(cache=cache, **info))
if self.exit_code:
break
-if len(archive_names) - i:
+if not args.json and len(archive_names) - i:
print()
+if args.json:
+json_print(basic_json_data(manifest, cache=cache, extra={
+'archives': output_data,
+}))
return self.exit_code
-def _info_repository(self, repository, key, cache):
-print('Repository ID: %s' % bin_to_hex(repository.id))
-if key.NAME == 'plaintext':
-encrypted = 'No'
+def _info_repository(self, args, repository, manifest, key, cache):
+info = basic_json_data(manifest, cache=cache, extra={
+'security_dir': cache.security_manager.dir,
+})
+if args.json:
+json_print(info)
else:
-encrypted = 'Yes (%s)' % key.NAME
-print('Encrypted: %s' % encrypted)
-if key.NAME.startswith('key file'):
-print('Key file: %s' % key.find_key())
-print('Cache: %s' % cache.path)
-print('Security dir: %s' % cache.security_manager.dir)
-print(DASHES)
-print(STATS_HEADER)
-print(str(cache))
+encryption = 'Encrypted: '
+if key.NAME == 'plaintext':
+encryption += 'No'
+else:
+encryption += 'Yes (%s)' % key.NAME
+if key.NAME.startswith('key file'):
+encryption += '\nKey file: %s' % key.find_key()
+info['encryption'] = encryption
+print(textwrap.dedent("""
+Repository ID: {id}
+Location: {location}
+{encryption}
+Cache: {cache.path}
+Security dir: {security_dir}
+""").strip().format(
+id=bin_to_hex(repository.id),
+location=repository._location.canonical_path(),
+**info))
+print(DASHES)
+print(STATS_HEADER)
+print(str(cache))
return self.exit_code
@with_repository(exclusive=True)
@@ -2146,6 +2196,8 @@ class Archiver:
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
help='only display items with the given status characters')
subparser.add_argument('--json', action='store_true',
help='output stats as JSON (implies --stats)')
exclude_group = subparser.add_argument_group('Exclusion options')
exclude_group.add_argument('-e', '--exclude', dest='patterns',
@@ -2424,6 +2476,10 @@ class Archiver:
subparser.add_argument('--format', '--list-format', dest='format', type=str,
help="""specify format for file listing
(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
subparser.add_argument('--json', action='store_true',
help='format output as JSON. The form of --format is ignored, but keys used in it '
'are added to the JSON output. Some keys are always present. Note: JSON can only '
'represent text. A "bpath" key is therefore not available.')
subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository/archive to list contents of')
@@ -2542,6 +2598,8 @@ class Archiver:
subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='archive or repository to display information about')
subparser.add_argument('--json', action='store_true',
help='format output as JSON')
self.add_archives_filters_args(subparser)
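Taken together, the new switches make create, list and info machine-readable. A minimal consumer sketch (assumes a borg with this change on PATH and a repository at /path/to/repo; both are placeholders):

import json
import subprocess

# 'borg info --json REPO' prints exactly one JSON object on stdout (see json_print() below).
out = subprocess.check_output(['borg', 'info', '--json', '/path/to/repo'])
info = json.loads(out.decode('utf-8'))
print(info['repository']['id'], info['encryption']['mode'])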
break_lock_epilog = process_epilog("""

src/borg/cache.py

@@ -219,18 +219,22 @@ All archives: {0.total_size:>20s} {0.total_csize:>20s} {0.unique_csize:>20s}
Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
return fmt.format(self.format_tuple())
-def format_tuple(self):
-Summary = namedtuple('Summary', ['total_size', 'total_csize', 'unique_size', 'unique_csize', 'total_unique_chunks',
-'total_chunks'])
-stats = Summary(*self.chunks.summarize())._asdict()
+Summary = namedtuple('Summary', ['total_size', 'total_csize', 'unique_size', 'unique_csize', 'total_unique_chunks', 'total_chunks'])
+def stats(self):
+# XXX: this should really be moved down to `hashindex.pyx`
+stats = self.Summary(*self.chunks.summarize())._asdict()
+return stats
+def format_tuple(self):
+stats = self.stats()
for field in ['total_size', 'total_csize', 'unique_csize']:
stats[field] = format_file_size(stats[field])
-return Summary(**stats)
+return self.Summary(**stats)
def chunks_stored_size(self):
-Summary = namedtuple('Summary', ['total_size', 'total_csize', 'unique_size', 'unique_csize', 'total_unique_chunks', 'total_chunks'])
-stats = Summary(*self.chunks.summarize())
-return stats.unique_csize
+return self.stats()['unique_csize']
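With Summary hoisted to a class attribute, stats() now returns a plain dict of raw integers that can go straight into JSON; format_tuple() keeps humanizing three fields for the text table only. A sketch of the resulting shape (field values illustrative):

stats = cache.stats()
# {'total_size': 102400, 'total_csize': 51234, 'unique_size': 81920,
#  'unique_csize': 40987, 'total_unique_chunks': 2, 'total_chunks': 3}
json.dumps(stats)  # serializes as-is; no size formatting applied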
def create(self):
"""Create a new empty cache at `self.path`

src/borg/helpers.py

@ -5,6 +5,7 @@ import grp
import hashlib
import logging
import io
import json
import os
import os.path
import platform
@ -206,6 +207,10 @@ class Manifest:
def id_str(self):
return bin_to_hex(self.id)
@property
def last_timestamp(self):
return datetime.strptime(self.timestamp, "%Y-%m-%dT%H:%M:%S.%f")
@classmethod
def load(cls, repository, key=None, force_tam_not_required=False):
from .item import ManifestItem
@@ -250,7 +255,7 @@ class Manifest:
if self.timestamp is None:
self.timestamp = datetime.utcnow().isoformat()
else:
-prev_ts = datetime.strptime(self.timestamp, "%Y-%m-%dT%H:%M:%S.%f")
+prev_ts = self.last_timestamp
incremented = (prev_ts + timedelta(microseconds=1)).isoformat()
self.timestamp = max(incremented, datetime.utcnow().isoformat())
manifest = ManifestItem(
@@ -826,6 +831,11 @@ def format_file_size(v, precision=2, sign=False):
return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)
class FileSize(int):
def __format__(self, format_spec):
return format_file_size(int(self)).__format__(format_spec)
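FileSize relies on json's encoder serializing int subclasses by integer value, while str.format() goes through __format__ and thus through format_file_size(). A quick illustration (the rendered string is approximate):

size = FileSize(81920)
json.dumps({'size': size})  # '{"size": 81920}' - a plain integer in JSON
'{:>12s}'.format(size)      # '    81.92 kB' - human-readable in text output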
def parse_file_size(s):
"""Return int from file size (1234, 55G, 1.7T)."""
if not s:
@@ -1560,6 +1570,7 @@ class ArchiveFormatter(BaseFormatter):
def get_item_data(self, archive):
return {
'name': remove_surrogates(archive.name),
'barchive': archive.name,
'archive': remove_surrogates(archive.name),
'id': bin_to_hex(archive.id),
@@ -1568,7 +1579,7 @@
@staticmethod
def keys_help():
return " - archive: archive name interpreted as text (might be missing non-text characters, see barchive)\n" \
return " - archive, name: archive name interpreted as text (might be missing non-text characters, see barchive)\n" \
" - barchive: verbatim archive name, can contain any character except NUL\n" \
" - time: time of creation of the archive\n" \
" - id: internal ID of the archive"
@@ -1627,8 +1638,9 @@ class ItemFormatter(BaseFormatter):
assert not keys, str(keys)
return "\n".join(help)
-def __init__(self, archive, format):
+def __init__(self, archive, format, *, json=False):
self.archive = archive
+self.json = json
static_keys = {
'archivename': archive.name,
'archiveid': archive.fpr,
@@ -1653,7 +1665,33 @@ class ItemFormatter(BaseFormatter):
for hash_function in hashlib.algorithms_guaranteed:
self.add_key(hash_function, partial(self.hash_item, hash_function))
self.used_call_keys = set(self.call_keys) & self.format_keys
-self.item_data = static_keys
+if self.json:
+self.item_data = {}
+self.format_item = self.format_item_json
+self.first = True
+else:
+self.item_data = static_keys
+def begin(self):
+if not self.json:
+return ''
+begin = json_dump(basic_json_data(self.archive.manifest))
+begin, _, _ = begin.rpartition('\n}') # remove last closing brace, we want to extend the object
+begin += ',\n'
+begin += ' "files": [\n'
+return begin
+def end(self):
+if not self.json:
+return ''
+return "]}"
+def format_item_json(self, item):
+if self.first:
+self.first = False
+return json.dumps(self.get_item_data(item))
+else:
+return ',' + json.dumps(self.get_item_data(item))
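begin(), format_item_json() and end() hand-assemble a single JSON document so a listing can stream instead of buffering all items: begin() re-opens the object json_dump() produced and starts a "files" array, format_item_json() comma-separates the elements, and end() closes array and object. A driving sketch (assumes an Archive instance as elsewhere in this diff):

formatter = ItemFormatter(archive, '{path}{NL}', json=True)
pieces = [formatter.begin()]                    # '{ ..., "files": ['
pieces.extend(formatter.format_item(item) for item in archive.iter_items())
pieces.append(formatter.end())                  # ']}'
json.loads(''.join(pieces))                     # parses as one object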
def add_key(self, key, callable_with_item):
self.call_keys[key] = callable_with_item
@@ -1680,12 +1718,15 @@ class ItemFormatter(BaseFormatter):
item_data['uid'] = item.uid
item_data['gid'] = item.gid
item_data['path'] = remove_surrogates(item.path)
-item_data['bpath'] = item.path
+if self.json:
+item_data['healthy'] = 'chunks_healthy' not in item
+else:
+item_data['bpath'] = item.path
+item_data['extra'] = extra
+item_data['health'] = 'broken' if 'chunks_healthy' in item else 'healthy'
item_data['source'] = source
item_data['linktarget'] = source
-item_data['extra'] = extra
item_data['flags'] = item.get('bsdflags')
-item_data['health'] = 'broken' if 'chunks_healthy' in item else 'healthy'
for key in self.used_call_keys:
item_data[key] = self.call_keys[key](item)
return item_data
@@ -2065,3 +2106,50 @@ def swidth_slice(string, max_width):
if reverse:
result.reverse()
return ''.join(result)
class BorgJsonEncoder(json.JSONEncoder):
def default(self, o):
from .repository import Repository
from .remote import RemoteRepository
from .archive import Archive
from .cache import Cache
if isinstance(o, Repository) or isinstance(o, RemoteRepository):
return {
'id': bin_to_hex(o.id),
'location': o._location.canonical_path(),
}
if isinstance(o, Archive):
return o.info()
if isinstance(o, Cache):
return {
'path': o.path,
'stats': o.stats(),
}
return super().default(o)
def basic_json_data(manifest, *, cache=None, extra=None):
key = manifest.key
data = extra or {}
data.update({
'repository': BorgJsonEncoder().default(manifest.repository),
'encryption': {
'mode': key.NAME,
},
})
data['repository']['last_modified'] = format_time(to_localtime(manifest.last_timestamp.replace(tzinfo=timezone.utc)))
if key.NAME.startswith('key file'):
data['encryption']['keyfile'] = key.find_key()
if cache:
data['cache'] = cache
return data
def json_dump(obj):
"""Dump using BorgJSONEncoder."""
return json.dumps(obj, sort_keys=True, indent=4, cls=BorgJsonEncoder)
def json_print(obj):
print(json_dump(obj))
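json_dump() pins sort_keys and indent so repeated runs produce byte-identical output, and BorgJsonEncoder lets Repository, Archive and Cache objects be embedded directly, as basic_json_data() does with cache. For example, the exact output for a small dict:

print(json_dump({'b': 1, 'a': FileSize(2048)}))
# {
#     "a": 2048,
#     "b": 1
# }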

src/borg/testsuite/archiver.py

@@ -1112,6 +1112,33 @@ class ArchiverTestCase(ArchiverTestCaseBase):
info_archive = self.cmd('info', '--first', '1', self.repository_location)
assert 'Archive name: test\n' in info_archive
def test_info_json(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
info_repo = json.loads(self.cmd('info', '--json', self.repository_location))
repository = info_repo['repository']
assert len(repository['id']) == 64
assert 'last_modified' in repository
assert info_repo['encryption']['mode'] == 'repokey'
assert 'keyfile' not in info_repo['encryption']
cache = info_repo['cache']
stats = cache['stats']
assert all(isinstance(o, int) for o in stats.values())
assert all(key in stats for key in ('total_chunks', 'total_csize', 'total_size', 'total_unique_chunks', 'unique_csize', 'unique_size'))
info_archive = json.loads(self.cmd('info', '--json', self.repository_location + '::test'))
assert info_repo['repository'] == info_archive['repository']
assert info_repo['cache'] == info_archive['cache']
archives = info_archive['archives']
assert len(archives) == 1
archive = archives[0]
assert archive['name'] == 'test'
assert isinstance(archive['command_line'], list)
assert isinstance(archive['duration'], float)
assert len(archive['id']) == 64
assert 'stats' in archive
def test_comment(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', '--encryption=repokey', self.repository_location)
@@ -1263,6 +1290,23 @@ class ArchiverTestCase(ArchiverTestCaseBase):
if has_lchflags:
self.assert_in("x input/file3", output)
def test_create_json(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', '--encryption=repokey', self.repository_location)
create_info = json.loads(self.cmd('create', '--json', self.repository_location + '::test', 'input'))
# The usual keys
assert 'encryption' in create_info
assert 'repository' in create_info
assert 'cache' in create_info
assert 'last_modified' in create_info['repository']
archive = create_info['archive']
assert archive['name'] == 'test'
assert isinstance(archive['command_line'], list)
assert isinstance(archive['duration'], float)
assert len(archive['id']) == 64
assert 'stats' in archive
def test_create_topical(self):
now = time.time()
self.create_regular_file('file1', size=1024 * 80)
@@ -1447,6 +1491,33 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert int(dsize) <= int(size)
assert int(dcsize) <= int(csize)
def test_list_json(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', '--encryption=repokey', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
list_repo = json.loads(self.cmd('list', '--json', self.repository_location))
repository = list_repo['repository']
assert len(repository['id']) == 64
assert 'last_modified' in repository
assert list_repo['encryption']['mode'] == 'repokey'
assert 'keyfile' not in list_repo['encryption']
list_archive = json.loads(self.cmd('list', '--json', self.repository_location + '::test'))
assert list_repo['repository'] == list_archive['repository']
files = list_archive['files']
assert len(files) == 2
file1 = files[1]
assert file1['path'] == 'input/file1'
assert file1['size'] == 81920
list_archive = json.loads(self.cmd('list', '--json', '--format={sha256}', self.repository_location + '::test'))
assert list_repo['repository'] == list_archive['repository']
files = list_archive['files']
assert len(files) == 2
file1 = files[1]
assert file1['path'] == 'input/file1'
assert file1['sha256'] == 'b2915eb69f260d8d3c25249195f2c8f4f716ea82ec760ae929732c0262442b2b'
def _get_sizes(self, compression, compressible, size=10000):
if compressible:
contents = b'X' * size