list: JSON lines output for archive contents

This commit is contained in:
Marian Beermann 2017-05-05 15:49:56 +02:00
parent d8b7aef15c
commit 2a22f93e44
4 changed files with 36 additions and 66 deletions

View File

@ -392,39 +392,18 @@ The same archive with more information (``borg info --last 1 --json``)::
.. rubric:: File listings
Listing the contents of an archive can produce *a lot* of JSON. Each item (file, directory, ...) is described
by one object in the *items* array of the :ref:`borg_list` output. Refer to the *borg list* documentation for
the available keys and their meaning.
Listing the contents of an archive can produce *a lot* of JSON. Since many JSON implementations
don't support a streaming mode of operation, which is pretty much required to deal with this amount of
JSON, output is generated in the `JSON lines <http://jsonlines.org/>`_ format, which is simply
a sequence of JSON objects, one per line, separated by newlines.
Each item (file, directory, ...) is described by one object in the :ref:`borg_list` output.
Refer to the *borg list* documentation for the available keys and their meaning.
Example (excerpt)::
{
"encryption": {
"mode": "repokey"
},
"repository": {
"id": "0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23",
"last_modified": "Mon, 2017-02-27 21:21:58",
"location": "/home/user/repository"
},
"items": [
{
"type": "d",
"mode": "drwxr-xr-x",
"user": "user",
"group": "user",
"uid": 1000,
"gid": 1000,
"path": "linux",
"healthy": true,
"source": "",
"linktarget": "",
"flags": null,
"isomtime": "Sat, 2016-05-07 19:46:01",
"size": 0
}
]
}
{"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux", "healthy": true, "source": "", "linktarget": "", "flags": null, "isomtime": "Sat, 2016-05-07 19:46:01", "size": 0}
{"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux/baz", "healthy": true, "source": "", "linktarget": "", "flags": null, "isomtime": "Sat, 2016-05-07 19:46:01", "size": 0}
.. _msgid:

View File

@ -1048,8 +1048,14 @@ class Archiver:
write = sys.stdout.buffer.write
if args.location.archive:
if args.json:
self.print_error('The --json option is only valid for listing archives, not archive contents.')
return self.exit_code
return self._list_archive(args, repository, manifest, key, write)
else:
if args.json_lines:
self.print_error('The --json-lines option is only valid for listing archive contents, not archives.')
return self.exit_code
return self._list_repository(args, manifest, write)
def _list_archive(self, args, repository, manifest, key, write):
@ -1065,11 +1071,9 @@ class Archiver:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
consider_part_files=args.consider_part_files)
formatter = ItemFormatter(archive, format, json=args.json)
write(safe_encode(formatter.begin()))
formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
for item in archive.iter_items(lambda item: matcher.match(item.path)):
write(safe_encode(formatter.format_item(item)))
write(safe_encode(formatter.end()))
# Only load the cache if it will be used
if ItemFormatter.format_needs_cache(format):
@ -2616,9 +2620,17 @@ class Archiver:
help="""specify format for file listing
(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
subparser.add_argument('--json', action='store_true',
help='format output as JSON. The form of --format is ignored, but keys used in it '
'are added to the JSON output. Some keys are always present. Note: JSON can only '
'represent text. A "bpath" key is therefore not available.')
help='Only valid for listing archives. Format output as JSON. '
'The form of --format is ignored, '
'but keys used in it are added to the JSON output. '
'Some keys are always present. Note: JSON can only represent text. '
'A "barchive" key is therefore not available.')
subparser.add_argument('--json-lines', action='store_true',
help='Only valid for listing archive contents. Format output as JSON Lines. '
'The form of --format is ignored, '
'but keys used in it are added to the JSON output. '
'Some keys are always present. Note: JSON can only represent text. '
'A "bpath" key is therefore not available.')
subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository/archive to list contents of')

View File

@ -1505,9 +1505,9 @@ class ItemFormatter(BaseFormatter):
format_keys = {f[1] for f in Formatter().parse(format)}
return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
def __init__(self, archive, format, *, json=False):
def __init__(self, archive, format, *, json_lines=False):
self.archive = archive
self.json = json
self.json_lines = json_lines
static_keys = {
'archivename': archive.name,
'archiveid': archive.fpr,
@ -1532,33 +1532,14 @@ class ItemFormatter(BaseFormatter):
for hash_function in hashlib.algorithms_guaranteed:
self.add_key(hash_function, partial(self.hash_item, hash_function))
self.used_call_keys = set(self.call_keys) & self.format_keys
if self.json:
if self.json_lines:
self.item_data = {}
self.format_item = self.format_item_json
self.first = True
else:
self.item_data = static_keys
def begin(self):
if not self.json:
return ''
begin = json_dump(basic_json_data(self.archive.manifest))
begin, _, _ = begin.rpartition('\n}') # remove last closing brace, we want to extend the object
begin += ',\n'
begin += ' "items": [\n'
return begin
def end(self):
if not self.json:
return ''
return "]}"
def format_item_json(self, item):
if self.first:
self.first = False
return json.dumps(self.get_item_data(item))
else:
return ',' + json.dumps(self.get_item_data(item))
return json.dumps(self.get_item_data(item)) + '\n'
def add_key(self, key, callable_with_item):
self.call_keys[key] = callable_with_item
@ -1585,7 +1566,7 @@ class ItemFormatter(BaseFormatter):
item_data['uid'] = item.uid
item_data['gid'] = item.gid
item_data['path'] = remove_surrogates(item.path)
if self.json:
if self.json_lines:
item_data['healthy'] = 'chunks_healthy' not in item
else:
item_data['bpath'] = item.path

View File

@ -1615,17 +1615,15 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert list_repo['encryption']['mode'] == 'repokey'
assert 'keyfile' not in list_repo['encryption']
list_archive = json.loads(self.cmd('list', '--json', self.repository_location + '::test'))
assert list_repo['repository'] == list_archive['repository']
items = list_archive['items']
list_archive = self.cmd('list', '--json-lines', self.repository_location + '::test')
items = [json.loads(s) for s in list_archive.splitlines()]
assert len(items) == 2
file1 = items[1]
assert file1['path'] == 'input/file1'
assert file1['size'] == 81920
list_archive = json.loads(self.cmd('list', '--json', '--format={sha256}', self.repository_location + '::test'))
assert list_repo['repository'] == list_archive['repository']
items = list_archive['items']
list_archive = self.cmd('list', '--json-lines', '--format={sha256}', self.repository_location + '::test')
items = [json.loads(s) for s in list_archive.splitlines()]
assert len(items) == 2
file1 = items[1]
assert file1['path'] == 'input/file1'