ArchiveItem.cmdline list-of-str -> .command_line str, fixes #7246

Same change for .recreate_cmdline -> .recreate_command_line.

JSON output key "command_line":
borg 1.x: sys.argv [list of str]
borg 2: shlex.join(sys.argv) [str]
This commit is contained in:
Thomas Waldmann 2023-01-19 23:57:43 +01:00
parent ccbfc4ee95
commit bf667170a7
No known key found for this signature in database
GPG Key ID: 243ACFA951F78E01
11 changed files with 44 additions and 40 deletions

View File

@ -542,7 +542,7 @@ The archive object itself further contains some metadata:
in the manifest, but leaves the *name* field of the archives as it was.
* *item_ptrs*, a list of "pointer chunk" IDs.
Each "pointer chunk" contains a list of chunk IDs of item metadata.
* *cmdline*, the command line which was used to create the archive
* *command_line*, the command line which was used to create the archive
* *hostname*
* *username*
* *time* and *time_end* are the start and end timestamps, respectively

View File

@ -32,7 +32,7 @@ from .helpers import Error, IntegrityError, set_ec
from .platform import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, archive_ts_now
from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd
from .helpers import StableDict
from .helpers import bin_to_hex
from .helpers import safe_ns
@ -597,11 +597,11 @@ class Archive:
"stats": stats.as_dict(),
}
if self.create:
info["command_line"] = sys.argv
info["command_line"] = join_cmd(sys.argv)
else:
info.update(
{
"command_line": self.metadata.cmdline,
"command_line": self.metadata.command_line,
"hostname": self.metadata.hostname,
"username": self.metadata.username,
"comment": self.metadata.get("comment", ""),
@ -676,7 +676,7 @@ Duration: {0.duration}
"name": name,
"comment": comment or "",
"item_ptrs": item_ptrs, # see #1473
"cmdline": sys.argv,
"command_line": join_cmd(sys.argv),
"hostname": hostname,
"username": getuser(),
"time": start.isoformat(timespec="microseconds"),
@ -1902,7 +1902,7 @@ class ArchiveChecker:
continue
if not valid_msgpacked_dict(data, archive_keys_serialized):
continue
if b"cmdline" not in data or b"\xa7version\x02" not in data:
if b"command_line" not in data or b"\xa7version\x02" not in data:
continue
try:
archive = msgpack.unpackb(data)
@ -2360,15 +2360,15 @@ class ArchiveRecreater:
additional_metadata = {
"time": archive.metadata.time,
"time_end": archive.metadata.get("time_end") or archive.metadata.time,
"cmdline": archive.metadata.cmdline,
"command_line": archive.metadata.command_line,
# but also remember recreate metadata:
"recreate_cmdline": sys.argv,
"recreate_command_line": join_cmd(sys.argv),
}
else:
additional_metadata = {
"cmdline": archive.metadata.cmdline,
"command_line": archive.metadata.command_line,
# but also remember recreate metadata:
"recreate_cmdline": sys.argv,
"recreate_command_line": join_cmd(sys.argv),
}
target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)

View File

@ -1,12 +1,11 @@
import argparse
import shlex
import textwrap
from datetime import timedelta
from ._common import with_repository
from ..archive import Archive
from ..constants import * # NOQA
from ..helpers import remove_surrogates, format_timedelta, json_print, basic_json_data
from ..helpers import format_timedelta, json_print, basic_json_data
from ..manifest import Manifest
from ..logger import create_logger
@ -19,9 +18,6 @@ class InfoMixIn:
def do_info(self, args, repository, manifest, cache):
"""Show archive details such as disk space used"""
def format_cmdline(cmdline):
    """Render a list of command line arguments as one shell-quoted string.

    The joined string is passed through remove_surrogates before it is returned.
    """
    quoted = shlex.join(cmdline)
    return remove_surrogates(quoted)
args.consider_checkpoints = True
archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
@ -36,7 +32,6 @@ class InfoMixIn:
output_data.append(info)
else:
info["duration"] = format_timedelta(timedelta(seconds=info["duration"]))
info["command_line"] = format_cmdline(info["command_line"])
print(
textwrap.dedent(
"""

View File

@ -11,17 +11,18 @@ REQUIRED_ITEM_KEYS = frozenset(["path", "mtime"])
# this set must be kept complete, otherwise rebuild_manifest might malfunction:
# fmt: off
ARCHIVE_KEYS = frozenset(['version', 'name', 'cmdline', 'hostname', 'username', 'time', 'time_end',
ARCHIVE_KEYS = frozenset(['version', 'name', 'hostname', 'username', 'time', 'time_end',
'items', # legacy v1 archives
'item_ptrs', # v2+ archives
'comment', 'chunker_params',
'recreate_cmdline',
'command_line', 'recreate_command_line', # v2+ archives
'cmdline', 'recreate_cmdline', # legacy
'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. b2
'size', 'nfiles', 'size_parts', 'nfiles_parts'])
# fmt: on
# this is the set of keys that are always present in archives:
REQUIRED_ARCHIVE_KEYS = frozenset(["version", "name", "item_ptrs", "cmdline", "time"])
REQUIRED_ARCHIVE_KEYS = frozenset(["version", "name", "item_ptrs", "command_line", "time"])
# default umask, overridden by --umask, defaults to read/write only for owner
UMASK_DEFAULT = 0o077

View File

@ -19,7 +19,7 @@ from .fs import HardLinkManager
from .misc import sysinfo, log_multi, consume
from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper
from .parseformat import bin_to_hex, safe_encode, safe_decode
from .parseformat import text_to_json, binary_to_json, remove_surrogates
from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
from .parseformat import SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
from .parseformat import format_file_size, parse_file_size, FileSize, parse_storage_quota

View File

@ -85,6 +85,11 @@ def text_to_json(key, value):
return data
def join_cmd(argv, rs=False):
    """Join an argument list into a single shell-quoted command line string.

    argv: list of str arguments (for example sys.argv).
    rs: when true, the joined string is passed through remove_surrogates
        before being returned; when false, it is returned as is.
    """
    joined = shlex.join(argv)
    if rs:
        return remove_surrogates(joined)
    return joined
def eval_escapes(s):
    r"""Interpret backslash escape sequences that are written literally in *s*.

    Example: the two characters ``\n`` in the input become one newline
    character in the result.

    The string is first encoded to ASCII with the "backslashreplace" error
    handler, then decoded again with the "unicode-escape" codec.
    """
    ascii_bytes = s.encode("ascii", "backslashreplace")
    return ascii_bytes.decode("unicode-escape")
@ -717,8 +722,8 @@ class ArchiveFormatter(BaseFormatter):
"hostname": partial(self.get_meta, "hostname"),
"username": partial(self.get_meta, "username"),
"comment": partial(self.get_meta, "comment"),
"command_line": partial(self.get_meta, "command_line"),
"end": self.get_ts_end,
"command_line": self.get_cmdline,
}
self.used_call_keys = set(self.call_keys) & self.format_keys
if self.json:
@ -748,8 +753,8 @@ class ArchiveFormatter(BaseFormatter):
item_data[key] = self.call_keys[key]()
# Note: name and comment are validated, should never contain surrogate escapes.
# But unsure whether hostname, username could contain surrogate escapes, play safe:
for key in "hostname", "username":
# But unsure whether hostname, username, command_line could contain surrogate escapes, play safe:
for key in "hostname", "username", "command_line":
if key in item_data:
item_data.update(text_to_json(key, item_data[key]))
return item_data
@ -766,13 +771,6 @@ class ArchiveFormatter(BaseFormatter):
def get_meta(self, key):
    """Return the archive metadata value stored under *key*, or "" if it is absent."""
    metadata = self.archive.metadata
    return metadata.get(key, "")
def get_cmdline(self):
    """Return the archive's "cmdline" metadata with surrogates removed.

    A missing "cmdline" entry is treated as an empty argument list.
    When self.json is set, the result is a list of str; otherwise it is a
    single string with every argument shell-quoted and joined by spaces.
    """
    cleaned = [remove_surrogates(arg) for arg in self.archive.metadata.get("cmdline", [])]
    if self.json:
        return cleaned
    return " ".join(shlex.quote(arg) for arg in cleaned)
def get_ts_end(self):
    """Return the archive's end timestamp (archive.ts_end) rendered by self.format_time."""
    render = self.format_time
    return render(self.archive.ts_end)

View File

@ -500,14 +500,16 @@ cdef class ArchiveItem(PropDict):
name = PropDictProperty(str, 'surrogate-escaped str')
items = PropDictProperty(list) # list of chunk ids of item metadata stream (only in memory)
item_ptrs = PropDictProperty(list) # list of blocks with list of chunk ids of ims, arch v2
cmdline = PropDictProperty(list) # list of s-e-str
cmdline = PropDictProperty(list) # legacy, list of s-e-str
command_line = PropDictProperty(str, 'surrogate-escaped str')
hostname = PropDictProperty(str, 'surrogate-escaped str')
username = PropDictProperty(str, 'surrogate-escaped str')
time = PropDictProperty(str)
time_end = PropDictProperty(str)
comment = PropDictProperty(str, 'surrogate-escaped str')
chunker_params = PropDictProperty(tuple)
recreate_cmdline = PropDictProperty(list) # list of s-e-str
recreate_cmdline = PropDictProperty(list) # legacy, list of s-e-str
recreate_command_line = PropDictProperty(str, 'surrogate-escaped str')
# recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
recreate_source_id = PropDictProperty(bytes)
recreate_args = PropDictProperty(list) # list of s-e-str
@ -529,7 +531,9 @@ cdef class ArchiveItem(PropDict):
v = fix_str_value(d, k, 'replace')
if k == 'chunker_params':
v = fix_tuple_of_str_and_int(v)
if k in ('cmdline', 'recreate_cmdline'):
if k in ('command_line', 'recreate_command_line'):
v = fix_str_value(d, k)
if k in ('cmdline', 'recreate_cmdline'): # legacy
v = fix_list_of_str(v)
if k == 'items': # legacy
v = fix_list_of_bytes(v)

View File

@ -177,7 +177,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
archive = msgpack.packb(
{
"cmdline": [],
"command_line": "",
"item_ptrs": [],
"hostname": "foo",
"username": "bar",

View File

@ -692,7 +692,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
archive = create_info["archive"]
assert archive["name"] == "test"
assert isinstance(archive["command_line"], list)
assert isinstance(archive["command_line"], str)
assert isinstance(archive["duration"], float)
assert len(archive["id"]) == 64
assert "stats" in archive

View File

@ -33,7 +33,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert len(archives) == 1
archive = archives[0]
assert archive["name"] == "test"
assert isinstance(archive["command_line"], list)
assert isinstance(archive["command_line"], str)
assert isinstance(archive["duration"], float)
assert len(archive["id"]) == 64
assert "stats" in archive

View File

@ -2,7 +2,7 @@ from struct import Struct
from .constants import REQUIRED_ITEM_KEYS, CH_BUZHASH
from .compress import ZLIB, ZLIB_legacy, ObfuscateSize
from .helpers import HardLinkManager
from .helpers import HardLinkManager, join_cmd
from .item import Item
from .logger import create_logger
@ -26,14 +26,14 @@ class UpgraderNoOp:
new_metadata = {}
# keep all metadata except archive version and stats.
for attr in (
"cmdline",
"command_line",
"hostname",
"username",
"time",
"time_end",
"comment",
"chunker_params",
"recreate_cmdline",
"recreate_command_line",
):
if hasattr(metadata, attr):
new_metadata[attr] = getattr(metadata, attr)
@ -144,7 +144,7 @@ class UpgraderFrom12To20:
new_metadata = {}
# keep all metadata except archive version and stats. also do not keep
# recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2.
for attr in ("cmdline", "hostname", "username", "comment", "chunker_params", "recreate_cmdline"):
for attr in ("hostname", "username", "comment", "chunker_params"):
if hasattr(metadata, attr):
new_metadata[attr] = getattr(metadata, attr)
if chunker_params := new_metadata.get("chunker_params"):
@ -155,4 +155,10 @@ class UpgraderFrom12To20:
for attr in ("time", "time_end"):
if hasattr(metadata, attr):
new_metadata[attr] = getattr(metadata, attr) + "+00:00"
# borg 1: cmdline, recreate_cmdline: a copy of sys.argv
# borg 2: command_line, recreate_command_line: a single string
if hasattr(metadata, "cmdline"):
new_metadata["command_line"] = join_cmd(getattr(metadata, "cmdline"))
if hasattr(metadata, "recreate_cmdline"):
new_metadata["recreate_command_line"] = join_cmd(getattr(metadata, "recreate_cmdline"))
return new_metadata