1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-03-15 00:21:56 +00:00

ArchiveItem.cmdline list-of-str -> .command_line str, fixes #7246

Same change for .recreate_cmdline -> .recreate_command_line .

JSON output key "command_line":
borg 1.x: sys.argv [list of str]
borg 2: shlex.join(sys.argv) [str]
This commit is contained in:
Thomas Waldmann 2023-01-19 23:57:43 +01:00
parent ccbfc4ee95
commit bf667170a7
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
11 changed files with 44 additions and 40 deletions

View file

@ -542,7 +542,7 @@ The archive object itself further contains some metadata:
in the manifest, but leaves the *name* field of the archives as it was. in the manifest, but leaves the *name* field of the archives as it was.
* *item_ptrs*, a list of "pointer chunk" IDs. * *item_ptrs*, a list of "pointer chunk" IDs.
Each "pointer chunk" contains a list of chunk IDs of item metadata. Each "pointer chunk" contains a list of chunk IDs of item metadata.
* *cmdline*, the command line which was used to create the archive * *command_line*, the command line which was used to create the archive
* *hostname* * *hostname*
* *username* * *username*
* *time* and *time_end* are the start and end timestamps, respectively * *time* and *time_end* are the start and end timestamps, respectively

View file

@ -32,7 +32,7 @@ from .helpers import Error, IntegrityError, set_ec
from .platform import uid2user, user2uid, gid2group, group2gid from .platform import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, archive_ts_now from .helpers import parse_timestamp, archive_ts_now
from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd
from .helpers import StableDict from .helpers import StableDict
from .helpers import bin_to_hex from .helpers import bin_to_hex
from .helpers import safe_ns from .helpers import safe_ns
@ -597,11 +597,11 @@ class Archive:
"stats": stats.as_dict(), "stats": stats.as_dict(),
} }
if self.create: if self.create:
info["command_line"] = sys.argv info["command_line"] = join_cmd(sys.argv)
else: else:
info.update( info.update(
{ {
"command_line": self.metadata.cmdline, "command_line": self.metadata.command_line,
"hostname": self.metadata.hostname, "hostname": self.metadata.hostname,
"username": self.metadata.username, "username": self.metadata.username,
"comment": self.metadata.get("comment", ""), "comment": self.metadata.get("comment", ""),
@ -676,7 +676,7 @@ Duration: {0.duration}
"name": name, "name": name,
"comment": comment or "", "comment": comment or "",
"item_ptrs": item_ptrs, # see #1473 "item_ptrs": item_ptrs, # see #1473
"cmdline": sys.argv, "command_line": join_cmd(sys.argv),
"hostname": hostname, "hostname": hostname,
"username": getuser(), "username": getuser(),
"time": start.isoformat(timespec="microseconds"), "time": start.isoformat(timespec="microseconds"),
@ -1902,7 +1902,7 @@ class ArchiveChecker:
continue continue
if not valid_msgpacked_dict(data, archive_keys_serialized): if not valid_msgpacked_dict(data, archive_keys_serialized):
continue continue
if b"cmdline" not in data or b"\xa7version\x02" not in data: if b"command_line" not in data or b"\xa7version\x02" not in data:
continue continue
try: try:
archive = msgpack.unpackb(data) archive = msgpack.unpackb(data)
@ -2360,15 +2360,15 @@ class ArchiveRecreater:
additional_metadata = { additional_metadata = {
"time": archive.metadata.time, "time": archive.metadata.time,
"time_end": archive.metadata.get("time_end") or archive.metadata.time, "time_end": archive.metadata.get("time_end") or archive.metadata.time,
"cmdline": archive.metadata.cmdline, "command_line": archive.metadata.command_line,
# but also remember recreate metadata: # but also remember recreate metadata:
"recreate_cmdline": sys.argv, "recreate_command_line": join_cmd(sys.argv),
} }
else: else:
additional_metadata = { additional_metadata = {
"cmdline": archive.metadata.cmdline, "command_line": archive.metadata.command_line,
# but also remember recreate metadata: # but also remember recreate metadata:
"recreate_cmdline": sys.argv, "recreate_command_line": join_cmd(sys.argv),
} }
target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata) target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)

View file

@ -1,12 +1,11 @@
import argparse import argparse
import shlex
import textwrap import textwrap
from datetime import timedelta from datetime import timedelta
from ._common import with_repository from ._common import with_repository
from ..archive import Archive from ..archive import Archive
from ..constants import * # NOQA from ..constants import * # NOQA
from ..helpers import remove_surrogates, format_timedelta, json_print, basic_json_data from ..helpers import format_timedelta, json_print, basic_json_data
from ..manifest import Manifest from ..manifest import Manifest
from ..logger import create_logger from ..logger import create_logger
@ -19,9 +18,6 @@ class InfoMixIn:
def do_info(self, args, repository, manifest, cache): def do_info(self, args, repository, manifest, cache):
"""Show archive details such as disk space used""" """Show archive details such as disk space used"""
def format_cmdline(cmdline):
return remove_surrogates(" ".join(shlex.quote(x) for x in cmdline))
args.consider_checkpoints = True args.consider_checkpoints = True
archive_names = tuple(x.name for x in manifest.archives.list_considering(args)) archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
@ -36,7 +32,6 @@ class InfoMixIn:
output_data.append(info) output_data.append(info)
else: else:
info["duration"] = format_timedelta(timedelta(seconds=info["duration"])) info["duration"] = format_timedelta(timedelta(seconds=info["duration"]))
info["command_line"] = format_cmdline(info["command_line"])
print( print(
textwrap.dedent( textwrap.dedent(
""" """

View file

@ -11,17 +11,18 @@ REQUIRED_ITEM_KEYS = frozenset(["path", "mtime"])
# this set must be kept complete, otherwise rebuild_manifest might malfunction: # this set must be kept complete, otherwise rebuild_manifest might malfunction:
# fmt: off # fmt: off
ARCHIVE_KEYS = frozenset(['version', 'name', 'cmdline', 'hostname', 'username', 'time', 'time_end', ARCHIVE_KEYS = frozenset(['version', 'name', 'hostname', 'username', 'time', 'time_end',
'items', # legacy v1 archives 'items', # legacy v1 archives
'item_ptrs', # v2+ archives 'item_ptrs', # v2+ archives
'comment', 'chunker_params', 'comment', 'chunker_params',
'recreate_cmdline', 'command_line', 'recreate_command_line', # v2+ archives
'cmdline', 'recreate_cmdline', # legacy
'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. b2 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. b2
'size', 'nfiles', 'size_parts', 'nfiles_parts']) 'size', 'nfiles', 'size_parts', 'nfiles_parts'])
# fmt: on # fmt: on
# this is the set of keys that are always present in archives: # this is the set of keys that are always present in archives:
REQUIRED_ARCHIVE_KEYS = frozenset(["version", "name", "item_ptrs", "cmdline", "time"]) REQUIRED_ARCHIVE_KEYS = frozenset(["version", "name", "item_ptrs", "command_line", "time"])
# default umask, overridden by --umask, defaults to read/write only for owner # default umask, overridden by --umask, defaults to read/write only for owner
UMASK_DEFAULT = 0o077 UMASK_DEFAULT = 0o077

View file

@ -19,7 +19,7 @@ from .fs import HardLinkManager
from .misc import sysinfo, log_multi, consume from .misc import sysinfo, log_multi, consume
from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper
from .parseformat import bin_to_hex, safe_encode, safe_decode from .parseformat import bin_to_hex, safe_encode, safe_decode
from .parseformat import text_to_json, binary_to_json, remove_surrogates from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
from .parseformat import SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper from .parseformat import SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
from .parseformat import format_file_size, parse_file_size, FileSize, parse_storage_quota from .parseformat import format_file_size, parse_file_size, FileSize, parse_storage_quota

View file

@ -85,6 +85,11 @@ def text_to_json(key, value):
return data return data
def join_cmd(argv, rs=False):
    """Join a list of command line arguments into one shell-escaped string.

    :param argv: list of str arguments, e.g. ``sys.argv``.
    :param rs: if true, the joined string is passed through
               ``remove_surrogates`` before it is returned.
    :return: the command line as a single str, as built by ``shlex.join``.
    """
    cmd = shlex.join(argv)
    return remove_surrogates(cmd) if rs else cmd
def eval_escapes(s): def eval_escapes(s):
"""Evaluate literal escape sequences in a string (eg `\\n` -> `\n`).""" """Evaluate literal escape sequences in a string (eg `\\n` -> `\n`)."""
return s.encode("ascii", "backslashreplace").decode("unicode-escape") return s.encode("ascii", "backslashreplace").decode("unicode-escape")
@ -717,8 +722,8 @@ class ArchiveFormatter(BaseFormatter):
"hostname": partial(self.get_meta, "hostname"), "hostname": partial(self.get_meta, "hostname"),
"username": partial(self.get_meta, "username"), "username": partial(self.get_meta, "username"),
"comment": partial(self.get_meta, "comment"), "comment": partial(self.get_meta, "comment"),
"command_line": partial(self.get_meta, "command_line"),
"end": self.get_ts_end, "end": self.get_ts_end,
"command_line": self.get_cmdline,
} }
self.used_call_keys = set(self.call_keys) & self.format_keys self.used_call_keys = set(self.call_keys) & self.format_keys
if self.json: if self.json:
@ -748,8 +753,8 @@ class ArchiveFormatter(BaseFormatter):
item_data[key] = self.call_keys[key]() item_data[key] = self.call_keys[key]()
# Note: name and comment are validated, should never contain surrogate escapes. # Note: name and comment are validated, should never contain surrogate escapes.
# But unsure whether hostname, username could contain surrogate escapes, play safe: # But unsure whether hostname, username, command_line could contain surrogate escapes, play safe:
for key in "hostname", "username": for key in "hostname", "username", "command_line":
if key in item_data: if key in item_data:
item_data.update(text_to_json(key, item_data[key])) item_data.update(text_to_json(key, item_data[key]))
return item_data return item_data
@ -766,13 +771,6 @@ class ArchiveFormatter(BaseFormatter):
def get_meta(self, key): def get_meta(self, key):
return self.archive.metadata.get(key, "") return self.archive.metadata.get(key, "")
def get_cmdline(self):
cmdline = map(remove_surrogates, self.archive.metadata.get("cmdline", []))
if self.json:
return list(cmdline)
else:
return " ".join(map(shlex.quote, cmdline))
def get_ts_end(self): def get_ts_end(self):
return self.format_time(self.archive.ts_end) return self.format_time(self.archive.ts_end)

View file

@ -500,14 +500,16 @@ cdef class ArchiveItem(PropDict):
name = PropDictProperty(str, 'surrogate-escaped str') name = PropDictProperty(str, 'surrogate-escaped str')
items = PropDictProperty(list) # list of chunk ids of item metadata stream (only in memory) items = PropDictProperty(list) # list of chunk ids of item metadata stream (only in memory)
item_ptrs = PropDictProperty(list) # list of blocks with list of chunk ids of ims, arch v2 item_ptrs = PropDictProperty(list) # list of blocks with list of chunk ids of ims, arch v2
cmdline = PropDictProperty(list) # list of s-e-str cmdline = PropDictProperty(list) # legacy, list of s-e-str
command_line = PropDictProperty(str, 'surrogate-escaped str')
hostname = PropDictProperty(str, 'surrogate-escaped str') hostname = PropDictProperty(str, 'surrogate-escaped str')
username = PropDictProperty(str, 'surrogate-escaped str') username = PropDictProperty(str, 'surrogate-escaped str')
time = PropDictProperty(str) time = PropDictProperty(str)
time_end = PropDictProperty(str) time_end = PropDictProperty(str)
comment = PropDictProperty(str, 'surrogate-escaped str') comment = PropDictProperty(str, 'surrogate-escaped str')
chunker_params = PropDictProperty(tuple) chunker_params = PropDictProperty(tuple)
recreate_cmdline = PropDictProperty(list) # list of s-e-str recreate_cmdline = PropDictProperty(list) # legacy, list of s-e-str
recreate_command_line = PropDictProperty(str, 'surrogate-escaped str')
# recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2 # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
recreate_source_id = PropDictProperty(bytes) recreate_source_id = PropDictProperty(bytes)
recreate_args = PropDictProperty(list) # list of s-e-str recreate_args = PropDictProperty(list) # list of s-e-str
@ -529,7 +531,9 @@ cdef class ArchiveItem(PropDict):
v = fix_str_value(d, k, 'replace') v = fix_str_value(d, k, 'replace')
if k == 'chunker_params': if k == 'chunker_params':
v = fix_tuple_of_str_and_int(v) v = fix_tuple_of_str_and_int(v)
if k in ('cmdline', 'recreate_cmdline'): if k in ('command_line', 'recreate_command_line'):
v = fix_str_value(d, k)
if k in ('cmdline', 'recreate_cmdline'): # legacy
v = fix_list_of_str(v) v = fix_list_of_str(v)
if k == 'items': # legacy if k == 'items': # legacy
v = fix_list_of_bytes(v) v = fix_list_of_bytes(v)

View file

@ -177,7 +177,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
archive = msgpack.packb( archive = msgpack.packb(
{ {
"cmdline": [], "command_line": "",
"item_ptrs": [], "item_ptrs": [],
"hostname": "foo", "hostname": "foo",
"username": "bar", "username": "bar",

View file

@ -692,7 +692,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
archive = create_info["archive"] archive = create_info["archive"]
assert archive["name"] == "test" assert archive["name"] == "test"
assert isinstance(archive["command_line"], list) assert isinstance(archive["command_line"], str)
assert isinstance(archive["duration"], float) assert isinstance(archive["duration"], float)
assert len(archive["id"]) == 64 assert len(archive["id"]) == 64
assert "stats" in archive assert "stats" in archive

View file

@ -33,7 +33,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert len(archives) == 1 assert len(archives) == 1
archive = archives[0] archive = archives[0]
assert archive["name"] == "test" assert archive["name"] == "test"
assert isinstance(archive["command_line"], list) assert isinstance(archive["command_line"], str)
assert isinstance(archive["duration"], float) assert isinstance(archive["duration"], float)
assert len(archive["id"]) == 64 assert len(archive["id"]) == 64
assert "stats" in archive assert "stats" in archive

View file

@ -2,7 +2,7 @@ from struct import Struct
from .constants import REQUIRED_ITEM_KEYS, CH_BUZHASH from .constants import REQUIRED_ITEM_KEYS, CH_BUZHASH
from .compress import ZLIB, ZLIB_legacy, ObfuscateSize from .compress import ZLIB, ZLIB_legacy, ObfuscateSize
from .helpers import HardLinkManager from .helpers import HardLinkManager, join_cmd
from .item import Item from .item import Item
from .logger import create_logger from .logger import create_logger
@ -26,14 +26,14 @@ class UpgraderNoOp:
new_metadata = {} new_metadata = {}
# keep all metadata except archive version and stats. # keep all metadata except archive version and stats.
for attr in ( for attr in (
"cmdline", "command_line",
"hostname", "hostname",
"username", "username",
"time", "time",
"time_end", "time_end",
"comment", "comment",
"chunker_params", "chunker_params",
"recreate_cmdline", "recreate_command_line",
): ):
if hasattr(metadata, attr): if hasattr(metadata, attr):
new_metadata[attr] = getattr(metadata, attr) new_metadata[attr] = getattr(metadata, attr)
@ -144,7 +144,7 @@ class UpgraderFrom12To20:
new_metadata = {} new_metadata = {}
# keep all metadata except archive version and stats. also do not keep # keep all metadata except archive version and stats. also do not keep
# recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2. # recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2.
for attr in ("cmdline", "hostname", "username", "comment", "chunker_params", "recreate_cmdline"): for attr in ("hostname", "username", "comment", "chunker_params"):
if hasattr(metadata, attr): if hasattr(metadata, attr):
new_metadata[attr] = getattr(metadata, attr) new_metadata[attr] = getattr(metadata, attr)
if chunker_params := new_metadata.get("chunker_params"): if chunker_params := new_metadata.get("chunker_params"):
@ -155,4 +155,10 @@ class UpgraderFrom12To20:
for attr in ("time", "time_end"): for attr in ("time", "time_end"):
if hasattr(metadata, attr): if hasattr(metadata, attr):
new_metadata[attr] = getattr(metadata, attr) + "+00:00" new_metadata[attr] = getattr(metadata, attr) + "+00:00"
# borg 1: cmdline, recreate_cmdline: a copy of sys.argv
# borg 2: command_line, recreate_command_line: a single string
if hasattr(metadata, "cmdline"):
new_metadata["command_line"] = join_cmd(getattr(metadata, "cmdline"))
if hasattr(metadata, "recreate_cmdline"):
new_metadata["recreate_command_line"] = join_cmd(getattr(metadata, "recreate_cmdline"))
return new_metadata return new_metadata