import abc
import argparse
import base64
import binascii
import hashlib
import json
import os
import os.path
import re
import shlex
import stat
import uuid

from typing import Dict, Set, Tuple, ClassVar, Any, TYPE_CHECKING, Literal
from collections import Counter, OrderedDict
from datetime import datetime, timezone
from functools import partial
from string import Formatter

from ..logger import create_logger

logger = create_logger()

from .errors import Error
from .fs import get_keys_dir, make_path_safe
from .msgpack import Timestamp
from .time import OutputTimestamp, format_time, safe_timestamp
from .. import __version__ as borg_version
from .. import __version_tuple__ as borg_version_tuple
from ..constants import *  # NOQA

if TYPE_CHECKING:
    from ..item import ItemDiff


def bin_to_hex(binary):
    return binascii.hexlify(binary).decode("ascii")


def hex_to_bin(hex, length=None):
    try:
        binary = binascii.unhexlify(hex)
        binary_len = len(binary)
        if length is not None and binary_len != length:
            raise ValueError(f"Expected {length} bytes ({2 * length} hex digits), got {binary_len} bytes.")
    except binascii.Error as e:
        raise ValueError(str(e)) from None
    return binary


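# Illustrative usage of the two hex helpers above (doctest-style; the values
# follow directly from the binascii calls they wrap):
#   bin_to_hex(b"\xde\xad\xbe\xef")   -> "deadbeef"
#   hex_to_bin("deadbeef")            -> b"\xde\xad\xbe\xef"
#   hex_to_bin("deadbeef", length=2)  -> ValueError: Expected 2 bytes (4 hex digits), got 4 bytes.

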
def safe_decode(s, coding="utf-8", errors="surrogateescape"):
    """decode bytes to str, with round-tripping "invalid" bytes"""
    if s is None:
        return None
    return s.decode(coding, errors)


def safe_encode(s, coding="utf-8", errors="surrogateescape"):
    """encode str to bytes, with round-tripping "invalid" bytes"""
    if s is None:
        return None
    return s.encode(coding, errors)


def remove_surrogates(s, errors="replace"):
    """Replace surrogates generated by fsdecode with '?'"""
    return s.encode("utf-8", errors).decode("utf-8")


def binary_to_json(key, value):
    assert isinstance(key, str)
    assert isinstance(value, bytes)
    return {key + "_b64": base64.b64encode(value).decode("ascii")}


def text_to_json(key, value):
    """
    Return a dict made from key/value that can be fed safely into a JSON encoder.

    JSON can only contain pure, valid unicode (but not: unicode with surrogate escapes).

    But sometimes we have to deal with such values and we do it like this:
    - <key>: value as pure unicode text (surrogate escapes, if any, replaced by ?)
    - <key>_b64: value as base64 encoded binary representation (only set if value has surrogate escapes)
    """
    coding = "utf-8"
    assert isinstance(key, str)
    assert isinstance(value, str)  # str might contain surrogate escapes
    data = {}
    try:
        value.encode(coding, errors="strict")  # check if pure unicode
    except UnicodeEncodeError:
        # value has surrogate escape sequences
        data[key] = remove_surrogates(value)
        value_bytes = value.encode(coding, errors="surrogateescape")
        data.update(binary_to_json(key, value_bytes))
    else:
        # value is pure unicode
        data[key] = value
        # we do not give the b64 representation, it is not needed
    return data


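# Illustrative examples for text_to_json (the second input contains a surrogate
# escape such as os.fsdecode produces for undecodable filename bytes; the b64
# value is the base64 encoding of the surrogateescape-encoded bytes b"b\xffad"):
#   text_to_json("path", "ok")         -> {"path": "ok"}
#   text_to_json("path", "b\udcffad")  -> {"path": "b?ad", "path_b64": "Yv9hZA=="}

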
def join_cmd(argv, rs=False):
    cmd = shlex.join(argv)
    return remove_surrogates(cmd) if rs else cmd


def eval_escapes(s):
    """Evaluate literal escape sequences in a string (eg `\\n` -> `\n`)."""
    return s.encode("ascii", "backslashreplace").decode("unicode-escape")


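# Illustrative behavior of the two helpers above:
#   join_cmd(["echo", "a b"])      -> "echo 'a b'"    (shlex quoting)
#   eval_escapes("line1\\nline2")  -> "line1\nline2"  (literal \n becomes a newline)

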
def decode_dict(d, keys, encoding="utf-8", errors="surrogateescape"):
    for key in keys:
        if isinstance(d.get(key), bytes):
            d[key] = d[key].decode(encoding, errors)
    return d


def positive_int_validator(value):
    """argparse type for positive integers"""
    int_value = int(value)
    if int_value <= 0:
        raise argparse.ArgumentTypeError("A positive integer is required: %s" % value)
    return int_value


def interval(s):
    """Convert a string representing a valid interval to a number of hours."""
    multiplier = {"H": 1, "d": 24, "w": 24 * 7, "m": 24 * 31, "y": 24 * 365}

    if s.endswith(tuple(multiplier.keys())):
        number = s[:-1]
        suffix = s[-1]
    else:
        # range suffixes in ascending multiplier order
        ranges = [k for k, v in sorted(multiplier.items(), key=lambda t: t[1])]
        raise argparse.ArgumentTypeError(f'Unexpected interval time unit "{s[-1]}": expected one of {ranges!r}')

    try:
        hours = int(number) * multiplier[suffix]
    except ValueError:
        hours = -1

    if hours <= 0:
        raise argparse.ArgumentTypeError('Unexpected interval number "%s": expected an integer greater than 0' % number)

    return hours


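# Illustrative conversions (hours), following the multiplier table above:
#   interval("12H") -> 12
#   interval("2d")  -> 48
#   interval("1w")  -> 168
#   interval("3x")  -> argparse.ArgumentTypeError (unknown time unit "x")

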
def ChunkerParams(s):
    params = s.strip().split(",")
    count = len(params)
    if count == 0:
        raise argparse.ArgumentTypeError("no chunker params given")
    algo = params[0].lower()
    if algo == CH_FAIL and count == 3:
        block_size = int(params[1])
        fail_map = str(params[2])
        return algo, block_size, fail_map
    if algo == CH_FIXED and 2 <= count <= 3:  # fixed, block_size[, header_size]
        block_size = int(params[1])
        header_size = int(params[2]) if count == 3 else 0
        if block_size < 64:
            # we are only disallowing the most extreme cases of abuse here - this does NOT imply
            # that cutting chunks of the minimum allowed size is efficient concerning storage
            # or in-memory chunk management.
            # choose the block (chunk) size wisely: if you have a lot of data and you cut
            # it into very small chunks, you are asking for trouble!
            raise argparse.ArgumentTypeError("block_size must not be less than 64 Bytes")
        if block_size > MAX_DATA_SIZE or header_size > MAX_DATA_SIZE:
            raise argparse.ArgumentTypeError(
                "block_size and header_size must not exceed MAX_DATA_SIZE [%d]" % MAX_DATA_SIZE
            )
        return algo, block_size, header_size
    if algo == "default" and count == 1:  # default
        return CHUNKER_PARAMS
    # this must stay last as it deals with old-style compat mode (no algorithm, 4 params, buzhash):
    if (algo == CH_BUZHASH and count == 5) or count == 4:  # [buzhash, ]chunk_min, chunk_max, chunk_mask, window_size
        chunk_min, chunk_max, chunk_mask, window_size = (int(p) for p in params[count - 4 :])
        if not (chunk_min <= chunk_mask <= chunk_max):
            raise argparse.ArgumentTypeError("required: chunk_min <= chunk_mask <= chunk_max")
        if chunk_min < 6:
            # see comment in 'fixed' algo check
            raise argparse.ArgumentTypeError(
                "min. chunk size exponent must not be less than 6 (2^6 = 64B min. chunk size)"
            )
        if chunk_max > 23:
            raise argparse.ArgumentTypeError(
                "max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)"
            )
        return CH_BUZHASH, chunk_min, chunk_max, chunk_mask, window_size
    raise argparse.ArgumentTypeError("invalid chunker params")


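# Illustrative parses (assuming the usual CH_* string constants from ..constants,
# i.e. CH_BUZHASH == "buzhash" and CH_FIXED == "fixed"):
#   ChunkerParams("fixed,4194304")          -> ("fixed", 4194304, 0)
#   ChunkerParams("buzhash,19,23,21,4095")  -> ("buzhash", 19, 23, 21, 4095)
#   ChunkerParams("default")                -> CHUNKER_PARAMS

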
def FilesCacheMode(s):
    ENTRIES_MAP = dict(ctime="c", mtime="m", size="s", inode="i", rechunk="r", disabled="d")
    VALID_MODES = ("cis", "ims", "cs", "ms", "cr", "mr", "d", "s")  # letters in alpha order
    entries = set(s.strip().split(","))
    if not entries <= set(ENTRIES_MAP):
        raise argparse.ArgumentTypeError(
            "cache mode must be a comma-separated list of: %s" % ",".join(sorted(ENTRIES_MAP))
        )
    short_entries = {ENTRIES_MAP[entry] for entry in entries}
    mode = "".join(sorted(short_entries))
    if mode not in VALID_MODES:
        raise argparse.ArgumentTypeError("cache mode short must be one of: %s" % ",".join(VALID_MODES))
    return mode


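# Illustrative mode strings (short letters, sorted alphabetically):
#   FilesCacheMode("ctime,size,inode")  -> "cis"
#   FilesCacheMode("mtime,size")        -> "ms"
#   FilesCacheMode("disabled")          -> "d"

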
def partial_format(format, mapping):
    """
    Apply format.format_map(mapping) while preserving unknown keys.

    Does not support attribute access, indexing and ![rsa] conversions.
    """
    for key, value in mapping.items():
        key = re.escape(key)
        format = re.sub(
            rf"(?<!\{{)((\{{{key}\}})|(\{{{key}:[^\}}]*\}}))", lambda match: match.group(1).format_map(mapping), format
        )
    return format


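# Illustrative: known keys are substituted (format specs included), unknown keys
# survive untouched:
#   partial_format("{hostname} {unknown} {size:>8}", {"hostname": "foo"})
#   -> "foo {unknown} {size:>8}"

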
class DatetimeWrapper:
    def __init__(self, dt):
        self.dt = dt

    def __format__(self, format_spec):
        if format_spec == "":
            format_spec = ISO_FORMAT_NO_USECS
        return self.dt.__format__(format_spec)


class PlaceholderError(Error):
    """Formatting Error: "{}".format({}): {}({})"""

    exit_mcode = 5


class InvalidPlaceholder(PlaceholderError):
    """Invalid placeholder "{}" in string: {}"""

    exit_mcode = 6


def format_line(format, data):
    for _, key, _, conversion in Formatter().parse(format):
        if not key:
            continue
        if conversion or key not in data:
            raise InvalidPlaceholder(key, format)
    try:
        return format.format_map(data)
    except Exception as e:
        raise PlaceholderError(format, data, e.__class__.__name__, str(e))


def _replace_placeholders(text, overrides={}):
    """Replace placeholders in text with their values."""
    from ..platform import fqdn, hostname, getosusername

    current_time = datetime.now(timezone.utc)
    data = {
        "pid": os.getpid(),
        "fqdn": fqdn,
        "reverse-fqdn": ".".join(reversed(fqdn.split("."))),
        "hostname": hostname,
        "now": DatetimeWrapper(current_time.astimezone()),
        "utcnow": DatetimeWrapper(current_time),
        "user": getosusername(),
        "uuid4": str(uuid.uuid4()),
        "borgversion": borg_version,
        "borgmajor": "%d" % borg_version_tuple[:1],
        "borgminor": "%d.%d" % borg_version_tuple[:2],
        "borgpatch": "%d.%d.%d" % borg_version_tuple[:3],
        **overrides,
    }
    return format_line(text, data)


class PlaceholderReplacer:
    def __init__(self):
        self.reset()

    def override(self, key, value):
        self.overrides[key] = value

    def reset(self):
        self.overrides = {}

    def __call__(self, text, overrides=None):
        ovr = {}
        ovr.update(self.overrides)
        ovr.update(overrides or {})
        return _replace_placeholders(text, overrides=ovr)


replace_placeholders = PlaceholderReplacer()


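# Illustrative call (the output depends on the host and the current time, so the
# result shown is only an example):
#   replace_placeholders("backup-{hostname}-{now:%Y-%m-%d}")
#   -> e.g. "backup-myhost-2024-01-16"

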
def PathSpec(text):
    if not text:
        raise argparse.ArgumentTypeError("Empty strings are not accepted as paths.")
    return text


def SortBySpec(text):
    from ..manifest import AI_HUMAN_SORT_KEYS

    for token in text.split(","):
        if token not in AI_HUMAN_SORT_KEYS:
            raise argparse.ArgumentTypeError("Invalid sort key: %s" % token)
    return text.replace("timestamp", "ts").replace("archive", "name")


def format_file_size(v, precision=2, sign=False, iec=False):
    """Format file size into a human friendly format"""
    fn = sizeof_fmt_iec if iec else sizeof_fmt_decimal
    return fn(v, suffix="B", sep=" ", precision=precision, sign=sign)


class FileSize(int):
    def __new__(cls, value, iec=False):
        obj = int.__new__(cls, value)
        obj.iec = iec
        return obj

    def __format__(self, format_spec):
        return format_file_size(int(self), iec=self.iec).__format__(format_spec)


def parse_file_size(s):
    """Return int from file size (1234, 55G, 1.7T)."""
    if not s:
        return int(s)  # will raise
    suffix = s[-1]
    power = 1000
    try:
        factor = {"K": power, "M": power**2, "G": power**3, "T": power**4, "P": power**5}[suffix]
        s = s[:-1]
    except KeyError:
        factor = 1
    return int(float(s) * factor)


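# Illustrative parses (decimal powers of 1000, per the factor table above):
#   parse_file_size("1234")  -> 1234
#   parse_file_size("55G")   -> 55000000000
#   parse_file_size("1.7T")  -> 1700000000000

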
def parse_storage_quota(storage_quota):
    parsed = parse_file_size(storage_quota)
    if parsed < parse_file_size("10M"):
        raise argparse.ArgumentTypeError("quota is too small (%s). At least 10M are required." % storage_quota)
    return parsed


def sizeof_fmt(num, suffix="B", units=None, power=None, sep="", precision=2, sign=False):
    sign = "+" if sign and num > 0 else ""
    fmt = "{0:{1}.{2}f}{3}{4}{5}"
    prec = 0
    for unit in units[:-1]:
        if abs(round(num, precision)) < power:
            break
        num /= float(power)
        prec = precision
    else:
        unit = units[-1]
    return fmt.format(num, sign, prec, sep, unit, suffix)


def sizeof_fmt_iec(num, suffix="B", sep="", precision=2, sign=False):
    return sizeof_fmt(
        num,
        suffix=suffix,
        sep=sep,
        precision=precision,
        sign=sign,
        units=["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"],
        power=1024,
    )


def sizeof_fmt_decimal(num, suffix="B", sep="", precision=2, sign=False):
    return sizeof_fmt(
        num,
        suffix=suffix,
        sep=sep,
        precision=precision,
        sign=sign,
        units=["", "k", "M", "G", "T", "P", "E", "Z", "Y"],
        power=1000,
    )


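# Illustrative outputs of the wrappers above (sep=" " as used by format_file_size):
#   format_file_size(1234)            -> "1.23 kB"   (decimal, power 1000)
#   format_file_size(1234, iec=True)  -> "1.21 KiB"  (IEC, power 1024)
#   format_file_size(10**9)           -> "1.00 GB"

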
def format_archive(archive):
    return "%-36s %s [%s]" % (archive.name, format_time(archive.ts), bin_to_hex(archive.id))


def parse_stringified_list(s):
    items = re.split(" *, *", s)
    return [item for item in items if item != ""]


class Location:
    """Object representing a repository location"""

    # user must not contain "@", ":" or "/".
    # Quoting adduser error message:
    # "To avoid problems, the username should consist only of letters, digits,
    # underscores, periods, at signs and dashes, and not start with a dash
    # (as defined by IEEE Std 1003.1-2001)."
    # We use "@" as separator between username and hostname, so we must
    # disallow it within the pure username part.
    optional_user_re = r"""
        (?:(?P<user>[^@:/]+)@)?
    """

    # path must not contain :: (it ends at :: or string end), but may contain single colons.
    # to avoid ambiguities with other regexes, it must also not start with ":" nor with "//" nor with "ssh://".
    local_path_re = r"""
        (?!(:|//|ssh://|socket://))         # not starting with ":" or // or ssh:// or socket://
        (?P<path>([^:]|(:(?!:)))+)          # any chars, but no "::"
    """

    # file_path must not contain :: (it ends at :: or string end), but may contain single colons.
    # it may start with a server name; the path component follows the first "/".
    file_path_re = r"""
        (?P<path>(([^/]*)/([^:]|(:(?!:)))+))  # start opt. servername, then /, then any chars, but no "::"
    """

    # abs_path must not contain :: (it ends at :: or string end), but may contain single colons.
    # it must start with a / and that slash is part of the path.
    abs_path_re = r"""
        (?P<path>(/([^:]|(:(?!:)))+))       # start with /, then any chars, but no "::"
    """

    # host NAME, or host IP ADDRESS (v4 or v6, v6 must be in square brackets)
    host_re = r"""
        (?P<host>(
            (?!\[)[^:/]+(?<!\])             # hostname or v4 addr, not containing : or / (does not match v6 addr: no brackets!)
            |
            \[[0-9a-fA-F:.]+\])             # ipv6 address in brackets
        )
    """

    # regexes for misc. kinds of supported location specifiers:
    ssh_re = re.compile(
        r"""
        (?P<proto>ssh)://                   # ssh://
        """
        + optional_user_re
        + host_re
        + r"""                              # user@ (optional), host name or address
        (?::(?P<port>\d+))?                 # :port (optional)
        """
        + abs_path_re,
        re.VERBOSE,
    )  # path

    socket_re = re.compile(
        r"""
        (?P<proto>socket)://                # socket://
        """
        + abs_path_re,
        re.VERBOSE,
    )  # path

    file_re = re.compile(
        r"""
        (?P<proto>file)://                  # file://
        """
        + file_path_re,
        re.VERBOSE,
    )  # servername/path or path

    local_re = re.compile(local_path_re, re.VERBOSE)  # local path

    win_file_re = re.compile(
        r"""
        (?:file://)?                        # optional file protocol
        (?P<path>
            (?:[a-zA-Z]:)?                  # Drive letter followed by a colon (optional)
            (?:[^:]+)                       # Anything which does not contain a :, at least one char
        )
        """,
        re.VERBOSE,
    )

    def __init__(self, text="", overrides={}, other=False):
        self.repo_env_var = "BORG_OTHER_REPO" if other else "BORG_REPO"
        self.valid = False
        self.proto = None
        self.user = None
        self._host = None
        self.port = None
        self.path = None
        self.raw = None
        self.processed = None
        self.parse(text, overrides)

    def parse(self, text, overrides={}):
        if not text:
            # we did not get a text to parse, so we try to fetch from the environment
            text = os.environ.get(self.repo_env_var)
            if text is None:
                return

        self.raw = text  # as given by user, might contain placeholders
        self.processed = replace_placeholders(self.raw, overrides)  # after placeholder replacement
        valid = self._parse(self.processed)
        if valid:
            self.valid = True
        else:
            raise ValueError('Invalid location format: "%s"' % self.processed)

    def _parse(self, text):
        def normpath_special(p):
            # avoid that normpath strips away our relative path hack and even makes p absolute
            relative = p.startswith("/./")
            p = os.path.normpath(p)
            return ("/." + p) if relative else p

        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group("proto")
            self.user = m.group("user")
            self._host = m.group("host")
            self.port = m.group("port") and int(m.group("port")) or None
            self.path = normpath_special(m.group("path"))
            return True
        m = self.file_re.match(text)
        if m:
            self.proto = m.group("proto")
            self.path = normpath_special(m.group("path"))
            return True
        m = self.socket_re.match(text)
        if m:
            self.proto = m.group("proto")
            self.path = normpath_special(m.group("path"))
            return True
        m = self.local_re.match(text)
        if m:
            self.proto = "file"
            self.path = normpath_special(m.group("path"))
            return True
        return False

    def __str__(self):
        items = [
            "proto=%r" % self.proto,
            "user=%r" % self.user,
            "host=%r" % self.host,
            "port=%r" % self.port,
            "path=%r" % self.path,
        ]
        return ", ".join(items)

    def to_key_filename(self):
        name = re.sub(r"[^\w]", "_", self.path).strip("_")
        if self.proto not in ("file", "socket"):
            name = re.sub(r"[^\w]", "_", self.host) + "__" + name
        if len(name) > 100:
            # Limit file names to some reasonable length. Most file systems
            # limit them to 255 [unit of choice]; due to variations in unicode
            # handling we truncate to 100 *characters*.
            name = name[:100]
        return os.path.join(get_keys_dir(), name)

    def __repr__(self):
        return "Location(%s)" % self

    @property
    def host(self):
        # strip square brackets used for IPv6 addrs
        if self._host is not None:
            return self._host.lstrip("[").rstrip("]")

    def canonical_path(self):
        if self.proto in ("file", "socket"):
            return self.path
        else:
            if self.path and self.path.startswith("~"):
                path = "/" + self.path  # /~/x = path x relative to home dir
            elif self.path and not self.path.startswith("/"):
                path = "/./" + self.path  # /./x = path x relative to cwd
            else:
                path = self.path
            return "ssh://{}{}{}{}".format(
                f"{self.user}@" if self.user else "",
                self._host,  # needed for ipv6 addrs
                f":{self.port}" if self.port else "",
                path,
            )

    def with_timestamp(self, timestamp):
        # note: this only affects the repository URL/path, not the archive name!
        return Location(
            self.raw,
            overrides={
                "now": DatetimeWrapper(timestamp),
                "utcnow": DatetimeWrapper(timestamp.astimezone(timezone.utc)),
            },
        )


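# Illustrative parses (hypothetical host/path values, chosen only for the example):
#   Location("ssh://alice@backup.example.org:2222/srv/borgrepo")
#     -> proto='ssh', user='alice', host='backup.example.org', port=2222, path='/srv/borgrepo'
#   Location("/srv/borgrepo")
#     -> proto='file', path='/srv/borgrepo'; canonical_path() returns the path unchanged

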
def location_validator(proto=None, other=False):
    def validator(text):
        try:
            loc = Location(text, other=other)
        except ValueError as err:
            raise argparse.ArgumentTypeError(str(err)) from None
        if proto is not None and loc.proto != proto:
            if proto == "file":
                raise argparse.ArgumentTypeError('"%s": Repository must be local' % text)
            else:
                raise argparse.ArgumentTypeError('"%s": Repository must be remote' % text)
        return loc

    return validator


def relative_time_marker_validator(text: str):
    time_marker_regex = r"^\d+[md]$"
    match = re.compile(time_marker_regex).search(text)
    if not match:
        raise argparse.ArgumentTypeError(f"Invalid relative time marker used: {text}")
    else:
        return text


def text_validator(*, name, max_length, min_length=0, invalid_ctrl_chars="\0", invalid_chars="", no_blanks=False):
    def validator(text):
        assert isinstance(text, str)
        if len(text) < min_length:
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [length < {min_length}]')
        if len(text) > max_length:
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [length > {max_length}]')
        if invalid_ctrl_chars and re.search(f"[{re.escape(invalid_ctrl_chars)}]", text):
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [invalid control chars detected]')
        if invalid_chars and re.search(f"[{re.escape(invalid_chars)}]", text):
            raise argparse.ArgumentTypeError(
                f'Invalid {name}: "{text}" [invalid chars detected matching "{invalid_chars}"]'
            )
        if no_blanks and (text.startswith(" ") or text.endswith(" ")):
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [leading or trailing blanks detected]')
        try:
            text.encode("utf-8", errors="strict")
        except UnicodeEncodeError:
            # looks like text contains surrogate escapes
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [contains non-unicode characters]')
        return text

    return validator


comment_validator = text_validator(name="comment", max_length=10000)


def archivename_validator(text):
    # we make sure that the archive name can be used as a directory name (for borg mount)
    MAX_PATH = 260  # Windows default. Since Win10, there is a registry setting LongPathsEnabled to get more.
    MAX_DIRNAME = MAX_PATH - len("12345678.123")
    SAFETY_MARGIN = 48  # borgfs path: mountpoint / archivename / dir / dir / ... / file
    MAX_ARCHIVENAME = MAX_DIRNAME - SAFETY_MARGIN
    invalid_ctrl_chars = "".join(chr(i) for i in range(32))
    # note: ":" is also an invalid path char on windows, but we can not blacklist it,
    # because e.g. our {now} placeholder creates ISO-8601-like output such as 2022-12-10T20:47:42 .
    invalid_chars = r"/" + r"\"<|>?*"  # posix + windows
    validate_text = text_validator(
        name="archive name",
        min_length=1,
        max_length=MAX_ARCHIVENAME,
        invalid_ctrl_chars=invalid_ctrl_chars,
        invalid_chars=invalid_chars,
        no_blanks=True,
    )
    return validate_text(text)


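# Illustrative checks:
#   archivename_validator("host-2022-12-10T20:47:42")  -> returned unchanged (":" is allowed)
#   archivename_validator("bad/name")                  -> argparse.ArgumentTypeError ("/" is invalid)
#   archivename_validator(" padded ")                  -> argparse.ArgumentTypeError (leading/trailing blanks)

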
class BaseFormatter(metaclass=abc.ABCMeta):
    format: str
    static_data: Dict[str, Any]
    FIXED_KEYS: ClassVar[Dict[str, str]] = {
        # Formatting aids
        "LF": "\n",
        "SPACE": " ",
        "TAB": "\t",
        "CR": "\r",
        "NUL": "\0",
        "NEWLINE": "\n",
        "NL": "\n",  # \n is automatically converted to os.linesep on write
    }
    KEY_DESCRIPTIONS: ClassVar[Dict[str, str]] = {
        "NEWLINE": "OS dependent line separator",
        "NL": "alias of NEWLINE",
        "NUL": "NUL character for creating print0 / xargs -0 like output",
        "SPACE": "space character",
        "TAB": "tab character",
        "CR": "carriage return character",
        "LF": "line feed character",
    }
    KEY_GROUPS: ClassVar[Tuple[Tuple[str, ...], ...]] = (("NEWLINE", "NL", "NUL", "SPACE", "TAB", "CR", "LF"),)

    def __init__(self, format: str, static: Dict[str, Any]) -> None:
        self.format = partial_format(format, static)
        self.static_data = static

    @abc.abstractmethod
    def get_item_data(self, item, jsonline=False) -> dict:
        raise NotImplementedError

    def format_item(self, item, jsonline=False, sort=False):
        data = self.get_item_data(item, jsonline)
        return (
            f"{json.dumps(data, cls=BorgJsonEncoder, sort_keys=sort)}\n" if jsonline else self.format.format_map(data)
        )

    @classmethod
    def keys_help(cls):
        help = []
        keys: Set[str] = set()
        keys.update(cls.KEY_DESCRIPTIONS.keys())
        keys.update(key for group in cls.KEY_GROUPS for key in group)

        for group in cls.KEY_GROUPS:
            for key in group:
                keys.remove(key)
                text = "- " + key
                if key in cls.KEY_DESCRIPTIONS:
                    text += ": " + cls.KEY_DESCRIPTIONS[key]
                help.append(text)
            help.append("")
        assert not keys, str(keys)
        return "\n".join(help)


class ArchiveFormatter(BaseFormatter):
    KEY_DESCRIPTIONS = {
        "archive": "archive name",
        "name": 'alias of "archive"',
        "comment": "archive comment",
        # *start* is the key used by borg-info for this timestamp, this makes the formats more compatible
        "start": "time (start) of creation of the archive",
        "time": 'alias of "start"',
        "end": "time (end) of creation of the archive",
        "command_line": "command line which was used to create the archive",
        "id": "internal ID of the archive",
        "hostname": "hostname of host on which this archive was created",
        "username": "username of user who created this archive",
        "size": "size of this archive (data plus metadata, not considering compression and deduplication)",
        "nfiles": "count of files in this archive",
    }
    KEY_GROUPS = (
        ("archive", "name", "comment", "id"),
        ("start", "time", "end", "command_line"),
        ("hostname", "username"),
        ("size", "nfiles"),
    )

    def __init__(self, format, repository, manifest, key, *, iec=False):
        static_data = {}  # here could be stuff on repo level, above archive level
        static_data.update(self.FIXED_KEYS)
        super().__init__(format, static_data)
        self.repository = repository
        self.manifest = manifest
        self.key = key
        self.name = None
        self.id = None
        self._archive = None
        self.iec = iec
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "hostname": partial(self.get_meta, "hostname", ""),
            "username": partial(self.get_meta, "username", ""),
            "comment": partial(self.get_meta, "comment", ""),
            "command_line": partial(self.get_meta, "command_line", ""),
            "size": partial(self.get_meta, "size", 0),
            "nfiles": partial(self.get_meta, "nfiles", 0),
            "end": self.get_ts_end,
        }
        self.used_call_keys = set(self.call_keys) & self.format_keys

    def get_item_data(self, archive_info, jsonline=False):
        self.name = archive_info.name
        self.id = archive_info.id
        item_data = {}
        item_data.update({} if jsonline else self.static_data)
        item_data.update(
            {
                "name": archive_info.name,
                "archive": archive_info.name,
                "id": bin_to_hex(archive_info.id),
                "time": self.format_time(archive_info.ts),
                "start": self.format_time(archive_info.ts),
            }
        )
        for key in self.used_call_keys:
            item_data[key] = self.call_keys[key]()

        # Note: name and comment are validated, should never contain surrogate escapes.
        # But unsure whether hostname, username, command_line could contain surrogate escapes, play safe:
        for key in "hostname", "username", "command_line":
            if key in item_data:
                item_data.update(text_to_json(key, item_data[key]))
        return item_data

    @property
    def archive(self):
        """lazy load / update loaded archive"""
        if self._archive is None or self._archive.id != self.id:
            from ..archive import Archive

            self._archive = Archive(self.manifest, self.name, iec=self.iec)
        return self._archive

    def get_meta(self, key, default=None):
        return self.archive.metadata.get(key, default)

    def get_ts_end(self):
        return self.format_time(self.archive.ts_end)

    def format_time(self, ts):
        return OutputTimestamp(ts)


class ItemFormatter(BaseFormatter):
    # we provide the hash algos from python stdlib (except shake_*) and additionally xxh64.
    # shake_* is not provided because it uses an incompatible .digest() method to support variable length.
    hash_algorithms = set(hashlib.algorithms_guaranteed).union({"xxh64"}).difference({"shake_128", "shake_256"})
    KEY_DESCRIPTIONS = {
        "type": "file type (file, dir, symlink, ...)",
        "mode": "file mode (as in stat)",
        "uid": "user id of file owner",
        "gid": "group id of file owner",
        "user": "user name of file owner",
        "group": "group name of file owner",
        "path": "file path",
        "target": "link target for symlinks",
        "hlid": "hard link identity (same if hardlinking same fs object)",
        "flags": "file flags",
        "extra": 'prepends {target} with " -> " for soft links and " link to " for hard links',
        "size": "file size",
        "dsize": "deduplicated size",
        "num_chunks": "number of chunks in this file",
        "unique_chunks": "number of unique chunks in this file",
        "mtime": "file modification time",
        "ctime": "file change time",
        "atime": "file access time",
        "isomtime": "file modification time (ISO 8601 format)",
        "isoctime": "file change time (ISO 8601 format)",
        "isoatime": "file access time (ISO 8601 format)",
        "xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
        "health": 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
        "archiveid": "internal ID of the archive",
        "archivename": "name of the archive",
    }
    KEY_GROUPS = (
        ("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "flags"),
        ("size", "dsize", "num_chunks", "unique_chunks"),
        ("mtime", "ctime", "atime", "isomtime", "isoctime", "isoatime"),
        tuple(sorted(hash_algorithms)),
        ("archiveid", "archivename", "extra"),
        ("health",),
    )

    KEYS_REQUIRING_CACHE = ("dsize", "unique_chunks")

    @classmethod
    def format_needs_cache(cls, format):
        format_keys = {f[1] for f in Formatter().parse(format)}
        return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)

    def __init__(self, archive, format):
        from ..checksums import StreamingXXH64

        static_data = {"archivename": archive.name, "archiveid": archive.fpr}
        static_data.update(self.FIXED_KEYS)
        super().__init__(format, static_data)
        self.xxh64 = StreamingXXH64
        self.archive = archive
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "size": self.calculate_size,
            "dsize": partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size),
            "num_chunks": self.calculate_num_chunks,
            "unique_chunks": partial(self.sum_unique_chunks_metadata, lambda chunk: 1),
            "isomtime": partial(self.format_iso_time, "mtime"),
            "isoctime": partial(self.format_iso_time, "ctime"),
            "isoatime": partial(self.format_iso_time, "atime"),
            "mtime": partial(self.format_time, "mtime"),
            "ctime": partial(self.format_time, "ctime"),
            "atime": partial(self.format_time, "atime"),
        }
        for hash_function in self.hash_algorithms:
            self.call_keys[hash_function] = partial(self.hash_item, hash_function)
        self.used_call_keys = set(self.call_keys) & self.format_keys

    def get_item_data(self, item, jsonline=False):
        item_data = {}
        item_data.update({} if jsonline else self.static_data)

        item_data.update(text_to_json("path", item.path))
        target = item.get("target", "")
        item_data.update(text_to_json("target", target))
        if not jsonline:
            item_data["extra"] = "" if not target else f" -> {item_data['target']}"

        hlid = item.get("hlid")
        hlid = bin_to_hex(hlid) if hlid else ""
        item_data["hlid"] = hlid

        mode = stat.filemode(item.mode)
        item_type = mode[0]
        item_data["type"] = item_type
        item_data["mode"] = mode

        item_data["uid"] = item.get("uid")  # int or None
        item_data["gid"] = item.get("gid")  # int or None
        item_data.update(text_to_json("user", item.get("user", str(item_data["uid"]))))
        item_data.update(text_to_json("group", item.get("group", str(item_data["gid"]))))

        if jsonline:
            item_data["healthy"] = "chunks_healthy" not in item
        else:
            item_data["health"] = "broken" if "chunks_healthy" in item else "healthy"
        item_data["flags"] = item.get("bsdflags")  # int if flags known, else (if flags unknown) None
        for key in self.used_call_keys:
            item_data[key] = self.call_keys[key](item)
        return item_data

    def sum_unique_chunks_metadata(self, metadata_func, item):
        """
        Sum unique chunks' metadata. A unique chunk is a chunk which is referenced globally
        as often as it is referenced in the item.

        item: The item to sum its unique chunks' metadata
        metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
                       the metadata needed from the chunk
        """
        chunk_index = self.archive.cache.chunks
        chunks = item.get("chunks", [])
        chunks_counter = Counter(c.id for c in chunks)
        return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id])

    def calculate_num_chunks(self, item):
        return len(item.get("chunks", []))

    def calculate_size(self, item):
        # note: does not support hardlink slaves, they will be size 0
        return item.get_size()

    def hash_item(self, hash_function, item):
        if "chunks" not in item:
            return ""
        if hash_function == "xxh64":
            hash = self.xxh64()
        elif hash_function in self.hash_algorithms:
            hash = hashlib.new(hash_function)
        for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM):
            hash.update(data)
        return hash.hexdigest()

    def format_time(self, key, item):
        return OutputTimestamp(safe_timestamp(item.get(key) or item.mtime))

    def format_iso_time(self, key, item):
        return self.format_time(key, item).isoformat()


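# Illustrative: only the dedup-related keys force loading the chunks cache:
#   ItemFormatter.format_needs_cache("{path}{NL}")          -> False
#   ItemFormatter.format_needs_cache("{dsize} {path}{NL}")  -> True

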
class DiffFormatter(BaseFormatter):
|
|
|
|
KEY_DESCRIPTIONS = {
|
|
|
|
"path": "archived file path",
|
|
|
|
"change": "all available changes",
|
|
|
|
"content": "file content change",
|
|
|
|
"mode": "file mode change",
|
|
|
|
"type": "file type change",
|
|
|
|
"owner": "file owner (user/group) change",
|
|
|
|
"user": "file user change",
|
|
|
|
"group": "file group change",
|
|
|
|
"link": "file link change",
|
|
|
|
"directory": "file directory change",
|
|
|
|
"blkdev": "file block device change",
|
|
|
|
"chrdev": "file character device change",
|
|
|
|
"fifo": "file fifo change",
|
|
|
|
"mtime": "file modification time change",
|
|
|
|
"ctime": "file change time change",
|
|
|
|
"isomtime": "file modification time change (ISO 8601)",
|
|
|
|
"isoctime": "file creation time change (ISO 8601)",
|
|
|
|
}
|
|
|
|
KEY_GROUPS = (
|
|
|
|
("path", "change"),
|
|
|
|
("content", "mode", "type", "owner", "group", "user"),
|
|
|
|
("link", "directory", "blkdev", "chrdev", "fifo"),
|
|
|
|
("mtime", "ctime", "isomtime", "isoctime"),
|
|
|
|
)
|
|
|
|
METADATA = ("mode", "type", "owner", "group", "user", "mtime", "ctime")

    def __init__(self, format, content_only=False):
        static_data = {}
        static_data.update(self.FIXED_KEYS)
        super().__init__(format or "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}", static_data)
        self.content_only = content_only
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "content": self.format_content,
            "mode": self.format_mode,
            "type": partial(self.format_mode, filetype=True),
            "owner": self.format_owner,
            "group": partial(self.format_owner, spec="group"),
            "user": partial(self.format_owner, spec="user"),
            "link": partial(self.format_other, "link"),
            "directory": partial(self.format_other, "directory"),
            "blkdev": partial(self.format_other, "blkdev"),
            "chrdev": partial(self.format_other, "chrdev"),
            "fifo": partial(self.format_other, "fifo"),
            "mtime": partial(self.format_time, "mtime"),
            "ctime": partial(self.format_time, "ctime"),
            "isomtime": partial(self.format_iso_time, "mtime"),
            "isoctime": partial(self.format_iso_time, "ctime"),
        }
        self.used_call_keys = set(self.call_keys) & self.format_keys
        if self.content_only:
            self.used_call_keys -= set(self.METADATA)

    def get_item_data(self, item: "ItemDiff", jsonline=False) -> dict:
        diff_data = {}
        for key in self.used_call_keys:
            diff_data[key] = self.call_keys[key](item)

        change = []
        for key in self.call_keys:
            if key in ("isomtime", "isoctime"):
                continue
            if self.content_only and key in self.METADATA:
                continue
            change.append(self.call_keys[key](item))
        diff_data["change"] = " ".join([v for v in change if v])
        diff_data["path"] = item.path
        diff_data.update({} if jsonline else self.static_data)
        return diff_data

    def format_other(self, key, diff: "ItemDiff"):
        change = diff.changes().get(key)
        return f"{change.diff_type}".ljust(27) if change else ""  # 27 is the length of the content change

    def format_mode(self, diff: "ItemDiff", filetype=False):
        change = diff.type() if filetype else diff.mode()
        return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""

    def format_owner(self, diff: "ItemDiff", spec: Literal["owner", "user", "group"] = "owner"):
        if spec == "user":
            change = diff.user()
            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
        if spec == "group":
            change = diff.group()
            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
        if spec != "owner":
            raise ValueError(f"Invalid owner spec: {spec}")
        change = diff.owner()
        if change:
            return "[{}:{} -> {}:{}]".format(
                change.diff_data["item1"][0],
                change.diff_data["item1"][1],
                change.diff_data["item2"][0],
                change.diff_data["item2"][1],
            )
        return ""

    def format_content(self, diff: "ItemDiff"):
        change = diff.content()
        if change:
            if change.diff_type == "added":
                return "{}: {:>20}".format(change.diff_type, format_file_size(change.diff_data["added"]))
            if change.diff_type == "removed":
                return "{}: {:>18}".format(change.diff_type, format_file_size(change.diff_data["removed"]))
            if "added" not in change.diff_data and "removed" not in change.diff_data:
                return "modified: (can't get size)"
            return "{}: {:>8} {:>8}".format(
                change.diff_type,
                format_file_size(change.diff_data["added"], precision=1, sign=True),
                format_file_size(-change.diff_data["removed"], precision=1, sign=True),
            )
        return ""

    def format_time(self, key, diff: "ItemDiff"):
        change = diff.changes().get(key)
        return f"[{key}: {change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""

    def format_iso_time(self, key, diff: "ItemDiff"):
        change = diff.changes().get(key)
        return (
            f"[{key}: {change.diff_data['item1'].isoformat()} -> {change.diff_data['item2'].isoformat()}]"
            if change
            else ""
        )
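
# Illustrative usage sketch (not part of the original module; `item_diff` is a
# hypothetical ItemDiff instance): a DiffFormatter built from a format string
# only computes the keys that string actually references:
#
#   fmt = DiffFormatter("{content} {path}{NL}")
#   line = fmt.format_item(item_diff)  # assumes format_item from BaseFormatter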


def file_status(mode):
    # return a one-character symbolic status for the file type encoded in *mode*
    if stat.S_ISREG(mode):
        return "A"
    elif stat.S_ISDIR(mode):
        return "d"
    elif stat.S_ISBLK(mode):
        return "b"
    elif stat.S_ISCHR(mode):
        return "c"
    elif stat.S_ISLNK(mode):
        return "s"
    elif stat.S_ISFIFO(mode):
        return "f"
    return "?"


def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
    """
    clean lines (usually read from a config file):

    1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments.

    note: only "pure comment lines" are supported, no support for "trailing comments".

    :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines
    :param lstrip: lstrip call arguments or False, if lstripping is not desired
    :param rstrip: rstrip call arguments or False, if rstripping is not desired
    :param remove_comments: remove comment lines (lines starting with "#")
    :param remove_empty: remove empty lines
    :return: yields processed lines
    """
    for line in lines:
        if lstrip is not False:
            line = line.lstrip(lstrip)
        if rstrip is not False:
            line = line.rstrip(rstrip)
        if remove_empty and not line:
            continue
        if remove_comments and line.startswith("#"):
            continue
        yield line
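
# Illustrative usage sketch (hypothetical file name and consumer): wrap an open
# text file so the consumer only ever sees stripped, non-empty, non-comment lines:
#
#   with open("patterns.lst") as fd:
#       for line in clean_lines(fd):
#           process(line)  # `process` is a placeholder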


def swidth_slice(string, max_width):
    """
    Return a slice of *max_width* cells from *string*.

    Negative *max_width* means from the end of string.

    *max_width* is in units of character cells (or "columns").
    Latin characters are usually one cell wide, many CJK characters are two cells wide.
    """
    from ..platform import swidth

    reverse = max_width < 0
    max_width = abs(max_width)
    if reverse:
        string = reversed(string)
    current_swidth = 0
    result = []
    for character in string:
        current_swidth += swidth(character)
        if current_swidth > max_width:
            break
        result.append(character)
    if reverse:
        result.reverse()
    return "".join(result)


def ellipsis_truncate(msg, space):
    """
    shorten a long string by replacing its middle part with an ellipsis and
    pad the result to *space* cells, example:
    this_is_a_very_long_string -------> this_is..._string
    """
    from ..platform import swidth

    ellipsis_width = swidth("...")
    msg_width = swidth(msg)
    if space < 8:
        # if there is very little space, just show ...
        return "..." + " " * (space - ellipsis_width)
    if space < ellipsis_width + msg_width:
        return f"{swidth_slice(msg, space // 2 - ellipsis_width)}...{swidth_slice(msg, -space // 2)}"
    return msg + " " * (space - msg_width)


class BorgJsonEncoder(json.JSONEncoder):
    def default(self, o):
        from ..repository import Repository
        from ..remote import RemoteRepository
        from ..archive import Archive
        from ..cache import LocalCache, AdHocCache, NewCache

        if isinstance(o, (Repository, RemoteRepository)):
            return {"id": bin_to_hex(o.id), "location": o._location.canonical_path()}
        if isinstance(o, Archive):
            return o.info()
        if isinstance(o, (LocalCache, NewCache)):
            return {"path": o.path, "stats": o.stats()}
        if isinstance(o, AdHocCache):
            return {"stats": o.stats()}
        if callable(getattr(o, "to_json", None)):
            return o.to_json()
        return super().default(o)
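
# Illustrative usage sketch (`repo` stands for any Repository / RemoteRepository
# instance; hypothetical variable): the encoder lets json.dumps serialize borg's
# domain objects directly:
#
#   json.dumps({"repository": repo}, cls=BorgJsonEncoder)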


def basic_json_data(manifest, *, cache=None, extra=None):
    key = manifest.key
    data = extra or {}
    data.update({"repository": BorgJsonEncoder().default(manifest.repository), "encryption": {"mode": key.ARG_NAME}})
    data["repository"]["last_modified"] = OutputTimestamp(manifest.last_timestamp)
    if key.NAME.startswith("key file"):
        data["encryption"]["keyfile"] = key.find_key()
    if cache:
        data["cache"] = cache
    return data


def json_dump(obj):
    """Dump using BorgJsonEncoder."""
    return json.dumps(obj, sort_keys=True, indent=4, cls=BorgJsonEncoder)


def json_print(obj):
    print(json_dump(obj))


def prepare_dump_dict(d):
    def decode_bytes(value):
        # this should somehow be reversible later, but usual strings should
        # look nice and chunk ids should mostly show in hex. Use a special
        # inband signaling character (ASCII DEL) to distinguish between
        # decoded and hex mode.
        if not value.startswith(b"\x7f"):
            try:
                value = value.decode()
                return value
            except UnicodeDecodeError:
                pass
        return "\u007f" + bin_to_hex(value)

    def decode_tuple(t):
        res = []
        for value in t:
            if isinstance(value, dict):
                value = decode(value)
            elif isinstance(value, (tuple, list)):
                value = decode_tuple(value)
            elif isinstance(value, bytes):
                value = decode_bytes(value)
            res.append(value)
        return res

    def decode(d):
        res = OrderedDict()
        for key, value in d.items():
            if isinstance(value, dict):
                value = decode(value)
            elif isinstance(value, (tuple, list)):
                value = decode_tuple(value)
            elif isinstance(value, bytes):
                value = decode_bytes(value)
            elif isinstance(value, Timestamp):
                value = value.to_unix_nano()
            if isinstance(key, bytes):
                key = key.decode()
            res[key] = value
        return res

    return decode(d)
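
# Illustrative sketch (assumed inputs) of the DEL-prefix convention used above:
#
#   prepare_dump_dict({b"path": b"etc/passwd"})
#   # -> {"path": "etc/passwd"}    (decodable bytes become str)
#   prepare_dump_dict({b"id": b"\xde\xad\xbe\xef"})
#   # -> {"id": "\x7fdeadbeef"}    (undecodable bytes: DEL marker + hex)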


class Highlander(argparse.Action):
    """make sure some option is only given once"""

    def __init__(self, *args, **kwargs):
        self.__called = False
        super().__init__(*args, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        if self.__called:
            raise argparse.ArgumentError(self, "There can be only one.")
        self.__called = True
        setattr(namespace, self.dest, values)
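
# Illustrative usage sketch (hypothetical option name):
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--name", action=Highlander)
#   parser.parse_args(["--name", "a", "--name", "b"])
#   # -> parser errors out with: argument --name: There can be only one.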


class MakePathSafeAction(Highlander):
    def __call__(self, parser, namespace, path, option_string=None):
        try:
            sanitized_path = make_path_safe(path)
        except ValueError as e:
            raise argparse.ArgumentError(self, e)
        if sanitized_path == ".":
            raise argparse.ArgumentError(self, f"{path!r} is not a valid file name")
        setattr(namespace, self.dest, sanitized_path)