import json
import os
from collections import OrderedDict
from datetime import datetime, timezone
from io import StringIO
from unittest.mock import Mock

import pytest

from . import rejected_dotdot_paths
from ..crypto.key import PlaintextKey
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
from ..helpers import msgpack
from ..item import Item, ArchiveItem
from ..manifest import Manifest
from ..platform import uid2user, gid2group, is_win32


@pytest.fixture()
def stats():
    stats = Statistics()
    stats.update(20, unique=True)
    stats.nfiles = 1
    return stats


def test_stats_basic(stats):
    assert stats.osize == 20
    assert stats.usize == 20
    stats.update(20, unique=False)
    assert stats.osize == 40
    assert stats.usize == 20


@pytest.mark.parametrize(
    "item_path, update_size, expected_output",
    [
        ("", 0, "20 B O 20 B U 1 N "),  # test unchanged 'stats' fixture
        ("foo", 10**3, "1.02 kB O 20 B U 1 N foo"),  # test updated original size and set item path
        # test long item path which exceeds 80 characters
        ("foo" * 40, 10**3, "1.02 kB O 20 B U 1 N foofoofoofoofoofoofoofoofo...foofoofoofoofoofoofoofoofoofoo"),
    ],
)
def test_stats_progress(item_path, update_size, expected_output, stats, monkeypatch, columns=80):
    monkeypatch.setenv("COLUMNS", str(columns))
    out = StringIO()
    item = Item(path=item_path) if item_path else None
    s = expected_output

    stats.update(update_size, unique=False)
    stats.show_progress(item=item, stream=out)
    buf = " " * (columns - len(s))
    assert out.getvalue() == s + buf + "\r"


def test_stats_format(stats):
    assert (
        str(stats)
        == """\
Number of files: 1
Original size: 20 B
Deduplicated size: 20 B
Time spent in hashing: 0.000 seconds
Time spent in chunking: 0.000 seconds
Added files: 0
Unchanged files: 0
Modified files: 0
Error files: 0
Files changed while reading: 0
Bytes read from remote: 0
Bytes sent to remote: 0
"""
    )
    s = f"{stats.osize_fmt}"
    assert s == "20 B"
    # kind of redundant, but id is variable so we can't match reliably
    assert repr(stats) == f"<Statistics object at {id(stats):#x} (20, 20)>"


def test_stats_progress_json(stats):
    stats.output_json = True

    out = StringIO()
    stats.show_progress(item=Item(path="foo"), stream=out)
    result = json.loads(out.getvalue())
    assert result["type"] == "archive_progress"
    assert isinstance(result["time"], float)
    assert result["finished"] is False
    assert result["path"] == "foo"
    assert result["original_size"] == 20
    assert result["nfiles"] == 1

    out = StringIO()
    stats.show_progress(stream=out, final=True)
    result = json.loads(out.getvalue())
    assert result["type"] == "archive_progress"
    assert isinstance(result["time"], float)
    assert result["finished"] is True  # see #6570
    assert "path" not in result
    assert "original_size" not in result
    assert "nfiles" not in result


@pytest.mark.parametrize(
    "isoformat, expected",
    [
        ("1970-01-01T00:00:01.000001", datetime(1970, 1, 1, 0, 0, 1, 1, timezone.utc)),  # test with microseconds
        ("1970-01-01T00:00:01", datetime(1970, 1, 1, 0, 0, 1, 0, timezone.utc)),  # test without microseconds
    ],
)
def test_timestamp_parsing(monkeypatch, isoformat, expected):
    repository = Mock()
    key = PlaintextKey(repository)
    manifest = Manifest(key, repository)
    a = Archive(manifest, "test", create=True)
    a.metadata = ArchiveItem(time=isoformat)
    assert a.ts == expected


class MockCache:
    class MockRepo:
        def async_response(self, wait=True):
            pass

    def __init__(self):
        self.objects = {}
        self.repository = self.MockRepo()

    def add_chunk(self, id, meta, data, stats=None, wait=True, ro_type=None):
        assert ro_type is not None
        self.objects[id] = data
        return id, len(data)


def test_cache_chunk_buffer():
    data = [Item(path="p1"), Item(path="p2")]
    cache = MockCache()
    key = PlaintextKey(None)
    chunks = CacheChunkBuffer(cache, key, None)
    for d in data:
        chunks.add(d)
        chunks.flush()
    chunks.flush(flush=True)
    assert len(chunks.chunks) == 2
    unpacker = msgpack.Unpacker()
    for id in chunks.chunks:
        unpacker.feed(cache.objects[id])
    assert data == [Item(internal_dict=d) for d in unpacker]


def test_partial_cache_chunk_buffer():
    big = "0123456789abcdefghijklmnopqrstuvwxyz" * 25000
    data = [Item(path="full", target=big), Item(path="partial", target=big)]
    cache = MockCache()
    key = PlaintextKey(None)
    chunks = CacheChunkBuffer(cache, key, None)
    for d in data:
        chunks.add(d)
    chunks.flush(flush=False)
    # the code is expected to leave the last partial chunk in the buffer
    assert len(chunks.chunks) == 3
    assert chunks.buffer.tell() > 0
    # now really flush
    chunks.flush(flush=True)
    assert len(chunks.chunks) == 4
    assert chunks.buffer.tell() == 0
    unpacker = msgpack.Unpacker()
    for id in chunks.chunks:
        unpacker.feed(cache.objects[id])
    assert data == [Item(internal_dict=d) for d in unpacker]


def make_chunks(items):
    return b"".join(msgpack.packb({"path": item}) for item in items)


def _validator(value):
    return isinstance(value, dict) and value.get("path") in ("foo", "bar", "boo", "baz")


def process(input):
    unpacker = RobustUnpacker(validator=_validator, item_keys=ITEM_KEYS)
    result = []
    for should_sync, chunks in input:
        if should_sync:
            unpacker.resync()
        for data in chunks:
            unpacker.feed(data)
            for item in unpacker:
                result.append(item)
    return result


def test_extra_garbage_no_sync():
    chunks = [(False, [make_chunks(["foo", "bar"])]), (False, [b"garbage"] + [make_chunks(["boo", "baz"])])]
    res = process(chunks)
    assert res == [{"path": "foo"}, {"path": "bar"}, 103, 97, 114, 98, 97, 103, 101, {"path": "boo"}, {"path": "baz"}]


def split(left, length):
    parts = []
    while left:
        parts.append(left[:length])
        left = left[length:]
    return parts


def test_correct_stream():
    chunks = split(make_chunks(["foo", "bar", "boo", "baz"]), 2)
    input = [(False, chunks)]
    result = process(input)
    assert result == [{"path": "foo"}, {"path": "bar"}, {"path": "boo"}, {"path": "baz"}]


def test_missing_chunk():
    chunks = split(make_chunks(["foo", "bar", "boo", "baz"]), 4)
    input = [(False, chunks[:3]), (True, chunks[4:])]
    result = process(input)
    assert result == [{"path": "foo"}, {"path": "boo"}, {"path": "baz"}]


def test_corrupt_chunk():
    chunks = split(make_chunks(["foo", "bar", "boo", "baz"]), 4)
    input = [(False, chunks[:3]), (True, [b"gar", b"bage"] + chunks[3:])]
    result = process(input)
    assert result == [{"path": "foo"}, {"path": "boo"}, {"path": "baz"}]


@pytest.fixture
def item_keys_serialized():
    return [msgpack.packb(name) for name in ITEM_KEYS]


@pytest.mark.parametrize(
    "packed",
    [b"", b"x", b"foobar"]
    + [
        msgpack.packb(o)
        for o in (
            [None, 0, 0.0, False, "", {}, [], ()]
            + [42, 23.42, True, b"foobar", {b"foo": b"bar"}, [b"foo", b"bar"], (b"foo", b"bar")]
        )
    ],
)
def test_invalid_msgpacked_item(packed, item_keys_serialized):
    assert not valid_msgpacked_dict(packed, item_keys_serialized)


# pytest-xdist requires that keys and dicts are always in the same order:
IK = sorted(list(ITEM_KEYS))


@pytest.mark.parametrize(
    "packed",
    [
        msgpack.packb(o)
        for o in [
            {"path": b"/a/b/c"},  # small (different msgpack mapping type!)
            OrderedDict((k, b"") for k in IK),  # as big (key count) as it gets
            OrderedDict((k, b"x" * 1000) for k in IK),  # as big (key count and volume) as it gets
        ]
    ],
    ids=["minimal", "empty-values", "long-values"],
)
def test_valid_msgpacked_items(packed, item_keys_serialized):
    assert valid_msgpacked_dict(packed, item_keys_serialized)


def test_key_length_msgpacked_items():
    key = "x" * 32  # 31 bytes is the limit for the msgpack fixstr type
    data = {key: b""}
    item_keys_serialized = [msgpack.packb(key)]
    assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
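

# A quick illustration of the fixstr boundary mentioned above: fixstr packs the
# string length into its header byte (0xA0 | len) and therefore maxes out at
# 31 bytes, so a 32-byte key no longer fits in a single-byte header. This is a
# sketch assuming the plain msgpack-python default packer; borg's msgpack
# wrapper may choose different raw/str encodings.
def test_fixstr_boundary_illustration():
    import msgpack as mp  # plain msgpack-python, not borg's wrapper

    assert mp.packb("x" * 31)[0] == 0xBF  # single-byte fixstr header: 0xA0 | 31
    assert mp.packb("x" * 32)[0] == 0xD9  # str8 header; length is in the next byte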


def test_backup_io():
    with pytest.raises(BackupOSError):
        with backup_io:
            raise OSError(123)


def test_backup_io_iter():
    class Iterator:
        def __init__(self, exc):
            self.exc = exc

        def __next__(self):
            raise self.exc()

    oserror_iterator = Iterator(OSError)
    with pytest.raises(BackupOSError):
        for _ in backup_io_iter(oserror_iterator):
            pass

    normal_iterator = Iterator(StopIteration)
    for _ in backup_io_iter(normal_iterator):
        assert False, "StopIteration handled incorrectly"


def test_get_item_uid_gid():
    # test requires that:
    # - a user/group name for the current process' real uid/gid exists.
    # - a system user/group udoesnotexist:gdoesnotexist does NOT exist.

    try:
        puid, pgid = os.getuid(), os.getgid()  # UNIX only
    except AttributeError:
        puid, pgid = 0, 0
    puser, pgroup = uid2user(puid), gid2group(pgid)

    # this is intentionally a "strange" item, with non-matching ids/names.
    item = Item(path="filename", uid=1, gid=2, user=puser, group=pgroup)

    uid, gid = get_item_uid_gid(item, numeric=False)
    # these are found via a name-to-id lookup
    assert uid == puid
    assert gid == pgid

    uid, gid = get_item_uid_gid(item, numeric=True)
    # these are directly taken from the item.uid and .gid
    assert uid == 1
    assert gid == 2

    uid, gid = get_item_uid_gid(item, numeric=False, uid_forced=3, gid_forced=4)
    # these are enforced (not from item metadata)
    assert uid == 3
    assert gid == 4

    # item metadata broken, has negative ids.
    item = Item(path="filename", uid=-1, gid=-2, user=puser, group=pgroup)

    uid, gid = get_item_uid_gid(item, numeric=True)
    # use the uid/gid defaults (which both default to 0).
    assert uid == 0
    assert gid == 0

    uid, gid = get_item_uid_gid(item, numeric=True, uid_default=5, gid_default=6)
    # use the uid/gid defaults (as given).
    assert uid == 5
    assert gid == 6

    # item metadata broken, has negative ids and non-existing user/group names.
    item = Item(path="filename", uid=-3, gid=-4, user="udoesnotexist", group="gdoesnotexist")

    uid, gid = get_item_uid_gid(item, numeric=False)
    # use the uid/gid defaults (which both default to 0).
    assert uid == 0
    assert gid == 0

    uid, gid = get_item_uid_gid(item, numeric=True, uid_default=7, gid_default=8)
    # use the uid/gid defaults (as given).
    assert uid == 7
    assert gid == 8

    if not is_win32:
        # due to the hack in borg.platform.windows user2uid / group2gid, these always return 0
        # (no matter which username we ask for) and they never raise a KeyError (like e.g. for
        # a non-existing user/group name). Thus, these tests can currently not succeed on win32.

        # item metadata has valid uid/gid, but non-existing user/group names.
        item = Item(path="filename", uid=9, gid=10, user="udoesnotexist", group="gdoesnotexist")

        uid, gid = get_item_uid_gid(item, numeric=False)
        # because the user/group name does not exist here, use the valid numeric ids from item metadata.
        assert uid == 9
        assert gid == 10

        uid, gid = get_item_uid_gid(item, numeric=False, uid_default=11, gid_default=12)
        # because the item's uid/gid seem valid, do not use the given uid/gid defaults
        assert uid == 9
        assert gid == 10

    # item metadata only has uid/gid, but no user/group.
    item = Item(path="filename", uid=13, gid=14)

    uid, gid = get_item_uid_gid(item, numeric=False)
    # it checks user/group first, but as there is nothing in the item, it falls back to uid/gid.
    assert uid == 13
    assert gid == 14

    uid, gid = get_item_uid_gid(item, numeric=True)
    # does not check user/group, directly returns uid/gid.
    assert uid == 13
    assert gid == 14

    # item metadata has no uid/gid/user/group.
    item = Item(path="filename")

    uid, gid = get_item_uid_gid(item, numeric=False, uid_default=15)
    # as there is nothing, it falls back to uid_default/gid_default.
    assert uid == 15
    assert gid == 0

    uid, gid = get_item_uid_gid(item, numeric=True, gid_default=16)
    # as there is nothing, it falls back to uid_default/gid_default.
    assert uid == 0
    assert gid == 16


# Background for test_reject_non_sanitized_item (from the commit "Sanitize paths
# during archive creation/extraction/..."):
#
# Paths were not always sanitized when creating an archive and, more importantly,
# never when extracting one. The following example shows how this could be used
# to attempt to write a file outside the extraction directory:
#
#   $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 -
#   $ borg list -r ~/borg/a archive-1
#   -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow
#   $ mkdir borg/target
#   $ cd borg/target
#   $ borg extract -r ~/borg/a archive-1
#   x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc'
#
# Note that Borg tries to extract the file to /etc/shadow; the permission error
# is only a result of the user not having access.
#
# The fix ensures file names are sanitized before archiving. For files extracted
# from an archive, paths are sanitized by making all paths relative, removing '.'
# elements, and removing superfluous slashes (as in '//'). '..' elements, however,
# are rejected outright. The reasoning is that it is easy to start a path with
# './' or insert a '//' by accident (e.g. via --stdin-name or import-tar), whereas
# '..' seems unlikely to be the result of an accident and could indicate a
# tampered repository.
#
# With paths being sanitized as they are read, these "errors" will be corrected
# during the `borg transfer` required when upgrading to Borg 2. Hence, the
# sanitization on reading can be removed once support for reading v1 repositories
# is dropped. V2 repositories will not contain non-sanitized paths. Of course, a
# check for absolute paths and '..' elements needs to be kept in place to detect
# tampered archives.
#
# This should be treated as a security issue. Extracting a file outside the
# extraction path can constitute a security risk when:
#
# a) extraction is done by a different user than the one who created the archive:
#    the creating user may be able to get a file overwritten as a different user.
# b) the archive is created on one host and extracted on another: the creating
#    user may be able to get a file overwritten on another host.
# c) an archive is created before and extracted after an OS reinstall: when a
#    host is suspected compromised, it is common to reinstall (or set up a new
#    machine), extract the backups and then evaluate their integrity. A user who
#    manipulated the archive before such a reinstall may be able to get a file
#    overwritten outside the extraction path and may evade integrity checks.
#
# Notably absent is creation and extraction on the same host as the same user; in
# such a case, an adversary must be assumed to be able to replace any file directly.
#
# This also (partially) fixes #7099.
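

# As a rough illustration of the sanitization rules described above (make paths
# relative, drop '.' elements and superfluous slashes, reject '..'), a minimal
# sketch follows. sanitize_path is a hypothetical helper for illustration only;
# it is not the actual borg implementation. The error message matches the one
# the test below expects from Item.
def sanitize_path(path):
    parts = path.split("/")
    # reject '..' outright: unlikely to be accidental, may indicate tampering
    if ".." in parts:
        raise ValueError("unexpected '..' element in path")
    # make the path relative, dropping '.' elements and the empty parts that
    # a leading '/' or a '//' leave behind
    parts = [p for p in parts if p not in ("", ".")]
    return "/".join(parts)


assert sanitize_path("./x//y/z") == "x/y/z"
assert sanitize_path("/etc/shadow") == "etc/shadow"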


def test_reject_non_sanitized_item():
    for path in rejected_dotdot_paths:
        with pytest.raises(ValueError, match="unexpected '..' element in path"):
            Item(path=path, user="root", group="root")