2016-04-25 03:57:30 +00:00
|
|
|
import pytest
|
|
|
|
|
2016-08-09 18:49:56 +00:00
|
|
|
from ..cache import ChunkListEntry
|
2022-09-12 22:25:57 +00:00
|
|
|
from ..item import Item, chunks_contents_equal
|
2016-04-25 03:57:30 +00:00
|
|
|
from ..helpers import StableDict
|
2022-05-04 08:34:33 +00:00
|
|
|
from ..helpers.msgpack import Timestamp
|
2016-04-25 03:57:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_item_empty():
    """Check how an Item created without any data answers lookups.

    Covers ``as_dict()``, the ``in`` operator, ``get()`` and plain attribute
    access/deletion for a key that was never set, plus the errors expected
    for unknown key names and for keys that are not ``str``.
    """
    empty = Item()

    # nothing was set, so the dict representation is empty
    assert empty.as_dict() == {}

    # membership: valid-but-absent key, unknown key name, wrongly typed keys
    assert "path" not in empty
    with pytest.raises(ValueError):
        "invalid-key" in empty
    for bad_key in (b"path", 42):
        with pytest.raises(TypeError):
            bad_key in empty

    # get(): same three categories, plus the optional default value
    assert empty.get("mode") is None
    assert empty.get("mode", 0o666) == 0o666
    with pytest.raises(ValueError):
        empty.get("invalid-key")
    for bad_key in (b"mode", 42):
        with pytest.raises(TypeError):
            empty.get(bad_key)

    # reading an unset attribute fails ...
    with pytest.raises(AttributeError):
        empty.path

    # ... and so does deleting it
    with pytest.raises(AttributeError):
        del empty.path
|
|
|
|
|
|
|
|
|
2023-06-04 00:02:27 +00:00
|
|
|
@pytest.mark.parametrize(
    "item_dict, path, mode",
    [  # does not matter whether we get str or bytes keys
        ({b"path": "a/b/c", b"mode": 0o666}, "a/b/c", 0o666),
        ({"path": "a/b/c", "mode": 0o666}, "a/b/c", 0o666),
    ],
)
def test_item_from_dict(item_dict, path, mode):
    """Build an Item from a dict and verify both attributes and membership.

    ``item_dict`` is passed positionally to ``Item``; ``path`` and ``mode``
    are the values expected to be readable back as attributes.
    """
    item = Item(item_dict)
    expectations = (("path", path), ("mode", mode))
    for key, expected in expectations:
        # the value is readable as an attribute and the key is reported as present
        assert getattr(item, key) == expected
        assert key in item
|
|
|
|
|
2016-04-25 12:12:39 +00:00
|
|
|
|
2023-06-04 00:02:27 +00:00
|
|
|
@pytest.mark.parametrize(
    "invalid_item, error",
    [
        # invalid - no dict
        (42, TypeError),
        # invalid - no bytes/str key
        ({42: 23}, TypeError),
        # invalid - unknown key
        ({"foobar": "baz"}, ValueError),
    ],
)
def test_item_invalid(invalid_item, error):
    """Constructing an Item from unusable input must raise ``error``."""
    with pytest.raises(error):
        Item(invalid_item)
|
2016-04-25 12:12:39 +00:00
|
|
|
|
2016-04-25 03:57:30 +00:00
|
|
|
|
|
|
|
def test_item_from_kw():
    """Build an Item from keyword arguments and read the values back."""
    given = {"path": "a/b/c", "mode": 0o666}
    item = Item(**given)
    for name, value in given.items():
        assert getattr(item, name) == value
|
|
|
|
|
|
|
|
|
|
|
|
def test_item_int_property():
    """Exercise set / get / delete on the integer-valued ``mode`` property."""
    item = Item()
    file_mode = 0o666

    # set, then read back both directly and via the dict representation
    item.mode = file_mode
    assert item.mode == file_mode
    assert item.as_dict() == {"mode": file_mode}

    # deleting removes the key from the dict representation again
    del item.mode
    assert item.as_dict() == {}

    # a non-int value is expected to be refused
    with pytest.raises(TypeError):
        item.mode = "invalid"
|
|
|
|
|
|
|
|
|
2023-06-04 00:02:27 +00:00
|
|
|
@pytest.mark.parametrize("atime", [42, 2**65])
def test_item_mptimestamp_property(atime):
    """Set ``atime`` from an int and check both views of the stored value.

    Attribute access is expected to give the int back unchanged, while
    ``as_dict()`` is expected to hold ``Timestamp.from_unix_nano(atime)``.
    """
    item = Item()
    item.atime = atime
    assert item.atime == atime

    serialized = item.as_dict()
    assert serialized == {"atime": Timestamp.from_unix_nano(atime)}
|
2017-03-21 01:21:32 +00:00
|
|
|
|
|
|
|
|
2016-04-25 03:57:30 +00:00
|
|
|
def test_item_se_str_property():
    """Exercise the ``path`` property with plain and surrogate-escaped text."""
    plain_path = "a/b/c"
    raw_path = b"a/\xfc/c"
    escaped_path = "a/\udcfc/c"

    # start simple
    item = Item()
    item.path = plain_path
    assert item.path == plain_path
    assert item.as_dict() == {"path": plain_path}
    del item.path
    assert item.as_dict() == {}
    with pytest.raises(TypeError):
        item.path = 42

    # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
    item = Item(internal_dict={"path": raw_path})
    # getting a surrogate-escaped representation
    assert item.path == escaped_path
    assert item.as_dict() == {"path": escaped_path}
    del item.path
    assert "path" not in item
    # setting using a surrogate-escaped representation
    item.path = escaped_path
    assert item.as_dict() == {"path": escaped_path}
|
2016-04-25 03:57:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_item_list_property():
    """In-place appends to ``item.chunks`` must be visible on later reads."""
    item = Item()
    item.chunks = []
    # independent list that mirrors what item.chunks is expected to contain
    mirror = []
    assert item.chunks == mirror

    for value in (0, 1):
        item.chunks.append(value)
        mirror.append(value)
        assert item.chunks == mirror

    assert item.as_dict() == {"chunks": [0, 1]}
|
|
|
|
|
|
|
|
|
|
|
|
def test_item_dict_property():
    """In-place updates of ``item.xattrs`` must be visible on later reads."""
    item = Item()
    item.xattrs = StableDict()
    assert item.xattrs == StableDict()

    # first entry: check it via item access
    item.xattrs["foo"] = "bar"
    assert item.xattrs["foo"] == "bar"

    # second entry: check the whole mapping and the dict representation
    item.xattrs["bar"] = "baz"
    both_entries = {"foo": "bar", "bar": "baz"}
    assert item.xattrs == StableDict(both_entries)
    assert item.as_dict() == {"xattrs": both_entries}
|
2016-04-27 21:17:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_unknown_property():
    """Setting an attribute that Item does not know must fail.

    We do not want the user to be able to set unknown attributes: they
    would not get into the ``.as_dict()`` result dictionary, and they might
    just be typos of known attributes.
    """
    item = Item()
    with pytest.raises(AttributeError):
        item.unknown_attribute = None
|
2016-08-09 18:49:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_item_file_size():
    """``get_size()`` of an item with two chunks is the sum of the chunk sizes."""
    chunk_sizes = (1000, 2000)
    chunk_list = [ChunkListEntry(size=size, id=None) for size in chunk_sizes]
    item = Item(mode=0o100666, chunks=chunk_list)

    assert item.get_size() == 3000

    # with memorize=True the computed size is expected to be readable as item.size
    item.get_size(memorize=True)
    assert item.size == 3000
|
2016-08-09 18:49:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_item_file_size_no_chunks():
    """An item created with a mode but no chunks reports a size of zero."""
    chunkless = Item(mode=0o100666)
    assert chunkless.get_size() == 0
|
2017-07-29 12:26:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_item_optr():
    """Round-tripping through ``to_optr`` / ``from_optr`` gives the same object."""
    original = Item()
    optr = original.to_optr()
    # identity, not mere equality, is what is being checked here
    assert Item.from_optr(optr) is original
|
2022-09-12 22:25:57 +00:00
|
|
|
|
|
|
|
|
2023-06-04 00:02:27 +00:00
|
|
|
@pytest.mark.parametrize(
    "chunk_a, chunk_b, chunks_equal",
    [
        # equal
        (["1234", "567A", "bC"], ["1", "23", "4567A", "b", "C"], True),
        # one iterator exhausted before the other
        (["12345"], ["1234", "56"], False),
        # content mismatch
        (["1234", "65"], ["1234", "56"], False),
        # first is the prefix of second
        (["1234", "56"], ["1234", "565"], False),
    ],
)
def test_chunk_content_equal(chunk_a: str, chunk_b: str, chunks_equal):
    """Compare two chunk streams in both argument orders.

    ``chunk_a`` and ``chunk_b`` are the pieces of each stream; the cases
    above pass lists of ``str`` (despite the ``str`` annotations), which are
    encoded to bytes here. ``chunks_equal`` is the expected result, and it
    must not depend on which stream is given first.
    """

    def encoded(pieces):
        # each text piece becomes one bytes chunk
        return [piece.encode() for piece in pieces]

    stream_a = encoded(chunk_a)
    stream_b = encoded(chunk_b)

    forward = chunks_contents_equal(iter(stream_a), iter(stream_b))
    backward = chunks_contents_equal(iter(stream_b), iter(stream_a))

    assert forward == backward
    assert forward == chunks_equal
|