borg/src/borg/testsuite/item.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

176 lines
5.0 KiB
Python
Raw Permalink Normal View History

2016-04-25 03:57:30 +00:00
import pytest
2016-08-09 18:49:56 +00:00
from ..cache import ChunkListEntry
from ..item import Item, chunks_contents_equal
2016-04-25 03:57:30 +00:00
from ..helpers import StableDict
from ..helpers.msgpack import Timestamp
2016-04-25 03:57:30 +00:00
def test_item_empty():
item = Item()
assert item.as_dict() == {}
assert "path" not in item
with pytest.raises(ValueError):
"invalid-key" in item
with pytest.raises(TypeError):
b"path" in item
with pytest.raises(TypeError):
42 in item
assert item.get("mode") is None
assert item.get("mode", 0o666) == 0o666
with pytest.raises(ValueError):
item.get("invalid-key")
with pytest.raises(TypeError):
item.get(b"mode")
with pytest.raises(TypeError):
item.get(42)
with pytest.raises(AttributeError):
item.path
with pytest.raises(AttributeError):
del item.path
@pytest.mark.parametrize(
"item_dict, path, mode",
[ # does not matter whether we get str or bytes keys
Sanitize paths during archive creation/extraction/... Paths are not always sanitized when creating an archive and, more importantly, never when extracting one. The following example shows how this can be used to attempt to write a file outside the extraction directory: $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 - $ borg list -r ~/borg/a archive-1 -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow $ mkdir borg/target $ cd borg/target $ borg extract -r ~/borg/a archive-1 x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc' Note that Borg tries to extract the file to /etc/shadow and the permission error is a result of the user not having access. This patch ensures file names are sanitized before archiving. As for files extracted from the archive, paths are sanitized by making all paths relative, removing '.' elements, and removing superfluous slashes (as in '//'). '..' elements, however, are rejected outright. The reasoning here is that it is easy to start a path with './' or insert a '//' by accident (e.g. via --stdin-name or import-tar). '..', however, seem unlikely to be the result of an accident and could indicate a tampered repository. With paths being sanitized as they are being read, this "errors" will be corrected during the `borg transfer` required when upgrading to Borg 2. Hence, the sanitation, when reading the archive, can be removed once support for reading v1 repositories is dropped. V2 repository will not contain non-sanitized paths. Of course, a check for absolute paths and '..' elements needs to kept in place to detect tempered archives. I recommend treating this as a security issue. I see the following cases where extracting a file outside the extraction path could constitute a security risk: a) When extraction is done as a different user than archive creation. The user that created the archive may be able to get a file overwritten as a different user. b) When the archive is created on one host and extracted on another. The user that created the archive may be able to get a file overwritten on another host. c) When an archive is created and extracted after a OS reinstall. When a host is suspected compromised, it is common to reinstall (or set up a new machine), extract the backups and then evaluate their integrity. A user that manipulates the archive before such a reinstall may be able to get a file overwritten outside the extraction path and may evade integrity checks. Notably absent is the creation and extraction on the same host as the same user. In such case, an adversary must be assumed to be able to replace any file directly. This also (partially) fixes #7099.
2022-10-23 16:39:09 +00:00
({b"path": "a/b/c", b"mode": 0o666}, "a/b/c", 0o666),
({"path": "a/b/c", "mode": 0o666}, "a/b/c", 0o666),
],
)
def test_item_from_dict(item_dict, path, mode):
item = Item(item_dict)
assert item.path == path
assert item.mode == mode
2016-04-25 03:57:30 +00:00
assert "path" in item
assert "mode" in item
2016-04-25 12:12:39 +00:00
@pytest.mark.parametrize(
"invalid_item, error",
[
(42, TypeError), # invalid - no dict
({42: 23}, TypeError), # invalid - no bytes/str key
({"foobar": "baz"}, ValueError), # invalid - unknown key
],
)
def test_item_invalid(invalid_item, error):
with pytest.raises(error):
Item(invalid_item)
2016-04-25 12:12:39 +00:00
2016-04-25 03:57:30 +00:00
def test_item_from_kw():
Sanitize paths during archive creation/extraction/... Paths are not always sanitized when creating an archive and, more importantly, never when extracting one. The following example shows how this can be used to attempt to write a file outside the extraction directory: $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 - $ borg list -r ~/borg/a archive-1 -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow $ mkdir borg/target $ cd borg/target $ borg extract -r ~/borg/a archive-1 x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc' Note that Borg tries to extract the file to /etc/shadow and the permission error is a result of the user not having access. This patch ensures file names are sanitized before archiving. As for files extracted from the archive, paths are sanitized by making all paths relative, removing '.' elements, and removing superfluous slashes (as in '//'). '..' elements, however, are rejected outright. The reasoning here is that it is easy to start a path with './' or insert a '//' by accident (e.g. via --stdin-name or import-tar). '..', however, seem unlikely to be the result of an accident and could indicate a tampered repository. With paths being sanitized as they are being read, this "errors" will be corrected during the `borg transfer` required when upgrading to Borg 2. Hence, the sanitation, when reading the archive, can be removed once support for reading v1 repositories is dropped. V2 repository will not contain non-sanitized paths. Of course, a check for absolute paths and '..' elements needs to kept in place to detect tempered archives. I recommend treating this as a security issue. I see the following cases where extracting a file outside the extraction path could constitute a security risk: a) When extraction is done as a different user than archive creation. The user that created the archive may be able to get a file overwritten as a different user. b) When the archive is created on one host and extracted on another. The user that created the archive may be able to get a file overwritten on another host. c) When an archive is created and extracted after a OS reinstall. When a host is suspected compromised, it is common to reinstall (or set up a new machine), extract the backups and then evaluate their integrity. A user that manipulates the archive before such a reinstall may be able to get a file overwritten outside the extraction path and may evade integrity checks. Notably absent is the creation and extraction on the same host as the same user. In such case, an adversary must be assumed to be able to replace any file directly. This also (partially) fixes #7099.
2022-10-23 16:39:09 +00:00
item = Item(path="a/b/c", mode=0o666)
assert item.path == "a/b/c"
2016-04-25 03:57:30 +00:00
assert item.mode == 0o666
def test_item_int_property():
item = Item()
item.mode = 0o666
assert item.mode == 0o666
assert item.as_dict() == {"mode": 0o666}
del item.mode
assert item.as_dict() == {}
with pytest.raises(TypeError):
item.mode = "invalid"
@pytest.mark.parametrize("atime", [42, 2**65])
def test_item_mptimestamp_property(atime):
item = Item()
item.atime = atime
assert item.atime == atime
assert item.as_dict() == {"atime": Timestamp.from_unix_nano(atime)}
2016-04-25 03:57:30 +00:00
def test_item_se_str_property():
# start simple
item = Item()
Sanitize paths during archive creation/extraction/... Paths are not always sanitized when creating an archive and, more importantly, never when extracting one. The following example shows how this can be used to attempt to write a file outside the extraction directory: $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 - $ borg list -r ~/borg/a archive-1 -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow $ mkdir borg/target $ cd borg/target $ borg extract -r ~/borg/a archive-1 x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc' Note that Borg tries to extract the file to /etc/shadow and the permission error is a result of the user not having access. This patch ensures file names are sanitized before archiving. As for files extracted from the archive, paths are sanitized by making all paths relative, removing '.' elements, and removing superfluous slashes (as in '//'). '..' elements, however, are rejected outright. The reasoning here is that it is easy to start a path with './' or insert a '//' by accident (e.g. via --stdin-name or import-tar). '..', however, seem unlikely to be the result of an accident and could indicate a tampered repository. With paths being sanitized as they are being read, this "errors" will be corrected during the `borg transfer` required when upgrading to Borg 2. Hence, the sanitation, when reading the archive, can be removed once support for reading v1 repositories is dropped. V2 repository will not contain non-sanitized paths. Of course, a check for absolute paths and '..' elements needs to kept in place to detect tempered archives. I recommend treating this as a security issue. I see the following cases where extracting a file outside the extraction path could constitute a security risk: a) When extraction is done as a different user than archive creation. The user that created the archive may be able to get a file overwritten as a different user. b) When the archive is created on one host and extracted on another. The user that created the archive may be able to get a file overwritten on another host. c) When an archive is created and extracted after a OS reinstall. When a host is suspected compromised, it is common to reinstall (or set up a new machine), extract the backups and then evaluate their integrity. A user that manipulates the archive before such a reinstall may be able to get a file overwritten outside the extraction path and may evade integrity checks. Notably absent is the creation and extraction on the same host as the same user. In such case, an adversary must be assumed to be able to replace any file directly. This also (partially) fixes #7099.
2022-10-23 16:39:09 +00:00
item.path = "a/b/c"
assert item.path == "a/b/c"
assert item.as_dict() == {"path": "a/b/c"}
2016-04-25 03:57:30 +00:00
del item.path
assert item.as_dict() == {}
with pytest.raises(TypeError):
item.path = 42
# non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
Sanitize paths during archive creation/extraction/... Paths are not always sanitized when creating an archive and, more importantly, never when extracting one. The following example shows how this can be used to attempt to write a file outside the extraction directory: $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 - $ borg list -r ~/borg/a archive-1 -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow $ mkdir borg/target $ cd borg/target $ borg extract -r ~/borg/a archive-1 x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc' Note that Borg tries to extract the file to /etc/shadow and the permission error is a result of the user not having access. This patch ensures file names are sanitized before archiving. As for files extracted from the archive, paths are sanitized by making all paths relative, removing '.' elements, and removing superfluous slashes (as in '//'). '..' elements, however, are rejected outright. The reasoning here is that it is easy to start a path with './' or insert a '//' by accident (e.g. via --stdin-name or import-tar). '..', however, seem unlikely to be the result of an accident and could indicate a tampered repository. With paths being sanitized as they are being read, this "errors" will be corrected during the `borg transfer` required when upgrading to Borg 2. Hence, the sanitation, when reading the archive, can be removed once support for reading v1 repositories is dropped. V2 repository will not contain non-sanitized paths. Of course, a check for absolute paths and '..' elements needs to kept in place to detect tempered archives. I recommend treating this as a security issue. I see the following cases where extracting a file outside the extraction path could constitute a security risk: a) When extraction is done as a different user than archive creation. The user that created the archive may be able to get a file overwritten as a different user. b) When the archive is created on one host and extracted on another. The user that created the archive may be able to get a file overwritten on another host. c) When an archive is created and extracted after a OS reinstall. When a host is suspected compromised, it is common to reinstall (or set up a new machine), extract the backups and then evaluate their integrity. A user that manipulates the archive before such a reinstall may be able to get a file overwritten outside the extraction path and may evade integrity checks. Notably absent is the creation and extraction on the same host as the same user. In such case, an adversary must be assumed to be able to replace any file directly. This also (partially) fixes #7099.
2022-10-23 16:39:09 +00:00
item = Item(internal_dict={"path": b"a/\xfc/c"})
assert item.path == "a/\udcfc/c" # getting a surrogate-escaped representation
assert item.as_dict() == {"path": "a/\udcfc/c"}
2016-04-25 03:57:30 +00:00
del item.path
assert "path" not in item
Sanitize paths during archive creation/extraction/... Paths are not always sanitized when creating an archive and, more importantly, never when extracting one. The following example shows how this can be used to attempt to write a file outside the extraction directory: $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 - $ borg list -r ~/borg/a archive-1 -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow $ mkdir borg/target $ cd borg/target $ borg extract -r ~/borg/a archive-1 x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc' Note that Borg tries to extract the file to /etc/shadow and the permission error is a result of the user not having access. This patch ensures file names are sanitized before archiving. As for files extracted from the archive, paths are sanitized by making all paths relative, removing '.' elements, and removing superfluous slashes (as in '//'). '..' elements, however, are rejected outright. The reasoning here is that it is easy to start a path with './' or insert a '//' by accident (e.g. via --stdin-name or import-tar). '..', however, seem unlikely to be the result of an accident and could indicate a tampered repository. With paths being sanitized as they are being read, this "errors" will be corrected during the `borg transfer` required when upgrading to Borg 2. Hence, the sanitation, when reading the archive, can be removed once support for reading v1 repositories is dropped. V2 repository will not contain non-sanitized paths. Of course, a check for absolute paths and '..' elements needs to kept in place to detect tempered archives. I recommend treating this as a security issue. I see the following cases where extracting a file outside the extraction path could constitute a security risk: a) When extraction is done as a different user than archive creation. The user that created the archive may be able to get a file overwritten as a different user. b) When the archive is created on one host and extracted on another. The user that created the archive may be able to get a file overwritten on another host. c) When an archive is created and extracted after a OS reinstall. When a host is suspected compromised, it is common to reinstall (or set up a new machine), extract the backups and then evaluate their integrity. A user that manipulates the archive before such a reinstall may be able to get a file overwritten outside the extraction path and may evade integrity checks. Notably absent is the creation and extraction on the same host as the same user. In such case, an adversary must be assumed to be able to replace any file directly. This also (partially) fixes #7099.
2022-10-23 16:39:09 +00:00
item.path = "a/\udcfc/c" # setting using a surrogate-escaped representation
assert item.as_dict() == {"path": "a/\udcfc/c"}
2016-04-25 03:57:30 +00:00
def test_item_list_property():
item = Item()
item.chunks = []
assert item.chunks == []
item.chunks.append(0)
assert item.chunks == [0]
item.chunks.append(1)
assert item.chunks == [0, 1]
assert item.as_dict() == {"chunks": [0, 1]}
def test_item_dict_property():
item = Item()
item.xattrs = StableDict()
assert item.xattrs == StableDict()
item.xattrs["foo"] = "bar"
assert item.xattrs["foo"] == "bar"
item.xattrs["bar"] = "baz"
assert item.xattrs == StableDict({"foo": "bar", "bar": "baz"})
assert item.as_dict() == {"xattrs": {"foo": "bar", "bar": "baz"}}
def test_unknown_property():
# we do not want the user to be able to set unknown attributes -
# they won't get into the .as_dict() result dictionary.
# also they might be just typos of known attributes.
item = Item()
with pytest.raises(AttributeError):
item.unknown_attribute = None
2016-08-09 18:49:56 +00:00
def test_item_file_size():
2022-06-10 18:36:58 +00:00
item = Item(mode=0o100666, chunks=[ChunkListEntry(size=1000, id=None), ChunkListEntry(size=2000, id=None)])
assert item.get_size() == 3000
item.get_size(memorize=True)
assert item.size == 3000
2016-08-09 18:49:56 +00:00
def test_item_file_size_no_chunks():
item = Item(mode=0o100666)
assert item.get_size() == 0
2017-07-29 12:26:15 +00:00
def test_item_optr():
item = Item()
assert Item.from_optr(item.to_optr()) is item
@pytest.mark.parametrize(
"chunk_a, chunk_b, chunks_equal",
[
(["1234", "567A", "bC"], ["1", "23", "4567A", "b", "C"], True), # equal
(["12345"], ["1234", "56"], False), # one iterator exhausted before the other
(["1234", "65"], ["1234", "56"], False), # content mismatch
(["1234", "56"], ["1234", "565"], False), # first is the prefix of second
],
)
def test_chunk_content_equal(chunk_a: str, chunk_b: str, chunks_equal):
chunks_a = [data.encode() for data in chunk_a]
chunks_b = [data.encode() for data in chunk_b]
compare1 = chunks_contents_equal(iter(chunks_a), iter(chunks_b))
compare2 = chunks_contents_equal(iter(chunks_b), iter(chunks_a))
assert compare1 == compare2
assert compare1 == chunks_equal