From 438cf2e7ef0fc05b079eeecc9e93e1cda9ac1149 Mon Sep 17 00:00:00 2001 From: Peter Gerber Date: Sun, 23 Oct 2022 16:39:09 +0000 Subject: [PATCH] Sanitize paths during archive creation/extraction/... Paths are not always sanitized when creating an archive and, more importantly, never when extracting one. The following example shows how this can be used to attempt to write a file outside the extraction directory: $ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 - $ borg list -r ~/borg/a archive-1 -rw-rw---- root root 7 Sun, 2022-10-23 19:14:27 x/../../../../../etc/shadow $ mkdir borg/target $ cd borg/target $ borg extract -r ~/borg/a archive-1 x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc' Note that Borg tries to extract the file to /etc/shadow and the permission error is a result of the user not having access. This patch ensures file names are sanitized before archiving. As for files extracted from the archive, paths are sanitized by making all paths relative, removing '.' elements, and removing superfluous slashes (as in '//'). '..' elements, however, are rejected outright. The reasoning here is that it is easy to start a path with './' or insert a '//' by accident (e.g. via --stdin-name or import-tar). '..', however, seem unlikely to be the result of an accident and could indicate a tampered repository. With paths being sanitized as they are being read, this "errors" will be corrected during the `borg transfer` required when upgrading to Borg 2. Hence, the sanitation, when reading the archive, can be removed once support for reading v1 repositories is dropped. V2 repository will not contain non-sanitized paths. Of course, a check for absolute paths and '..' elements needs to kept in place to detect tempered archives. I recommend treating this as a security issue. I see the following cases where extracting a file outside the extraction path could constitute a security risk: a) When extraction is done as a different user than archive creation. The user that created the archive may be able to get a file overwritten as a different user. b) When the archive is created on one host and extracted on another. The user that created the archive may be able to get a file overwritten on another host. c) When an archive is created and extracted after a OS reinstall. When a host is suspected compromised, it is common to reinstall (or set up a new machine), extract the backups and then evaluate their integrity. A user that manipulates the archive before such a reinstall may be able to get a file overwritten outside the extraction path and may evade integrity checks. Notably absent is the creation and extraction on the same host as the same user. In such case, an adversary must be assumed to be able to replace any file directly. This also (partially) fixes #7099. --- src/borg/archive.py | 8 +-- src/borg/archiver/create_cmd.py | 3 +- src/borg/helpers/__init__.py | 3 +- src/borg/helpers/fs.py | 62 ++++++++++++++++-- src/borg/helpers/parseformat.py | 15 ++++- src/borg/item.pyx | 3 +- src/borg/testsuite/__init__.py | 19 ++++++ src/borg/testsuite/archive.py | 7 ++ src/borg/testsuite/archiver/create_cmd.py | 31 ++++++++- src/borg/testsuite/archiver/dotdot_path.tar | Bin 0 -> 10240 bytes src/borg/testsuite/archiver/tar_cmds.py | 23 +++++++ src/borg/testsuite/archiver/unusual_paths.tar | Bin 0 -> 10240 bytes src/borg/testsuite/helpers.py | 38 ++++++++--- src/borg/testsuite/item.py | 24 +++---- 14 files changed, 199 insertions(+), 37 deletions(-) create mode 100644 src/borg/testsuite/archiver/dotdot_path.tar create mode 100644 src/borg/testsuite/archiver/unusual_paths.tar diff --git a/src/borg/archive.py b/src/borg/archive.py index 3caf5060..68aef794 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -32,7 +32,7 @@ from .helpers import Error, IntegrityError, set_ec from .platform import uid2user, user2uid, gid2group, group2gid from .helpers import parse_timestamp, archive_ts_now from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize -from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd +from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd, remove_dotdot_prefixes from .helpers import StableDict from .helpers import bin_to_hex from .helpers import safe_ns @@ -853,8 +853,6 @@ Duration: {0.duration} return dest = self.cwd - if item.path.startswith(("/", "../")): - raise Exception("Path should be relative and local") path = os.path.join(dest, item.path) # Attempt to remove existing files, ignore errors on failure try: @@ -1376,8 +1374,8 @@ class FilesystemObjectProcessors: @contextmanager def create_helper(self, path, st, status=None, hardlinkable=True): - safe_path = make_path_safe(path) - item = Item(path=safe_path) + sanitized_path = remove_dotdot_prefixes(path) + item = Item(path=sanitized_path) hardlinked = hardlinkable and st.st_nlink > 1 hl_chunks = None update_map = False diff --git a/src/borg/archiver/create_cmd.py b/src/borg/archiver/create_cmd.py index 0d1d2fa6..a791b091 100644 --- a/src/borg/archiver/create_cmd.py +++ b/src/borg/archiver/create_cmd.py @@ -27,6 +27,7 @@ from ..helpers import basic_json_data, json_print from ..helpers import flags_root, flags_dir, flags_special_follow, flags_special from ..helpers import sig_int, ignore_sigint from ..helpers import iter_separated +from ..helpers import MakePathSafeAction from ..manifest import Manifest from ..patterns import PatternMatcher from ..platform import is_win32 @@ -766,7 +767,7 @@ class CreateMixIn: metavar="NAME", dest="stdin_name", default="stdin", - action=Highlander, + action=MakePathSafeAction, # MakePathSafeAction maybe should subclass from Highlander. help="use NAME in archive for stdin data (default: %(default)r)", ) subparser.add_argument( diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index a6bb84a3..7a83a44d 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -13,7 +13,7 @@ from .datastruct import StableDict, Buffer, EfficientCollectionQueue from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError from .fs import ensure_dir, join_base_dir, get_socket_filename from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir -from .fs import dir_is_tagged, dir_is_cachedir, make_path_safe, scandir_inorder +from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount from .fs import O_, flags_root, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime from .fs import HardLinkManager @@ -31,6 +31,7 @@ from .parseformat import location_validator, archivename_validator, comment_vali from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status from .parseformat import swidth_slice, ellipsis_truncate from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict +from .parseformat import MakePathSafeAction from .process import daemonize, daemonizing from .process import signal_handler, raising_signal_handler, sig_int, ignore_sigint, SigHup, SigTerm from .process import popen_with_error_handling, is_terminal, prepare_subprocess_env, create_filter_process diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 60537cdf..68f0cd69 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -217,12 +217,64 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present): return tag_names -_safe_re = re.compile(r"^((\.\.)?/+)+") - - def make_path_safe(path): - """Make path safe by making it relative and local""" - return _safe_re.sub("", path) or "." + """ + Make path safe by making it relative and normalized. + + `path` is sanitized by making it relative, removing + consecutive slashes (e.g. '//'), removing '.' elements, + and removing trailing slashes. + + For reasons of security, a ValueError is raised should + `path` contain any '..' elements. + """ + path = path.lstrip("/") + if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..": + raise ValueError(f"unexpected '..' element in path {path!r}") + path = os.path.normpath(path) + return path + + +_dotdot_re = re.compile(r"^(\.\./)+") + + +def remove_dotdot_prefixes(path): + """ + Remove '../'s at the beginning of `path`. Additionally, + the path is made relative. + + `path` is expected to be normalized already (e.g. via `os.path.normpath()`). + """ + path = path.lstrip("/") + path = _dotdot_re.sub("", path) + if path in ["", ".."]: + return "." + return path + + +def assert_sanitized_path(path): + assert isinstance(path, str) + # `path` should have been sanitized earlier. Some features, + # like pattern matching rely on a sanitized path. As a + # precaution we check here again. + if make_path_safe(path) != path: + raise ValueError(f"path {path!r} is not sanitized") + return path + + +def to_sanitized_path(path): + assert isinstance(path, str) + # Legacy versions of Borg still allowed non-sanitized paths + # to be stored. So, we sanitize them when reading. + # + # Borg 2 ensures paths are safe before storing them. Thus, when + # support for reading Borg 1 archives is dropped, this should be + # changed to a simple check to verify paths aren't malicious. + # Namely, absolute paths and paths containing '..' elements must + # be rejected. + # + # Also checks for '..' elements in `path` for reasons of security. + return make_path_safe(path) class HardLinkManager: diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 973efc13..ede70547 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -19,7 +19,7 @@ from ..logger import create_logger logger = create_logger() from .errors import Error -from .fs import get_keys_dir +from .fs import get_keys_dir, make_path_safe from .msgpack import Timestamp from .time import OutputTimestamp, format_time, safe_timestamp from .. import __version__ as borg_version @@ -840,7 +840,7 @@ class ItemFormatter(BaseFormatter): from ..item import Item - fake_item = Item(mode=0, path="", user="", group="", mtime=0, uid=0, gid=0) + fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0) formatter = cls(FakeArchive, "") keys = [] keys.extend(formatter.call_keys.keys()) @@ -1147,3 +1147,14 @@ def prepare_dump_dict(d): return res return decode(d) + + +class MakePathSafeAction(argparse.Action): + def __call__(self, parser, namespace, path, option_string=None): + try: + sanitized_path = make_path_safe(path) + except ValueError as e: + raise argparse.ArgumentError(self, e) + if sanitized_path == ".": + raise argparse.ArgumentError(self, f"{path!r} is not a valid file name") + setattr(namespace, self.dest, sanitized_path) diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 3b5b1042..c2abdfc9 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -7,6 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize from .constants import ITEM_KEYS, ARCHIVE_KEYS from .helpers import StableDict from .helpers import format_file_size +from .helpers.fs import assert_sanitized_path, to_sanitized_path from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp from .helpers.time import OutputTimestamp, safe_timestamp @@ -262,7 +263,7 @@ cdef class Item(PropDict): # properties statically defined, so that IDEs can know their names: - path = PropDictProperty(str, 'surrogate-escaped str') + path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path) source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target target = PropDictProperty(str, 'surrogate-escaped str') user = PropDictProperty(str, 'surrogate-escaped str') diff --git a/src/borg/testsuite/__init__.py b/src/borg/testsuite/__init__.py index 5c3a0a8f..a20949c7 100644 --- a/src/borg/testsuite/__init__.py +++ b/src/borg/testsuite/__init__.py @@ -23,6 +23,7 @@ from ..helpers import umount from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR from .. import platform + # Note: this is used by borg.selftest, do not use or import py.test functionality here. from ..fuse_impl import llfuse, has_pyfuse3, has_llfuse @@ -61,6 +62,24 @@ def same_ts_ns(ts_ns1, ts_ns2): return diff_ts <= diff_max +rejected_dotdot_paths = ( + "..", + "../", + "../etc/shadow", + "/..", + "/../", + "/../etc", + "/../etc/", + "etc/..", + "/etc/..", + "/etc/../etc/shadow", + "//etc/..", + "etc//..", + "etc/..//", + "foo/../bar", +) + + @contextmanager def unopened_tempfile(): with tempfile.TemporaryDirectory() as tempdir: diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 0df979d4..e80c208f 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -8,6 +8,7 @@ from unittest.mock import Mock import pytest from . import BaseTestCase +from . import rejected_dotdot_paths from ..crypto.key import PlaintextKey from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid @@ -394,3 +395,9 @@ def test_get_item_uid_gid(): # as there is nothing, it'll fall back to uid_default/gid_default. assert uid == 0 assert gid == 16 + + +def test_reject_non_sanitized_item(): + for path in rejected_dotdot_paths: + with pytest.raises(ValueError, match="unexpected '..' element in path"): + Item(path=path, user="root", group="root") diff --git a/src/borg/testsuite/archiver/create_cmd.py b/src/borg/testsuite/archiver/create_cmd.py index 73eed906..cd80ea48 100644 --- a/src/borg/testsuite/archiver/create_cmd.py +++ b/src/borg/testsuite/archiver/create_cmd.py @@ -279,6 +279,33 @@ class ArchiverTestCase(ArchiverTestCaseBase): assert "input/file2" not in out # it skipped file2 assert "input/file3" in out + def test_sanitized_stdin_name(self): + self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) + self.cmd(f"--repo={self.repository_location}", "create", "--stdin-name", "./a//path", "test", "-", input=b"") + item = json.loads(self.cmd(f"--repo={self.repository_location}", "list", "test", "--json-lines")) + assert item["path"] == "a/path" + + def test_dotdot_stdin_name(self): + self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) + output = self.cmd( + f"--repo={self.repository_location}", + "create", + "--stdin-name", + "foo/../bar", + "test", + "-", + input=b"", + exit_code=2, + ) + assert output.endswith("'..' element in path 'foo/../bar'" + os.linesep) + + def test_dot_stdin_name(self): + self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) + output = self.cmd( + f"--repo={self.repository_location}", "create", "--stdin-name", "./", "test", "-", input=b"", exit_code=2 + ) + assert output.endswith("'./' is not a valid file name" + os.linesep) + def test_create_content_from_command(self): self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) input_data = "some test content" @@ -586,7 +613,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): ) self._assert_test_keep_tagged() - def test_path_normalization(self): + def test_path_sanitation(self): self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) self.create_regular_file("dir1/dir2/file", size=1024 * 80) with changedir("input/dir1/dir2"): @@ -595,7 +622,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_not_in("..", output) self.assert_in(" input/dir1/dir2/file", output) - def test_exclude_normalization(self): + def test_exclude_sanitation(self): self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) self.create_regular_file("file1", size=1024 * 80) self.create_regular_file("file2", size=1024 * 80) diff --git a/src/borg/testsuite/archiver/dotdot_path.tar b/src/borg/testsuite/archiver/dotdot_path.tar new file mode 100644 index 0000000000000000000000000000000000000000..a14c11912c9de3e5b53ef6ab21b1846f745ed1cd GIT binary patch literal 10240 zcmeIuOA3G>6ouit6jxwAb3IW(vp|cs-vfds)x_a{5H5IBX&N-#ORF_9hr_$fxIswCRP^EjLxZy-f2kN-4$|=Th6swf@#SH>h>0wfZ9Kz3NZF zW&KOe1Hm(~>i?;ew&X6>f0>@UE~xcyp!L=kJCzOqqqbV7)mY@<9M3_~y<~226#b3w zee%EdL_)gLcChoI;{Teywf@c$zue0DtG0w+rBA)o)o!~d6zP1n}?`5##S!~S=C zPY5Ah7vTR(CIE|o00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p I2>b(qH}pza!vFvP literal 0 HcmV?d00001 diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 8cdd8bf9..0f96ad6c 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -25,7 +25,7 @@ from ..helpers import ( PlaceholderError, replace_placeholders, ) -from ..helpers import make_path_safe, clean_lines +from ..helpers import remove_dotdot_prefixes, make_path_safe, clean_lines from ..helpers import interval from ..helpers import get_base_dir, get_cache_dir, get_keys_dir, get_security_dir, get_config_dir, get_runtime_dir from ..helpers import is_slow_msgpack @@ -48,6 +48,7 @@ from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded from ..platform import is_cygwin, is_win32, is_darwin, swidth from . import BaseTestCase, FakeInputs, are_hardlinks_supported +from . import rejected_dotdot_paths def test_bin_to_hex(): @@ -393,16 +394,37 @@ def test_chunkerparams(): ChunkerParams("fixed,%d,%d" % (4096, MAX_DATA_SIZE + 1)) # too big header size +class RemoveDotdotPrefixesTestCase(BaseTestCase): + def test(self): + self.assert_equal(remove_dotdot_prefixes("."), ".") + self.assert_equal(remove_dotdot_prefixes(".."), ".") + self.assert_equal(remove_dotdot_prefixes("/"), ".") + self.assert_equal(remove_dotdot_prefixes("//"), ".") + self.assert_equal(remove_dotdot_prefixes("foo"), "foo") + self.assert_equal(remove_dotdot_prefixes("foo/bar"), "foo/bar") + self.assert_equal(remove_dotdot_prefixes("/foo/bar"), "foo/bar") + self.assert_equal(remove_dotdot_prefixes("../foo/bar"), "foo/bar") + + class MakePathSafeTestCase(BaseTestCase): def test(self): + self.assert_equal(make_path_safe("."), ".") + self.assert_equal(make_path_safe("./"), ".") + self.assert_equal(make_path_safe("./foo"), "foo") + self.assert_equal(make_path_safe(".//foo"), "foo") + self.assert_equal(make_path_safe(".//foo//bar//"), "foo/bar") self.assert_equal(make_path_safe("/foo/bar"), "foo/bar") - self.assert_equal(make_path_safe("/foo/bar"), "foo/bar") - self.assert_equal(make_path_safe("/f/bar"), "f/bar") - self.assert_equal(make_path_safe("fo/bar"), "fo/bar") - self.assert_equal(make_path_safe("../foo/bar"), "foo/bar") - self.assert_equal(make_path_safe("../../foo/bar"), "foo/bar") - self.assert_equal(make_path_safe("/"), ".") - self.assert_equal(make_path_safe("/"), ".") + self.assert_equal(make_path_safe("//foo/bar"), "foo/bar") + self.assert_equal(make_path_safe("//foo/./bar"), "foo/bar") + self.assert_equal(make_path_safe(".test"), ".test") + self.assert_equal(make_path_safe(".test."), ".test.") + self.assert_equal(make_path_safe("..test.."), "..test..") + self.assert_equal(make_path_safe("/te..st/foo/bar"), "te..st/foo/bar") + self.assert_equal(make_path_safe("/..test../abc//"), "..test../abc") + + for path in rejected_dotdot_paths: + with pytest.raises(ValueError, match="unexpected '..' element in path"): + make_path_safe(path) class MockArchive: diff --git a/src/borg/testsuite/item.py b/src/borg/testsuite/item.py index 1966d7b4..34fb8fe4 100644 --- a/src/borg/testsuite/item.py +++ b/src/borg/testsuite/item.py @@ -38,8 +38,8 @@ def test_item_empty(): @pytest.mark.parametrize( "item_dict, path, mode", [ # does not matter whether we get str or bytes keys - ({b"path": "/a/b/c", b"mode": 0o666}, "/a/b/c", 0o666), - ({"path": "/a/b/c", "mode": 0o666}, "/a/b/c", 0o666), + ({b"path": "a/b/c", b"mode": 0o666}, "a/b/c", 0o666), + ({"path": "a/b/c", "mode": 0o666}, "a/b/c", 0o666), ], ) def test_item_from_dict(item_dict, path, mode): @@ -64,8 +64,8 @@ def test_item_invalid(invalid_item, error): def test_item_from_kw(): - item = Item(path="/a/b/c", mode=0o666) - assert item.path == "/a/b/c" + item = Item(path="a/b/c", mode=0o666) + assert item.path == "a/b/c" assert item.mode == 0o666 @@ -91,22 +91,22 @@ def test_item_mptimestamp_property(atime): def test_item_se_str_property(): # start simple item = Item() - item.path = "/a/b/c" - assert item.path == "/a/b/c" - assert item.as_dict() == {"path": "/a/b/c"} + item.path = "a/b/c" + assert item.path == "a/b/c" + assert item.as_dict() == {"path": "a/b/c"} del item.path assert item.as_dict() == {} with pytest.raises(TypeError): item.path = 42 # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut - item = Item(internal_dict={"path": b"/a/\xfc/c"}) - assert item.path == "/a/\udcfc/c" # getting a surrogate-escaped representation - assert item.as_dict() == {"path": "/a/\udcfc/c"} + item = Item(internal_dict={"path": b"a/\xfc/c"}) + assert item.path == "a/\udcfc/c" # getting a surrogate-escaped representation + assert item.as_dict() == {"path": "a/\udcfc/c"} del item.path assert "path" not in item - item.path = "/a/\udcfc/c" # setting using a surrogate-escaped representation - assert item.as_dict() == {"path": "/a/\udcfc/c"} + item.path = "a/\udcfc/c" # setting using a surrogate-escaped representation + assert item.as_dict() == {"path": "a/\udcfc/c"} def test_item_list_property():