diff --git a/setup.cfg b/setup.cfg
index 47cf3119c..99e81917a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -170,7 +170,7 @@ per_file_ignores =
     src/borg/testsuite/archiver/return_codes.py:F401,F405,F811
     src/borg/testsuite/benchmark.py:F401,F811
     src/borg/testsuite/chunker.py:E501,F405
-    src/borg/testsuite/chunker_pytest.py:F401
+    src/borg/testsuite/chunker_pytest.py:F401,F405
     src/borg/testsuite/chunker_slow.py:F405
     src/borg/testsuite/crypto.py:E126,E501,E741
     src/borg/testsuite/file_integrity.py:F401
diff --git a/src/borg/chunker.pyi b/src/borg/chunker.pyi
index 0d2e493f6..4d5d7d733 100644
--- a/src/borg/chunker.pyi
+++ b/src/borg/chunker.pyi
@@ -17,6 +17,10 @@ fmap_entry = Tuple[int, int, bool]
 
 def sparsemap(fd: BinaryIO = None, fh: int = -1) -> List[fmap_entry]: ...
 
+class ChunkerFailing:
+    def __init__(self, block_size: int, map: str) -> None: ...
+    def chunkify(self, fd: BinaryIO = None, fh: int = -1) -> Iterator: ...
+
 class ChunkerFixed:
     def __init__(self, block_size: int, header_size: int = 0, sparse: bool = False) -> None: ...
     def chunkify(self, fd: BinaryIO = None, fh: int = -1, fmap: List[fmap_entry] = None) -> Iterator: ...
diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx
index e2081c51c..d69976afd 100644
--- a/src/borg/chunker.pyx
+++ b/src/borg/chunker.pyx
@@ -123,6 +123,58 @@ def sparsemap(fd=None, fh=-1):
         dseek(curr, os.SEEK_SET, fd, fh)
 
 
+class ChunkerFailing:
+    """
+    This is a very simple chunker for testing purposes.
+
+    Reads block_size chunks; the map decides, block by block, whether a read succeeds or raises an I/O error.
+    The block counter is kept on the instance, so it carries over from one chunkify() call to the next.
+    """
+    def __init__(self, block_size, map):
+        self.block_size = block_size
+        # one char per block: r/R = successful read, e/E = I/O Error, e.g.: "rrrrErrrEEr"
+        # blocks beyond the map will have same behaviour as the last map char indicates.
+        map = map.upper()
+        if not map:
+            # an empty map has no "last char" to fall back to: chunkify would die with IndexError.
+            raise ValueError("empty map")
+        if not set(map).issubset({"R", "E"}):
+            raise ValueError("unsupported map character")
+        self.map = map
+        self.count = 0
+        self.chunking_time = 0.0  # not updated, just provided so that caller does not crash
+
+    def chunkify(self, fd=None, fh=-1):
+        """
+        Cut a file into chunks.
+
+        :param fd: Python file object
+        :param fh: OS-level file handle (if available),
+                   defaults to -1 which means not to use OS-level fd.
+        :raises OSError: (errno.EIO) for each block the map marks with "E".
+        """
+        use_fh = fh >= 0
+        wanted = self.block_size
+        while True:
+            data = os.read(fh, wanted) if use_fh else fd.read(wanted)
+            got = len(data)
+            if got > 0:
+                idx = self.count if self.count < len(self.map) else -1
+                behaviour = self.map[idx]
+                if behaviour == "E":
+                    self.count += 1
+                    fname = None if use_fh else getattr(fd, "name", None)
+                    raise OSError(errno.EIO, "simulated I/O error", fname)
+                elif behaviour == "R":
+                    self.count += 1
+                    yield Chunk(data, size=got, allocation=CH_DATA)
+                else:
+                    raise ValueError("unsupported map character")
+            if got < wanted:
+                # we did not get enough data, looks like EOF.
+                return
+
+
 class ChunkerFixed:
     """
     This is a simple chunker for input data with data usually staying at same
@@ -294,6 +346,8 @@ def get_chunker(algo, *params, **kw):
     if algo == 'fixed':
         sparse = kw['sparse']
         return ChunkerFixed(*params, sparse=sparse)
+    if algo == 'fail':
+        return ChunkerFailing(*params)
     raise TypeError('unsupported chunker algo %r' % algo)
 
 
diff --git a/src/borg/constants.py b/src/borg/constants.py
index 46581c5a6..a5bde610f 100644
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -78,6 +78,7 @@
 # chunker algorithms
 CH_BUZHASH = "buzhash"
 CH_FIXED = "fixed"
+CH_FAIL = "fail"
 
 # buzhash chunker params
 CHUNK_MIN_EXP = 19  # 2**19 == 512kiB
diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py
index 7d469ad4d..ca16f2509 100644
--- a/src/borg/helpers/parseformat.py
+++ b/src/borg/helpers/parseformat.py
@@ -139,6 +139,10 @@ def ChunkerParams(s):
     if count == 0:
         raise ValueError("no chunker params given")
     algo = params[0].lower()
+    if algo == CH_FAIL and count == 3:
+        block_size = int(params[1])
+        fail_map = str(params[2])
+        return algo, block_size, fail_map
     if algo == CH_FIXED and 2 <= count <= 3:  # fixed, block_size[, header_size]
         block_size = int(params[1])
         header_size = int(params[2]) if count == 3 else 0
diff --git a/src/borg/testsuite/archiver/create_cmd.py b/src/borg/testsuite/archiver/create_cmd.py
index 82cf8a35b..63310b6b6 100644
--- a/src/borg/testsuite/archiver/create_cmd.py
+++ b/src/borg/testsuite/archiver/create_cmd.py
@@ -191,6 +191,32 @@ def test_create_stdin_checkpointing(self):
         out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
         assert out == input_data
 
+    def test_create_erroneous_file(self):
+        chunk_size = 1000  # fixed chunker with this size, also volume based checkpointing after that volume
+        self.create_regular_file(os.path.join(self.input_path, "file1"), size=chunk_size * 2)
+        self.create_regular_file(os.path.join(self.input_path, "file2"), size=chunk_size * 2)
+        self.create_regular_file(os.path.join(self.input_path, "file3"), size=chunk_size * 2)
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        flist = "".join(f"input/file{n}\n" for n in range(1, 4))
+        out = self.cmd(
+            f"--repo={self.repository_location}",
+            "create",
+            f"--chunker-params=fail,{chunk_size},RRRERRR",
+            "--paths-from-stdin",
+            "--list",
+            "test",
+            input=flist.encode(),
+            exit_code=1,
+        )
+        assert "E input/file2" in out
+        # repo looking good overall? checks for rc == 0.
+        self.cmd(f"--repo={self.repository_location}", "check", "--debug")
+        # check files in created archive
+        out = self.cmd(f"--repo={self.repository_location}", "list", "test")
+        assert "input/file1" in out
+        assert "input/file2" not in out
+        assert "input/file3" in out
+
     def test_create_content_from_command(self):
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
         input_data = "some test content"
diff --git a/src/borg/testsuite/chunker_pytest.py b/src/borg/testsuite/chunker_pytest.py
index b4161268d..0b7a788a6 100644
--- a/src/borg/testsuite/chunker_pytest.py
+++ b/src/borg/testsuite/chunker_pytest.py
@@ -5,7 +5,7 @@
 import pytest
 
 from .chunker import cf
-from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
+from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
 from ..constants import *  # NOQA
 
 BS = 4096  # fs block size
@@ -125,3 +125,29 @@ def get_chunks(fname, sparse, header_size):
     fn = str(tmpdir / fname)
     make_sparsefile(fn, sparse_map, header_size=header_size)
     get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
+
+
+def test_chunker_failing():
+    SIZE = 4096
+    data = bytes(2 * SIZE + 1000)
+    chunker = ChunkerFailing(SIZE, "rEErrr")  # cut SIZE-byte chunks, start failing at block 1, fail 2 times
+    with BytesIO(data) as fd:
+        ch = chunker.chunkify(fd)
+        c1 = next(ch)  # block 0: ok
+        assert c1.meta["allocation"] == CH_DATA
+        assert c1.data == data[:SIZE]
+        with pytest.raises(OSError):  # block 1: failure 1
+            next(ch)
+    with BytesIO(data) as fd:
+        ch = chunker.chunkify(fd)
+        with pytest.raises(OSError):  # block 2: failure 2
+            next(ch)
+    with BytesIO(data) as fd:
+        ch = chunker.chunkify(fd)
+        c1 = next(ch)  # block 3: success!
+        c2 = next(ch)  # block 4: success!
+        c3 = next(ch)  # block 5: success!
+        assert c1.meta["allocation"] == c2.meta["allocation"] == c3.meta["allocation"] == CH_DATA
+        assert c1.data == data[:SIZE]
+        assert c2.data == data[SIZE : 2 * SIZE]
+        assert c3.data == data[2 * SIZE :]