From 7f973a5b34d5c6a40cbac0189c28ceedb5fec7d4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Feb 2023 00:41:01 +0100 Subject: [PATCH 1/2] implement "fail" chunker for testing purposes --chunker-params=fail,4096,rrrEErrrr means: - cut chunks of 4096b fixed size (last chunk in a file can be less) - read chunks 0, 1 and 2 successfully - error at chunk 3 and 4 (simulated OSError(errno.EIO)) - read successfully again for the next 4 chunks Chunks are counted inside the chunker instance, starting from 0, always increasing while the same instance is used. Read chunks as well as failed chunks count up by 1. --- setup.cfg | 2 +- src/borg/chunker.pyi | 4 +++ src/borg/chunker.pyx | 49 ++++++++++++++++++++++++++++ src/borg/constants.py | 1 + src/borg/helpers/parseformat.py | 4 +++ src/borg/testsuite/chunker_pytest.py | 28 +++++++++++++++- 6 files changed, 86 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 47cf3119c..99e81917a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -170,7 +170,7 @@ per_file_ignores = src/borg/testsuite/archiver/return_codes.py:F401,F405,F811 src/borg/testsuite/benchmark.py:F401,F811 src/borg/testsuite/chunker.py:E501,F405 - src/borg/testsuite/chunker_pytest.py:F401 + src/borg/testsuite/chunker_pytest.py:F401,F405 src/borg/testsuite/chunker_slow.py:F405 src/borg/testsuite/crypto.py:E126,E501,E741 src/borg/testsuite/file_integrity.py:F401 diff --git a/src/borg/chunker.pyi b/src/borg/chunker.pyi index 0d2e493f6..4d5d7d733 100644 --- a/src/borg/chunker.pyi +++ b/src/borg/chunker.pyi @@ -17,6 +17,10 @@ fmap_entry = Tuple[int, int, bool] def sparsemap(fd: BinaryIO = None, fh: int = -1) -> List[fmap_entry]: ... +class ChunkerFailing: + def __init__(self, block_size: int, map: str) -> None: ... + def chunkify(self, fd: BinaryIO = None, fh: int = -1) -> Iterator: ... + class ChunkerFixed: def __init__(self, block_size: int, header_size: int = 0, sparse: bool = False) -> None: ... 
def chunkify(self, fd: BinaryIO = None, fh: int = -1, fmap: List[fmap_entry] = None) -> Iterator: ... diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx index e2081c51c..d69976afd 100644 --- a/src/borg/chunker.pyx +++ b/src/borg/chunker.pyx @@ -123,6 +123,53 @@ def sparsemap(fd=None, fh=-1): dseek(curr, os.SEEK_SET, fd, fh) +class ChunkerFailing: + """ + This is a very simple chunker for testing purposes. + + Reads block_size chunks; for each block, the map decides whether it is read successfully or fails with a simulated I/O error. + """ + def __init__(self, block_size, map): + self.block_size = block_size + # one char per block: r/R = successful read, e/E = I/O Error, e.g.: "rrrrErrrEEr" + # blocks beyond the map will have same behaviour as the last map char indicates. + map = map.upper() + if not set(map).issubset({"R", "E"}): + raise ValueError("unsupported map character") + self.map = map + self.count = 0 + self.chunking_time = 0.0 # not updated, just provided so that caller does not crash + + def chunkify(self, fd=None, fh=-1): + """ + Cut a file into chunks. + + :param fd: Python file object + :param fh: OS-level file handle (if available), + defaults to -1 which means not to use OS-level fd. + """ + use_fh = fh >= 0 + wanted = self.block_size + while True: + data = os.read(fh, wanted) if use_fh else fd.read(wanted) + got = len(data) + if got > 0: + idx = self.count if self.count < len(self.map) else -1 + behaviour = self.map[idx] + if behaviour == "E": + self.count += 1 + fname = None if use_fh else getattr(fd, "name", None) + raise OSError(errno.EIO, "simulated I/O error", fname) + elif behaviour == "R": + self.count += 1 + yield Chunk(data, size=got, allocation=CH_DATA) + else: + raise ValueError("unsupported map character") + if got < wanted: + # we did not get enough data, looks like EOF. 
+ return + + class ChunkerFixed: """ This is a simple chunker for input data with data usually staying at same @@ -294,6 +341,8 @@ def get_chunker(algo, *params, **kw): if algo == 'fixed': sparse = kw['sparse'] return ChunkerFixed(*params, sparse=sparse) + if algo == 'fail': + return ChunkerFailing(*params) raise TypeError('unsupported chunker algo %r' % algo) diff --git a/src/borg/constants.py b/src/borg/constants.py index 46581c5a6..a5bde610f 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -78,6 +78,7 @@ # chunker algorithms CH_BUZHASH = "buzhash" CH_FIXED = "fixed" +CH_FAIL = "fail" # buzhash chunker params CHUNK_MIN_EXP = 19 # 2**19 == 512kiB diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 7d469ad4d..ca16f2509 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -139,6 +139,10 @@ def ChunkerParams(s): if count == 0: raise ValueError("no chunker params given") algo = params[0].lower() + if algo == CH_FAIL and count == 3: + block_size = int(params[1]) + fail_map = str(params[2]) + return algo, block_size, fail_map if algo == CH_FIXED and 2 <= count <= 3: # fixed, block_size[, header_size] block_size = int(params[1]) header_size = int(params[2]) if count == 3 else 0 diff --git a/src/borg/testsuite/chunker_pytest.py b/src/borg/testsuite/chunker_pytest.py index b4161268d..0b7a788a6 100644 --- a/src/borg/testsuite/chunker_pytest.py +++ b/src/borg/testsuite/chunker_pytest.py @@ -5,7 +5,7 @@ import pytest from .chunker import cf -from ..chunker import ChunkerFixed, sparsemap, has_seek_hole +from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing from ..constants import * # NOQA BS = 4096 # fs block size @@ -125,3 +125,29 @@ def get_chunks(fname, sparse, header_size): fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size) + + +def 
test_chunker_failing(): + SIZE = 4096 + data = bytes(2 * SIZE + 1000) + chunker = ChunkerFailing(SIZE, "rEErrr") # cut chunks, start failing at block 1, fail 2 times + with BytesIO(data) as fd: + ch = chunker.chunkify(fd) + c1 = next(ch) # block 0: ok + assert c1.meta["allocation"] == CH_DATA + assert c1.data == data[:SIZE] + with pytest.raises(OSError): # block 1: failure 1 + next(ch) + with BytesIO(data) as fd: + ch = chunker.chunkify(fd) + with pytest.raises(OSError): # block 2: failure 2 + next(ch) + with BytesIO(data) as fd: + ch = chunker.chunkify(fd) + c1 = next(ch) # block 3: success! + c2 = next(ch) # block 4: success! + c3 = next(ch) # block 5: success! + assert c1.meta["allocation"] == c2.meta["allocation"] == c3.meta["allocation"] == CH_DATA + assert c1.data == data[:SIZE] + assert c2.data == data[SIZE : 2 * SIZE] + assert c3.data == data[2 * SIZE :] From 11fd6afb0fea28105d72c974e57dd7000ac4e8cc Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Feb 2023 01:35:26 +0100 Subject: [PATCH 2/2] use "fail" chunker to test erroneous input file skipping if a file can't be read (like here: there is a simulated I/O error in the 2nd chunk of file2), it should be logged with "E" status, skipped and backup shall proceed with next file(s). also, check that the repo has no orphan chunks (exception handling code needs to deal with 1st chunk of file2 which already has been written / incref'd in the repo). 
--- src/borg/testsuite/archiver/create_cmd.py | 26 +++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/borg/testsuite/archiver/create_cmd.py b/src/borg/testsuite/archiver/create_cmd.py index 82cf8a35b..63310b6b6 100644 --- a/src/borg/testsuite/archiver/create_cmd.py +++ b/src/borg/testsuite/archiver/create_cmd.py @@ -191,6 +191,32 @@ def test_create_stdin_checkpointing(self): out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True) assert out == input_data + def test_create_erroneous_file(self): + chunk_size = 1000 # block size of the "fail" chunker, each input file is exactly 2 blocks long + self.create_regular_file(os.path.join(self.input_path, "file1"), size=chunk_size * 2) + self.create_regular_file(os.path.join(self.input_path, "file2"), size=chunk_size * 2) + self.create_regular_file(os.path.join(self.input_path, "file3"), size=chunk_size * 2) + self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) + flist = "".join(f"input/file{n}\n" for n in range(1, 4)) + out = self.cmd( + f"--repo={self.repository_location}", + "create", + f"--chunker-params=fail,{chunk_size},RRRERRR", + "--paths-from-stdin", + "--list", + "test", + input=flist.encode(), + exit_code=1, + ) + assert "E input/file2" in out + # repo looking good overall? checks for rc == 0. + self.cmd(f"--repo={self.repository_location}", "check", "--debug") + # check files in created archive + out = self.cmd(f"--repo={self.repository_location}", "list", "test") + assert "input/file1" in out + assert "input/file2" not in out + assert "input/file3" in out + def test_create_content_from_command(self): self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION) input_data = "some test content"