implement "fail" chunker for testing purposes

--chunker-params=fail,4096,rrrEErrrr means:
- cut chunks of 4096 bytes fixed size (the last chunk in a file can be shorter)
- read chunks 0, 1 and 2 successfully
- error at chunk 3 and 4 (simulated OSError(errno.EIO))
- read successfully again for the next 4 chunks

Chunks are counted inside the chunker instance, starting
from 0, always increasing while the same instance is used.

Both successfully read chunks and failed chunks increment the counter by 1.
This commit is contained in:
Thomas Waldmann 2023-02-13 00:41:01 +01:00
parent 52040bc043
commit 7f973a5b34
No known key found for this signature in database
GPG Key ID: 243ACFA951F78E01
6 changed files with 86 additions and 2 deletions

View File

@ -170,7 +170,7 @@ per_file_ignores =
src/borg/testsuite/archiver/return_codes.py:F401,F405,F811
src/borg/testsuite/benchmark.py:F401,F811
src/borg/testsuite/chunker.py:E501,F405
src/borg/testsuite/chunker_pytest.py:F401
src/borg/testsuite/chunker_pytest.py:F401,F405
src/borg/testsuite/chunker_slow.py:F405
src/borg/testsuite/crypto.py:E126,E501,E741
src/borg/testsuite/file_integrity.py:F401

View File

@ -17,6 +17,10 @@ fmap_entry = Tuple[int, int, bool]
def sparsemap(fd: BinaryIO = None, fh: int = -1) -> List[fmap_entry]: ...
class ChunkerFailing:
    # Test-only chunker: fixed-size blocks with scripted per-block read failures
    # (map chars "r"/"R" = success, "e"/"E" = simulated I/O error).
    def __init__(self, block_size: int, map: str) -> None: ...
    def chunkify(self, fd: BinaryIO = None, fh: int = -1) -> Iterator: ...
class ChunkerFixed:
    # Fixed-block-size chunker; optional header chunk and sparse-file support.
    def __init__(self, block_size: int, header_size: int = 0, sparse: bool = False) -> None: ...
    def chunkify(self, fd: BinaryIO = None, fh: int = -1, fmap: List[fmap_entry] = None) -> Iterator: ...

View File

@ -123,6 +123,53 @@ def sparsemap(fd=None, fh=-1):
dseek(curr, os.SEEK_SET, fd, fh)
class ChunkerFailing:
    """
    A very simple fixed-block-size chunker for testing purposes.

    The *map* string scripts the behaviour of each block read, one character
    per block: "r"/"R" means a successful read, "e"/"E" means a simulated
    I/O error (OSError with errno.EIO).  Blocks beyond the end of the map
    behave like the last map character.  The block counter lives on the
    instance, so it keeps increasing across multiple chunkify() calls.
    """

    def __init__(self, block_size, map):
        # block_size: fixed chunk size in bytes (the last chunk may be shorter)
        # map: behaviour script, e.g. "rrrrErrrEEr" - validated to contain only r/e
        self.block_size = block_size
        normalized = map.upper()
        if set(normalized) - {"R", "E"}:
            raise ValueError("unsupported map character")
        self.map = normalized
        # counts every processed block, successful or failing
        self.count = 0
        # never updated here - only provided so that callers reading it do not crash
        self.chunking_time = 0.0

    def chunkify(self, fd=None, fh=-1):
        """
        Cut a file into chunks.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        """
        from_handle = fh >= 0
        size = self.block_size
        while True:
            if from_handle:
                data = os.read(fh, size)
            else:
                data = fd.read(size)
            if data:
                # past the end of the map, keep repeating its last character
                action = self.map[min(self.count, len(self.map) - 1)]
                if action == "E":
                    self.count += 1
                    fname = None if from_handle else getattr(fd, "name", None)
                    raise OSError(errno.EIO, "simulated I/O error", fname)
                elif action == "R":
                    self.count += 1
                    yield Chunk(data, size=len(data), allocation=CH_DATA)
                else:
                    # unreachable after __init__ validation, kept as a safety net
                    raise ValueError("unsupported map character")
            if len(data) < size:
                # short read means we hit EOF
                return
class ChunkerFixed:
"""
This is a simple chunker for input data with data usually staying at same
@ -294,6 +341,8 @@ def get_chunker(algo, *params, **kw):
if algo == 'fixed':
sparse = kw['sparse']
return ChunkerFixed(*params, sparse=sparse)
if algo == 'fail':
return ChunkerFailing(*params)
raise TypeError('unsupported chunker algo %r' % algo)

View File

@ -78,6 +78,7 @@ FD_MAX_AGE = 4 * 60 # 4 minutes
# chunker algorithms
CH_BUZHASH = "buzhash"
CH_FIXED = "fixed"
CH_FAIL = "fail"
# buzhash chunker params
CHUNK_MIN_EXP = 19 # 2**19 == 512kiB

View File

@ -139,6 +139,10 @@ def ChunkerParams(s):
if count == 0:
raise ValueError("no chunker params given")
algo = params[0].lower()
if algo == CH_FAIL and count == 3:
block_size = int(params[1])
fail_map = str(params[2])
return algo, block_size, fail_map
if algo == CH_FIXED and 2 <= count <= 3: # fixed, block_size[, header_size]
block_size = int(params[1])
header_size = int(params[2]) if count == 3 else 0

View File

@ -5,7 +5,7 @@ import tempfile
import pytest
from .chunker import cf
from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
from ..constants import * # NOQA
BS = 4096 # fs block size
@ -125,3 +125,29 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
fn = str(tmpdir / fname)
make_sparsefile(fn, sparse_map, header_size=header_size)
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
def test_chunker_failing():
    """One good block, two scripted failures, then all remaining blocks succeed."""
    SIZE = 4096
    data = bytes(2 * SIZE + 1000)
    # map "rEErrr": block 0 reads fine, blocks 1 and 2 raise, everything after succeeds
    chunker = ChunkerFailing(SIZE, "rEErrr")
    # first pass: one good chunk, then the first simulated failure
    with BytesIO(data) as fd:
        chunks = chunker.chunkify(fd)
        first = next(chunks)  # block 0: ok
        assert first.meta["allocation"] == CH_DATA
        assert first.data == data[:SIZE]
        with pytest.raises(OSError):  # block 1: failure 1
            next(chunks)
    # second pass: the block counter carries over between chunkify() calls
    with BytesIO(data) as fd:
        chunks = chunker.chunkify(fd)
        with pytest.raises(OSError):  # block 2: failure 2
            next(chunks)
    # third pass: blocks 3..5 all succeed
    with BytesIO(data) as fd:
        chunks = chunker.chunkify(fd)
        got = [next(chunks) for _ in range(3)]
        assert all(c.meta["allocation"] == CH_DATA for c in got)
        assert got[0].data == data[:SIZE]
        assert got[1].data == data[SIZE : 2 * SIZE]
        assert got[2].data == data[2 * SIZE :]