
Merge pull request #7350 from ThomasWaldmann/testing-chunker

implement "fail" chunker for testing purposes
TW 2023-02-13 18:17:56 +01:00 committed by GitHub
commit 6cfe77ebaf
7 changed files with 112 additions and 2 deletions


@@ -170,7 +170,7 @@ per_file_ignores =
    src/borg/testsuite/archiver/return_codes.py:F401,F405,F811
    src/borg/testsuite/benchmark.py:F401,F811
    src/borg/testsuite/chunker.py:E501,F405
-   src/borg/testsuite/chunker_pytest.py:F401
+   src/borg/testsuite/chunker_pytest.py:F401,F405
    src/borg/testsuite/chunker_slow.py:F405
    src/borg/testsuite/crypto.py:E126,E501,E741
    src/borg/testsuite/file_integrity.py:F401


@@ -17,6 +17,10 @@ fmap_entry = Tuple[int, int, bool]
def sparsemap(fd: BinaryIO = None, fh: int = -1) -> List[fmap_entry]: ...

class ChunkerFailing:
    def __init__(self, block_size: int, map: str) -> None: ...
    def chunkify(self, fd: BinaryIO = None, fh: int = -1) -> Iterator: ...

class ChunkerFixed:
    def __init__(self, block_size: int, header_size: int = 0, sparse: bool = False) -> None: ...
    def chunkify(self, fd: BinaryIO = None, fh: int = -1, fmap: List[fmap_entry] = None) -> Iterator: ...


@@ -123,6 +123,53 @@ def sparsemap(fd=None, fh=-1):
        dseek(curr, os.SEEK_SET, fd, fh)


class ChunkerFailing:
    """
    This is a very simple chunker for testing purposes.

    Reads block_size blocks from the file; a per-block map of "R" / "E" characters
    controls whether each read succeeds or raises a simulated I/O error.
    """

    def __init__(self, block_size, map):
        self.block_size = block_size
        # one char per block: r/R = successful read, e/E = I/O Error, e.g.: "rrrrErrrEEr"
        # blocks beyond the map will have the same behaviour as the last map char indicates.
        map = map.upper()
        if not set(map).issubset({"R", "E"}):
            raise ValueError("unsupported map character")
        self.map = map
        self.count = 0
        self.chunking_time = 0.0  # not updated, just provided so that caller does not crash

    def chunkify(self, fd=None, fh=-1):
        """
        Cut a file into chunks.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        """
        use_fh = fh >= 0
        wanted = self.block_size
        while True:
            data = os.read(fh, wanted) if use_fh else fd.read(wanted)
            got = len(data)
            if got > 0:
                idx = self.count if self.count < len(self.map) else -1
                behaviour = self.map[idx]
                if behaviour == "E":
                    self.count += 1
                    fname = None if use_fh else getattr(fd, "name", None)
                    raise OSError(errno.EIO, "simulated I/O error", fname)
                elif behaviour == "R":
                    self.count += 1
                    yield Chunk(data, size=got, allocation=CH_DATA)
                else:
                    raise ValueError("unsupported map character")
            if got < wanted:
                # we did not get enough data, looks like EOF.
                return


class ChunkerFixed:
    """
    This is a simple chunker for input data with data usually staying at same
@@ -294,6 +341,8 @@ def get_chunker(algo, *params, **kw):
    if algo == 'fixed':
        sparse = kw['sparse']
        return ChunkerFixed(*params, sparse=sparse)
    if algo == 'fail':
        return ChunkerFailing(*params)
    raise TypeError('unsupported chunker algo %r' % algo)
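
To illustrate the map semantics above, here is a minimal sketch (not part of this PR; it assumes borg is importable and it mirrors the new test in chunker_pytest further down) that drives ChunkerFailing directly on an in-memory file:

from io import BytesIO

from borg.chunker import ChunkerFailing
from borg.constants import CH_DATA

SIZE = 4096
data = bytes(2 * SIZE)
chunker = ChunkerFailing(SIZE, "RE")  # block 0 reads fine, block 1 raises a simulated EIO

with BytesIO(data) as fd:
    chunks = chunker.chunkify(fd)
    first = next(chunks)  # map char "R": a normal data chunk
    assert first.meta["allocation"] == CH_DATA
    assert first.data == data[:SIZE]
    try:
        next(chunks)  # map char "E": simulated OSError(EIO)
    except OSError as exc:
        print("got the expected failure:", exc)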


@@ -78,6 +78,7 @@
# chunker algorithms
CH_BUZHASH = "buzhash"
CH_FIXED = "fixed"
CH_FAIL = "fail"
# buzhash chunker params
CHUNK_MIN_EXP = 19 # 2**19 == 512kiB


@@ -139,6 +139,10 @@ def ChunkerParams(s):
    if count == 0:
        raise ValueError("no chunker params given")
    algo = params[0].lower()
    if algo == CH_FAIL and count == 3:
        block_size = int(params[1])
        fail_map = str(params[2])
        return algo, block_size, fail_map
    if algo == CH_FIXED and 2 <= count <= 3:  # fixed, block_size[, header_size]
        block_size = int(params[1])
        header_size = int(params[2]) if count == 3 else 0
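
For context, this is roughly how a --chunker-params value reaches the new chunker: ChunkerParams() above returns the parameter tuple and get_chunker() (see the chunker hunk above) dispatches on the algo name. A small sketch, assuming ChunkerParams is re-exported via borg.helpers the way the archiver uses it; the seed/sparse keyword arguments mirror the archiver call and are ignored for the fail chunker:

from borg.chunker import get_chunker
from borg.helpers import ChunkerParams

algo, block_size, fail_map = ChunkerParams("fail,4096,RRRE")
assert (algo, block_size, fail_map) == ("fail", 4096, "RRRE")

# 'fail' is dispatched to ChunkerFailing(block_size, fail_map)
chunker = get_chunker(algo, block_size, fail_map, seed=0, sparse=False)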


@@ -191,6 +191,32 @@ def test_create_stdin_checkpointing(self):
        out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
        assert out == input_data

    def test_create_erroneous_file(self):
        chunk_size = 1000  # the fail chunker below reads fixed blocks of this size
        self.create_regular_file(os.path.join(self.input_path, "file1"), size=chunk_size * 2)
        self.create_regular_file(os.path.join(self.input_path, "file2"), size=chunk_size * 2)
        self.create_regular_file(os.path.join(self.input_path, "file3"), size=chunk_size * 2)
        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
        flist = "".join(f"input/file{n}\n" for n in range(1, 4))
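        # map "RRRERRR": one char per 1000-byte block, counted across all files (one chunker
        # instance is reused); file2's second block hits the "E" and raises a simulated EIO.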
        out = self.cmd(
            f"--repo={self.repository_location}",
            "create",
            f"--chunker-params=fail,{chunk_size},RRRERRR",
            "--paths-from-stdin",
            "--list",
            "test",
            input=flist.encode(),
            exit_code=1,
        )
        assert "E input/file2" in out
        # repo looking good overall? checks for rc == 0.
        self.cmd(f"--repo={self.repository_location}", "check", "--debug")
        # check files in created archive
        out = self.cmd(f"--repo={self.repository_location}", "list", "test")
        assert "input/file1" in out
        assert "input/file2" not in out
        assert "input/file3" in out

    def test_create_content_from_command(self):
        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
        input_data = "some test content"


@@ -5,7 +5,7 @@
import pytest
from .chunker import cf
-from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
+from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
from ..constants import * # NOQA
BS = 4096 # fs block size
@@ -125,3 +125,29 @@ def get_chunks(fname, sparse, header_size):
    fn = str(tmpdir / fname)
    make_sparsefile(fn, sparse_map, header_size=header_size)
    assert get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)


def test_chunker_failing():
    SIZE = 4096
    data = bytes(2 * SIZE + 1000)
    chunker = ChunkerFailing(SIZE, "rEErrr")  # cut <SIZE> chunks, start failing at block 1, fail 2 times
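    # note: self.count is instance state and is not reset by chunkify(), so the map
    # position keeps advancing across the three separate chunkify() calls below.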
    with BytesIO(data) as fd:
        ch = chunker.chunkify(fd)
        c1 = next(ch)  # block 0: ok
        assert c1.meta["allocation"] == CH_DATA
        assert c1.data == data[:SIZE]
        with pytest.raises(OSError):  # block 1: failure 1
            next(ch)
    with BytesIO(data) as fd:
        ch = chunker.chunkify(fd)
        with pytest.raises(OSError):  # block 2: failure 2
            next(ch)
    with BytesIO(data) as fd:
        ch = chunker.chunkify(fd)
        c1 = next(ch)  # block 3: success!
        c2 = next(ch)  # block 4: success!
        c3 = next(ch)  # block 5: success!
        assert c1.meta["allocation"] == c2.meta["allocation"] == c3.meta["allocation"] == CH_DATA
        assert c1.data == data[:SIZE]
        assert c2.data == data[SIZE : 2 * SIZE]
        assert c3.data == data[2 * SIZE :]