mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-25 15:33:39 +00:00
Merge pull request #7350 from ThomasWaldmann/testing-chunker
implement "fail" chunker for testing purposes
This commit is contained in:
commit
6cfe77ebaf
7 changed files with 112 additions and 2 deletions
|
@ -170,7 +170,7 @@ per_file_ignores =
|
|||
src/borg/testsuite/archiver/return_codes.py:F401,F405,F811
|
||||
src/borg/testsuite/benchmark.py:F401,F811
|
||||
src/borg/testsuite/chunker.py:E501,F405
|
||||
src/borg/testsuite/chunker_pytest.py:F401
|
||||
src/borg/testsuite/chunker_pytest.py:F401,F405
|
||||
src/borg/testsuite/chunker_slow.py:F405
|
||||
src/borg/testsuite/crypto.py:E126,E501,E741
|
||||
src/borg/testsuite/file_integrity.py:F401
|
||||
|
|
|
@ -17,6 +17,10 @@ fmap_entry = Tuple[int, int, bool]
|
|||
|
||||
def sparsemap(fd: BinaryIO = None, fh: int = -1) -> List[fmap_entry]: ...
|
||||
|
||||
class ChunkerFailing:
    """Type stub for the testing chunker that simulates per-block read failures (see implementation)."""
    def __init__(self, block_size: int, map: str) -> None: ...
    def chunkify(self, fd: BinaryIO = None, fh: int = -1) -> Iterator: ...
|
||||
|
||||
class ChunkerFixed:
    """Type stub for the fixed-block-size chunker (optionally header-aware and sparse-aware)."""
    def __init__(self, block_size: int, header_size: int = 0, sparse: bool = False) -> None: ...
    def chunkify(self, fd: BinaryIO = None, fh: int = -1, fmap: List[fmap_entry] = None) -> Iterator: ...
|
||||
|
|
|
@ -123,6 +123,53 @@ def sparsemap(fd=None, fh=-1):
|
|||
dseek(curr, os.SEEK_SET, fd, fh)
|
||||
|
||||
|
||||
class ChunkerFailing:
    """
    This is a very simple chunker for testing purposes.

    It reads block_size sized blocks from the input and consults a per-block
    behaviour map to decide whether each block is returned as a chunk or
    raises a simulated I/O error, so callers' error handling can be exercised.
    """
    def __init__(self, block_size, map):
        """
        :param block_size: size [bytes] of each block read from the input
        :param map: one char per block: r/R = successful read, e/E = I/O Error, e.g.: "rrrrErrrEEr".
                    Blocks beyond the map will have same behaviour as the last map char indicates.
                    Must not be empty.
        :raises ValueError: if map is empty or contains characters other than r/R/e/E
        """
        self.block_size = block_size
        map = map.upper()
        # an empty map would make chunkify() crash later with an obscure
        # IndexError on self.map[-1], so reject it up front.
        if not map:
            raise ValueError("map must not be empty")
        if not set(map).issubset({"R", "E"}):
            raise ValueError("unsupported map character")
        self.map = map
        self.count = 0  # number of blocks processed so far, indexes into self.map
        self.chunking_time = 0.0  # not updated, just provided so that caller does not crash

    def chunkify(self, fd=None, fh=-1):
        """
        Cut a file into chunks.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        :raises OSError: with errno.EIO for every block whose map char is "E"
        """
        use_fh = fh >= 0
        wanted = self.block_size
        while True:
            data = os.read(fh, wanted) if use_fh else fd.read(wanted)
            got = len(data)
            if got > 0:
                # blocks beyond the end of the map keep the last char's behaviour
                idx = self.count if self.count < len(self.map) else -1
                behaviour = self.map[idx]
                if behaviour == "E":
                    self.count += 1
                    fname = None if use_fh else getattr(fd, "name", None)
                    raise OSError(errno.EIO, "simulated I/O error", fname)
                elif behaviour == "R":
                    self.count += 1
                    yield Chunk(data, size=got, allocation=CH_DATA)
                # no else branch needed: __init__ guarantees map only contains R/E
            if got < wanted:
                # we did not get enough data, looks like EOF.
                return
|
||||
|
||||
|
||||
class ChunkerFixed:
|
||||
"""
|
||||
This is a simple chunker for input data with data usually staying at same
|
||||
|
@ -294,6 +341,8 @@ def get_chunker(algo, *params, **kw):
|
|||
if algo == 'fixed':
|
||||
sparse = kw['sparse']
|
||||
return ChunkerFixed(*params, sparse=sparse)
|
||||
if algo == 'fail':
|
||||
return ChunkerFailing(*params)
|
||||
raise TypeError('unsupported chunker algo %r' % algo)
|
||||
|
||||
|
||||
|
|
|
@ -78,6 +78,7 @@
|
|||
# chunker algorithms
|
||||
CH_BUZHASH = "buzhash"
|
||||
CH_FIXED = "fixed"
|
||||
CH_FAIL = "fail"
|
||||
|
||||
# buzhash chunker params
|
||||
CHUNK_MIN_EXP = 19 # 2**19 == 512kiB
|
||||
|
|
|
@ -139,6 +139,10 @@ def ChunkerParams(s):
|
|||
if count == 0:
|
||||
raise ValueError("no chunker params given")
|
||||
algo = params[0].lower()
|
||||
if algo == CH_FAIL and count == 3:
|
||||
block_size = int(params[1])
|
||||
fail_map = str(params[2])
|
||||
return algo, block_size, fail_map
|
||||
if algo == CH_FIXED and 2 <= count <= 3: # fixed, block_size[, header_size]
|
||||
block_size = int(params[1])
|
||||
header_size = int(params[2]) if count == 3 else 0
|
||||
|
|
|
@ -191,6 +191,32 @@ def test_create_stdin_checkpointing(self):
|
|||
out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
|
||||
assert out == input_data
|
||||
|
||||
def test_create_erroneous_file(self):
    """Creating an archive must skip a file whose read fails, keep the other files, and exit with rc 1."""
    chunk_size = 1000  # block size used by the 'fail' test chunker below
    for name in ("file1", "file2", "file3"):
        self.create_regular_file(os.path.join(self.input_path, name), size=chunk_size * 2)
    self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
    paths = "".join(f"input/file{n}\n" for n in range(1, 4))
    # map RRRERRR: file1 reads fine (2 blocks), file2 hits a simulated I/O
    # error on its first block, file3 reads fine again.
    out = self.cmd(
        f"--repo={self.repository_location}",
        "create",
        f"--chunker-params=fail,{chunk_size},RRRERRR",
        "--paths-from-stdin",
        "--list",
        "test",
        input=paths.encode(),
        exit_code=1,
    )
    assert "E input/file2" in out
    # the repository must still verify cleanly (cmd asserts rc == 0)
    self.cmd(f"--repo={self.repository_location}", "check", "--debug")
    # the failing file must be absent from the archive, the others present
    out = self.cmd(f"--repo={self.repository_location}", "list", "test")
    assert "input/file1" in out
    assert "input/file2" not in out
    assert "input/file3" in out
|
||||
|
||||
def test_create_content_from_command(self):
|
||||
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
|
||||
input_data = "some test content"
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
import pytest
|
||||
|
||||
from .chunker import cf
|
||||
from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
|
||||
from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
|
||||
from ..constants import * # NOQA
|
||||
|
||||
BS = 4096 # fs block size
|
||||
|
@ -125,3 +125,29 @@ def get_chunks(fname, sparse, header_size):
|
|||
fn = str(tmpdir / fname)
|
||||
make_sparsefile(fn, sparse_map, header_size=header_size)
|
||||
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
|
||||
|
||||
|
||||
def test_chunker_failing():
    """Exercise ChunkerFailing with map 'rEErrr': one good block, two simulated I/O errors, then success."""
    block = 4096
    payload = bytes(2 * block + 1000)
    chunker = ChunkerFailing(block, "rEErrr")
    # first attempt: block 0 reads fine, block 1 raises the first simulated error
    with BytesIO(payload) as src:
        chunks = chunker.chunkify(src)
        first = next(chunks)
        assert first.meta["allocation"] == CH_DATA
        assert first.data == payload[:block]
        with pytest.raises(OSError):
            next(chunks)
    # second attempt: block 2 raises the second simulated error immediately
    with BytesIO(payload) as src:
        with pytest.raises(OSError):
            next(chunker.chunkify(src))
    # third attempt: blocks 3..5 all succeed and together reproduce the whole payload
    with BytesIO(payload) as src:
        chunks = chunker.chunkify(src)
        parts = [next(chunks) for _ in range(3)]
        assert all(part.meta["allocation"] == CH_DATA for part in parts)
        assert parts[0].data == payload[:block]
        assert parts[1].data == payload[block : 2 * block]
        assert parts[2].data == payload[2 * block :]
|
||||
|
|
Loading…
Reference in a new issue