mirror of https://github.com/borgbackup/borg.git
new read_data param for repository.get() and .get_many()
True (default): return the full chunk (the client can decrypt meta and data). False: return only enough of the chunk so that the client can decrypt the meta.
This commit is contained in:
parent
74ffceabf4
commit
106abbe4d9
|
@ -1001,12 +1001,12 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
|
|||
def flags_many(self, ids, mask=0xFFFFFFFF, value=None):
|
||||
"""actual remoting is done via self.call in the @api decorator"""
|
||||
|
||||
def get(self, id):
|
||||
for resp in self.get_many([id]):
|
||||
def get(self, id, read_data=True):
|
||||
for resp in self.get_many([id], read_data=read_data):
|
||||
return resp
|
||||
|
||||
def get_many(self, ids, is_preloaded=False):
|
||||
yield from self.call_many("get", [{"id": id} for id in ids], is_preloaded=is_preloaded)
|
||||
def get_many(self, ids, read_data=True, is_preloaded=False):
|
||||
yield from self.call_many("get", [{"id": id, "read_data": read_data} for id in ids], is_preloaded=is_preloaded)
|
||||
|
||||
@api(since=parse_version("1.0.0"))
|
||||
def put(self, id, data, wait=True):
|
||||
|
@ -1148,11 +1148,11 @@ class RepositoryNoCache:
|
|||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
def get(self, key):
|
||||
return next(self.get_many([key], cache=False))
|
||||
def get(self, key, read_data=True):
|
||||
return next(self.get_many([key], read_data=read_data, cache=False))
|
||||
|
||||
def get_many(self, keys, cache=True):
|
||||
for key, data in zip(keys, self.repository.get_many(keys)):
|
||||
def get_many(self, keys, read_data=True, cache=True):
|
||||
for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data)):
|
||||
yield self.transform(key, data)
|
||||
|
||||
def log_instrumentation(self):
|
||||
|
@ -1250,9 +1250,11 @@ class RepositoryCache(RepositoryNoCache):
|
|||
self.cache.clear()
|
||||
shutil.rmtree(self.basedir)
|
||||
|
||||
def get_many(self, keys, cache=True):
|
||||
def get_many(self, keys, read_data=True, cache=True):
|
||||
# TODO: this currently always requests the full chunk from self.repository (read_data=True).
|
||||
# It could use different cache keys depending on read_data and cache full vs. meta-only chunks.
|
||||
unknown_keys = [key for key in keys if key not in self.cache]
|
||||
repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys))
|
||||
repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys, read_data=True))
|
||||
for key in keys:
|
||||
if key in self.cache:
|
||||
file = self.key_filename(key)
|
||||
|
@ -1269,7 +1271,7 @@ class RepositoryCache(RepositoryNoCache):
|
|||
else:
|
||||
# slow path: eviction during this get_many removed this key from the cache
|
||||
t0 = time.perf_counter()
|
||||
data = self.repository.get(key)
|
||||
data = self.repository.get(key, read_data=True)
|
||||
self.slow_lat += time.perf_counter() - t0
|
||||
transformed = self.add_entry(key, data, cache)
|
||||
self.slow_misses += 1
|
||||
|
|
|
@ -25,6 +25,7 @@ from .locking import Lock, LockError, LockErrorT
|
|||
from .logger import create_logger
|
||||
from .manifest import Manifest
|
||||
from .platform import SaveFile, SyncFile, sync_dir, safe_fadvise
|
||||
from .repoobj import RepoObj
|
||||
from .checksums import crc32, StreamingXXH64
|
||||
from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
|
||||
|
||||
|
@ -1268,18 +1269,18 @@ class Repository:
|
|||
def flags_many(self, ids, mask=0xFFFFFFFF, value=None):
|
||||
return [self.flags(id_, mask, value) for id_ in ids]
|
||||
|
||||
def get(self, id):
|
||||
def get(self, id, read_data=True):
|
||||
if not self.index:
|
||||
self.index = self.open_index(self.get_transaction_id())
|
||||
try:
|
||||
in_index = NSIndexEntry(*((self.index[id] + (None,))[:3])) # legacy: index entries have no size element
|
||||
return self.io.read(in_index.segment, in_index.offset, id, expected_size=in_index.size)
|
||||
return self.io.read(in_index.segment, in_index.offset, id, expected_size=in_index.size, read_data=read_data)
|
||||
except KeyError:
|
||||
raise self.ObjectNotFound(id, self.path) from None
|
||||
|
||||
def get_many(self, ids, is_preloaded=False):
|
||||
def get_many(self, ids, read_data=True, is_preloaded=False):
|
||||
for id_ in ids:
|
||||
yield self.get(id_)
|
||||
yield self.get(id_, read_data=read_data)
|
||||
|
||||
def put(self, id, data, wait=True):
|
||||
"""put a repo object
|
||||
|
@ -1659,13 +1660,12 @@ class LoggedIO:
|
|||
|
||||
See the _read() docstring about confidence in the returned data.
|
||||
"""
|
||||
assert read_data is True # False is not used (yet)
|
||||
if segment == self.segment and self._write_fd:
|
||||
self._write_fd.sync()
|
||||
fd = self.get_fd(segment)
|
||||
fd.seek(offset)
|
||||
header = fd.read(self.header_fmt.size)
|
||||
size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT2, TAG_PUT), read_data)
|
||||
size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT2, TAG_PUT), read_data=read_data)
|
||||
if id != key:
|
||||
raise IntegrityError(
|
||||
"Invalid segment entry header, is not for wanted id [segment {}, offset {}]".format(segment, offset)
|
||||
|
@ -1686,6 +1686,11 @@ class LoggedIO:
|
|||
PUT2 tags, read_data == False: crc32 check (header)
|
||||
PUT tags, read_data == True: crc32 check (header+data)
|
||||
PUT tags, read_data == False: crc32 check can not be done, all data obtained must be considered informational
|
||||
|
||||
read_data == False behaviour:
|
||||
PUT2 tags: return enough of the chunk so that the client is able to decrypt the metadata,
|
||||
do not read, but just seek over the data.
|
||||
PUT tags: return None and just seek over the data.
|
||||
"""
|
||||
|
||||
def check_crc32(wanted, header, *data):
|
||||
|
@ -1746,7 +1751,31 @@ class LoggedIO:
|
|||
f"expected {self.ENTRY_HASH_SIZE}, got {len(entry_hash)} bytes"
|
||||
)
|
||||
check_crc32(crc, header, key, entry_hash)
|
||||
if not read_data: # seek over data
|
||||
if not read_data:
|
||||
if tag == TAG_PUT2:
|
||||
# PUT2 is only used in new repos and they also have different RepoObj layout,
|
||||
# supporting separately encrypted metadata and data.
|
||||
# In this case, we return enough bytes so the client can decrypt the metadata
|
||||
# and seek over the rest (over the encrypted data).
|
||||
meta_len_size = RepoObj.meta_len_hdr.size
|
||||
meta_len = fd.read(meta_len_size)
|
||||
length -= meta_len_size
|
||||
if len(meta_len) != meta_len_size:
|
||||
raise IntegrityError(
|
||||
f"Segment entry meta length short read [segment {segment}, offset {offset}]: "
|
||||
f"expected {meta_len_size}, got {len(meta_len)} bytes"
|
||||
)
|
||||
ml = RepoObj.meta_len_hdr.unpack(meta_len)[0]
|
||||
meta = fd.read(ml)
|
||||
length -= ml
|
||||
if len(meta) != ml:
|
||||
raise IntegrityError(
|
||||
f"Segment entry meta short read [segment {segment}, offset {offset}]: "
|
||||
f"expected {ml}, got {len(meta)} bytes"
|
||||
)
|
||||
data = meta_len + meta # shortened chunk - enough so the client can decrypt the metadata
|
||||
# we do not have a checksum for this data, but the client's AEAD crypto will check it.
|
||||
# in any case, we seek over the remainder of the chunk
|
||||
oldpos = fd.tell()
|
||||
seeked = fd.seek(length, os.SEEK_CUR) - oldpos
|
||||
if seeked != length:
|
||||
|
|
|
@ -15,6 +15,7 @@ from ..helpers import msgpack
|
|||
from ..locking import Lock, LockFailed
|
||||
from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, handle_remote_line
|
||||
from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT2, TAG_PUT, TAG_COMMIT
|
||||
from ..repoobj import RepoObj
|
||||
from . import BaseTestCase
|
||||
from .hashindex import H
|
||||
|
||||
|
@ -22,6 +23,29 @@ from .hashindex import H
|
|||
UNSPECIFIED = object() # for default values where we can't use None
|
||||
|
||||
|
||||
def fchunk(data, meta=b""):
|
||||
# create a raw chunk that has valid RepoObj layout, but does not use encryption or compression.
|
||||
meta_len = RepoObj.meta_len_hdr.pack(len(meta))
|
||||
assert isinstance(data, bytes)
|
||||
chunk = meta_len + meta + data
|
||||
return chunk
|
||||
|
||||
|
||||
def pchunk(chunk):
|
||||
# parse data and meta from a raw chunk made by fchunk
|
||||
meta_len_size = RepoObj.meta_len_hdr.size
|
||||
meta_len = chunk[:meta_len_size]
|
||||
meta_len = RepoObj.meta_len_hdr.unpack(meta_len)[0]
|
||||
meta = chunk[meta_len_size : meta_len_size + meta_len]
|
||||
data = chunk[meta_len_size + meta_len :]
|
||||
return data, meta
|
||||
|
||||
|
||||
def pdchunk(chunk):
|
||||
# parse only data from a raw chunk made by fchunk
|
||||
return pchunk(chunk)[0]
|
||||
|
||||
|
||||
class RepositoryTestCaseBase(BaseTestCase):
|
||||
key_size = 32
|
||||
exclusive = True
|
||||
|
@ -46,12 +70,12 @@ class RepositoryTestCaseBase(BaseTestCase):
|
|||
self.repository = self.open(exclusive=exclusive)
|
||||
|
||||
def add_keys(self):
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(1), b"bar")
|
||||
self.repository.put(H(3), b"bar")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.put(H(1), fchunk(b"bar"))
|
||||
self.repository.put(H(3), fchunk(b"bar"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.put(H(1), b"bar2")
|
||||
self.repository.put(H(2), b"boo")
|
||||
self.repository.put(H(1), fchunk(b"bar2"))
|
||||
self.repository.put(H(2), fchunk(b"boo"))
|
||||
self.repository.delete(H(3))
|
||||
|
||||
def repo_dump(self, label=None):
|
||||
|
@ -68,9 +92,9 @@ class RepositoryTestCaseBase(BaseTestCase):
|
|||
class RepositoryTestCase(RepositoryTestCaseBase):
|
||||
def test1(self):
|
||||
for x in range(100):
|
||||
self.repository.put(H(x), b"SOMEDATA")
|
||||
self.repository.put(H(x), fchunk(b"SOMEDATA"))
|
||||
key50 = H(50)
|
||||
self.assert_equal(self.repository.get(key50), b"SOMEDATA")
|
||||
self.assert_equal(pdchunk(self.repository.get(key50)), b"SOMEDATA")
|
||||
self.repository.delete(key50)
|
||||
self.assert_raises(Repository.ObjectNotFound, lambda: self.repository.get(key50))
|
||||
self.repository.commit(compact=False)
|
||||
|
@ -80,55 +104,66 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
for x in range(100):
|
||||
if x == 50:
|
||||
continue
|
||||
self.assert_equal(repository2.get(H(x)), b"SOMEDATA")
|
||||
self.assert_equal(pdchunk(repository2.get(H(x))), b"SOMEDATA")
|
||||
|
||||
def test2(self):
|
||||
"""Test multiple sequential transactions"""
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(1), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.put(H(1), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.delete(H(0))
|
||||
self.repository.put(H(1), b"bar")
|
||||
self.repository.put(H(1), fchunk(b"bar"))
|
||||
self.repository.commit(compact=False)
|
||||
self.assert_equal(self.repository.get(H(1)), b"bar")
|
||||
self.assert_equal(pdchunk(self.repository.get(H(1))), b"bar")
|
||||
|
||||
def test_read_data(self):
|
||||
meta, data = b"meta", b"data"
|
||||
meta_len = RepoObj.meta_len_hdr.pack(len(meta))
|
||||
chunk_complete = meta_len + meta + data
|
||||
chunk_short = meta_len + meta
|
||||
self.repository.put(H(0), chunk_complete)
|
||||
self.repository.commit(compact=False)
|
||||
self.assert_equal(self.repository.get(H(0)), chunk_complete)
|
||||
self.assert_equal(self.repository.get(H(0), read_data=True), chunk_complete)
|
||||
self.assert_equal(self.repository.get(H(0), read_data=False), chunk_short)
|
||||
|
||||
def test_consistency(self):
|
||||
"""Test cache consistency"""
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.assert_equal(self.repository.get(H(0)), b"foo")
|
||||
self.repository.put(H(0), b"foo2")
|
||||
self.assert_equal(self.repository.get(H(0)), b"foo2")
|
||||
self.repository.put(H(0), b"bar")
|
||||
self.assert_equal(self.repository.get(H(0)), b"bar")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo2"))
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"foo2")
|
||||
self.repository.put(H(0), fchunk(b"bar"))
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"bar")
|
||||
self.repository.delete(H(0))
|
||||
self.assert_raises(Repository.ObjectNotFound, lambda: self.repository.get(H(0)))
|
||||
|
||||
def test_consistency2(self):
|
||||
"""Test cache consistency2"""
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.assert_equal(self.repository.get(H(0)), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"foo")
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.put(H(0), b"foo2")
|
||||
self.assert_equal(self.repository.get(H(0)), b"foo2")
|
||||
self.repository.put(H(0), fchunk(b"foo2"))
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"foo2")
|
||||
self.repository.rollback()
|
||||
self.assert_equal(self.repository.get(H(0)), b"foo")
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"foo")
|
||||
|
||||
def test_overwrite_in_same_transaction(self):
|
||||
"""Test cache consistency2"""
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), b"foo2")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.put(H(0), fchunk(b"foo2"))
|
||||
self.repository.commit(compact=False)
|
||||
self.assert_equal(self.repository.get(H(0)), b"foo2")
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"foo2")
|
||||
|
||||
def test_single_kind_transactions(self):
|
||||
# put
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.close()
|
||||
# replace
|
||||
self.repository = self.open()
|
||||
with self.repository:
|
||||
self.repository.put(H(0), b"bar")
|
||||
self.repository.put(H(0), fchunk(b"bar"))
|
||||
self.repository.commit(compact=False)
|
||||
# delete
|
||||
self.repository = self.open()
|
||||
|
@ -138,7 +173,7 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
|
||||
def test_list(self):
|
||||
for x in range(100):
|
||||
self.repository.put(H(x), b"SOMEDATA")
|
||||
self.repository.put(H(x), fchunk(b"SOMEDATA"))
|
||||
self.repository.commit(compact=False)
|
||||
all = self.repository.list()
|
||||
self.assert_equal(len(all), 100)
|
||||
|
@ -152,7 +187,7 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
|
||||
def test_scan(self):
|
||||
for x in range(100):
|
||||
self.repository.put(H(x), b"SOMEDATA")
|
||||
self.repository.put(H(x), fchunk(b"SOMEDATA"))
|
||||
self.repository.commit(compact=False)
|
||||
all = self.repository.scan()
|
||||
assert len(all) == 100
|
||||
|
@ -168,14 +203,14 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
assert all[x] == H(x)
|
||||
|
||||
def test_max_data_size(self):
|
||||
max_data = b"x" * MAX_DATA_SIZE
|
||||
self.repository.put(H(0), max_data)
|
||||
self.assert_equal(self.repository.get(H(0)), max_data)
|
||||
self.assert_raises(IntegrityError, lambda: self.repository.put(H(1), max_data + b"x"))
|
||||
max_data = b"x" * (MAX_DATA_SIZE - RepoObj.meta_len_hdr.size)
|
||||
self.repository.put(H(0), fchunk(max_data))
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), max_data)
|
||||
self.assert_raises(IntegrityError, lambda: self.repository.put(H(1), fchunk(max_data + b"x")))
|
||||
|
||||
def test_set_flags(self):
|
||||
id = H(0)
|
||||
self.repository.put(id, b"")
|
||||
self.repository.put(id, fchunk(b""))
|
||||
self.assert_equal(self.repository.flags(id), 0x00000000) # init == all zero
|
||||
self.repository.flags(id, mask=0x00000001, value=0x00000001)
|
||||
self.assert_equal(self.repository.flags(id), 0x00000001)
|
||||
|
@ -188,7 +223,7 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
|
||||
def test_get_flags(self):
|
||||
id = H(0)
|
||||
self.repository.put(id, b"")
|
||||
self.repository.put(id, fchunk(b""))
|
||||
self.assert_equal(self.repository.flags(id), 0x00000000) # init == all zero
|
||||
self.repository.flags(id, mask=0xC0000003, value=0x80000001)
|
||||
self.assert_equal(self.repository.flags(id, mask=0x00000001), 0x00000001)
|
||||
|
@ -199,7 +234,7 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
def test_flags_many(self):
|
||||
ids_flagged = [H(0), H(1)]
|
||||
ids_default_flags = [H(2), H(3)]
|
||||
[self.repository.put(id, b"") for id in ids_flagged + ids_default_flags]
|
||||
[self.repository.put(id, fchunk(b"")) for id in ids_flagged + ids_default_flags]
|
||||
self.repository.flags_many(ids_flagged, mask=0xFFFFFFFF, value=0xDEADBEEF)
|
||||
self.assert_equal(list(self.repository.flags_many(ids_default_flags)), [0x00000000, 0x00000000])
|
||||
self.assert_equal(list(self.repository.flags_many(ids_flagged)), [0xDEADBEEF, 0xDEADBEEF])
|
||||
|
@ -207,8 +242,8 @@ class RepositoryTestCase(RepositoryTestCaseBase):
|
|||
self.assert_equal(list(self.repository.flags_many(ids_flagged, mask=0x0000FFFF)), [0x0000BEEF, 0x0000BEEF])
|
||||
|
||||
def test_flags_persistence(self):
|
||||
self.repository.put(H(0), b"default")
|
||||
self.repository.put(H(1), b"one one zero")
|
||||
self.repository.put(H(0), fchunk(b"default"))
|
||||
self.repository.put(H(1), fchunk(b"one one zero"))
|
||||
# we do not set flags for H(0), so we can later check their default state.
|
||||
self.repository.flags(H(1), mask=0x00000007, value=0x00000006)
|
||||
self.repository.commit(compact=False)
|
||||
|
@ -227,38 +262,39 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
|
|||
|
||||
def _assert_sparse(self):
|
||||
# The superseded 123456... PUT
|
||||
assert self.repository.compact[0] == 41 + 8 + 9
|
||||
assert self.repository.compact[0] == 41 + 8 + len(fchunk(b"123456789"))
|
||||
# a COMMIT
|
||||
assert self.repository.compact[1] == 9
|
||||
# The DELETE issued by the superseding PUT (or issued directly)
|
||||
assert self.repository.compact[2] == 41
|
||||
self.repository._rebuild_sparse(0)
|
||||
assert self.repository.compact[0] == 41 + 8 + 9
|
||||
assert self.repository.compact[0] == 41 + 8 + len(fchunk(b"123456789")) # 9 is chunk or commit?
|
||||
|
||||
def test_sparse1(self):
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(1), b"123456789")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.put(H(1), fchunk(b"123456789"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.put(H(1), b"bar")
|
||||
self.repository.put(H(1), fchunk(b"bar"))
|
||||
self._assert_sparse()
|
||||
|
||||
def test_sparse2(self):
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(1), b"123456789")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.put(H(1), fchunk(b"123456789"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.delete(H(1))
|
||||
self._assert_sparse()
|
||||
|
||||
def test_sparse_delete(self):
|
||||
self.repository.put(H(0), b"1245")
|
||||
ch0 = fchunk(b"1245")
|
||||
self.repository.put(H(0), ch0)
|
||||
self.repository.delete(H(0))
|
||||
self.repository.io._write_fd.sync()
|
||||
|
||||
# The on-line tracking works on a per-object basis...
|
||||
assert self.repository.compact[0] == 41 + 8 + 41 + 4
|
||||
assert self.repository.compact[0] == 41 + 8 + 41 + len(ch0)
|
||||
self.repository._rebuild_sparse(0)
|
||||
# ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic)
|
||||
assert self.repository.compact[0] == 41 + 8 + 41 + 4 + len(MAGIC)
|
||||
assert self.repository.compact[0] == 41 + 8 + 41 + len(ch0) + len(MAGIC)
|
||||
|
||||
self.repository.commit(compact=True)
|
||||
assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()]
|
||||
|
@ -266,7 +302,7 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
|
|||
def test_uncommitted_garbage(self):
|
||||
# uncommitted garbage should be no problem, it is cleaned up automatically.
|
||||
# we just have to be careful with invalidation of cached FDs in LoggedIO.
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
# write some crap to an uncommitted segment file
|
||||
last_segment = self.repository.io.get_latest_segment()
|
||||
|
@ -276,7 +312,7 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
|
|||
# usually, opening the repo and starting a transaction should trigger a cleanup.
|
||||
self.repository = self.open()
|
||||
with self.repository:
|
||||
self.repository.put(H(0), b"bar") # this may trigger compact_segments()
|
||||
self.repository.put(H(0), fchunk(b"bar")) # this may trigger compact_segments()
|
||||
self.repository.commit(compact=True)
|
||||
# the point here is that nothing blows up with an exception.
|
||||
|
||||
|
@ -363,8 +399,8 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
|||
assert not io.is_committed_segment(io.get_latest_segment())
|
||||
|
||||
def test_moved_deletes_are_tracked(self):
|
||||
self.repository.put(H(1), b"1")
|
||||
self.repository.put(H(2), b"2")
|
||||
self.repository.put(H(1), fchunk(b"1"))
|
||||
self.repository.put(H(2), fchunk(b"2"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repo_dump("p1 p2 c")
|
||||
self.repository.delete(H(1))
|
||||
|
@ -378,7 +414,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
|||
num_deletes += 1
|
||||
assert num_deletes == 1
|
||||
assert last_segment in self.repository.compact
|
||||
self.repository.put(H(3), b"3")
|
||||
self.repository.put(H(3), fchunk(b"3"))
|
||||
self.repository.commit(compact=True)
|
||||
self.repo_dump("p3 cc")
|
||||
assert last_segment not in self.repository.compact
|
||||
|
@ -393,7 +429,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
|||
|
||||
def test_shadowed_entries_are_preserved(self):
|
||||
get_latest_segment = self.repository.io.get_latest_segment
|
||||
self.repository.put(H(1), b"1")
|
||||
self.repository.put(H(1), fchunk(b"1"))
|
||||
# This is the segment with our original PUT of interest
|
||||
put_segment = get_latest_segment()
|
||||
self.repository.commit(compact=False)
|
||||
|
@ -401,7 +437,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
|||
# We now delete H(1), and force this segment to not be compacted, which can happen
|
||||
# if it's not sparse enough (symbolized by H(2) here).
|
||||
self.repository.delete(H(1))
|
||||
self.repository.put(H(2), b"1")
|
||||
self.repository.put(H(2), fchunk(b"1"))
|
||||
delete_segment = get_latest_segment()
|
||||
|
||||
# We pretend these are mostly dense (not sparse) and won't be compacted
|
||||
|
@ -426,7 +462,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
|||
assert H(1) not in self.repository
|
||||
|
||||
def test_shadow_index_rollback(self):
|
||||
self.repository.put(H(1), b"1")
|
||||
self.repository.put(H(1), fchunk(b"1"))
|
||||
self.repository.delete(H(1))
|
||||
assert self.repository.shadow_index[H(1)] == [0]
|
||||
self.repository.commit(compact=True)
|
||||
|
@ -440,7 +476,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
|||
assert self.repository.shadow_index[H(1)] == [4]
|
||||
self.repository.rollback()
|
||||
self.repo_dump("r")
|
||||
self.repository.put(H(2), b"1")
|
||||
self.repository.put(H(2), fchunk(b"1"))
|
||||
# After the rollback segment 4 shouldn't be considered anymore
|
||||
assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable
|
||||
|
||||
|
@ -459,19 +495,19 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
|
|||
def segments_in_repository():
|
||||
return len(list(self.repository.io.segment_iterator()))
|
||||
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
|
||||
self.repository.append_only = False
|
||||
assert segments_in_repository() == 2
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=True)
|
||||
# normal: compact squashes the data together, only one segment
|
||||
assert segments_in_repository() == 2
|
||||
|
||||
self.repository.append_only = True
|
||||
assert segments_in_repository() == 2
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
# append only: does not compact, only new segments written
|
||||
assert segments_in_repository() == 4
|
||||
|
@ -485,7 +521,7 @@ class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase):
|
|||
self.reopen()
|
||||
|
||||
with self.repository:
|
||||
self.repository.put(H(0), b"foobar")
|
||||
self.repository.put(H(0), fchunk(b"foobar"))
|
||||
with pytest.raises(Repository.InsufficientFreeSpaceError):
|
||||
self.repository.commit(compact=False)
|
||||
assert os.path.exists(self.repository.path)
|
||||
|
@ -500,45 +536,52 @@ class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase):
|
|||
class QuotaTestCase(RepositoryTestCaseBase):
|
||||
def test_tracking(self):
|
||||
assert self.repository.storage_quota_use == 0
|
||||
self.repository.put(H(1), bytes(1234))
|
||||
assert self.repository.storage_quota_use == 1234 + 41 + 8
|
||||
self.repository.put(H(2), bytes(5678))
|
||||
assert self.repository.storage_quota_use == 1234 + 5678 + 2 * (41 + 8)
|
||||
ch1 = fchunk(bytes(1234))
|
||||
self.repository.put(H(1), ch1)
|
||||
assert self.repository.storage_quota_use == len(ch1) + 41 + 8
|
||||
ch2 = fchunk(bytes(5678))
|
||||
self.repository.put(H(2), ch2)
|
||||
assert self.repository.storage_quota_use == len(ch1) + len(ch2) + 2 * (41 + 8)
|
||||
self.repository.delete(H(1))
|
||||
assert self.repository.storage_quota_use == 1234 + 5678 + 2 * (41 + 8) # we have not compacted yet
|
||||
assert self.repository.storage_quota_use == len(ch1) + len(ch2) + 2 * (41 + 8) # we have not compacted yet
|
||||
self.repository.commit(compact=False)
|
||||
assert self.repository.storage_quota_use == 1234 + 5678 + 2 * (41 + 8) # we have not compacted yet
|
||||
assert self.repository.storage_quota_use == len(ch1) + len(ch2) + 2 * (41 + 8) # we have not compacted yet
|
||||
self.reopen()
|
||||
with self.repository:
|
||||
# Open new transaction; hints and thus quota data is not loaded unless needed.
|
||||
self.repository.put(H(3), b"")
|
||||
ch3 = fchunk(b"")
|
||||
self.repository.put(H(3), ch3)
|
||||
self.repository.delete(H(3))
|
||||
assert self.repository.storage_quota_use == 1234 + 5678 + 3 * (41 + 8) # we have not compacted yet
|
||||
assert self.repository.storage_quota_use == len(ch1) + len(ch2) + len(ch3) + 3 * (
|
||||
41 + 8
|
||||
) # we have not compacted yet
|
||||
self.repository.commit(compact=True)
|
||||
assert self.repository.storage_quota_use == 5678 + 41 + 8
|
||||
assert self.repository.storage_quota_use == len(ch2) + 41 + 8
|
||||
|
||||
def test_exceed_quota(self):
|
||||
assert self.repository.storage_quota_use == 0
|
||||
self.repository.storage_quota = 80
|
||||
self.repository.put(H(1), b"")
|
||||
assert self.repository.storage_quota_use == 41 + 8
|
||||
ch1 = fchunk(b"x" * 7)
|
||||
self.repository.put(H(1), ch1)
|
||||
assert self.repository.storage_quota_use == len(ch1) + 41 + 8
|
||||
self.repository.commit(compact=False)
|
||||
with pytest.raises(Repository.StorageQuotaExceeded):
|
||||
self.repository.put(H(2), b"")
|
||||
assert self.repository.storage_quota_use == (41 + 8) * 2
|
||||
ch2 = fchunk(b"y" * 13)
|
||||
self.repository.put(H(2), ch2)
|
||||
assert self.repository.storage_quota_use == len(ch1) + len(ch2) + (41 + 8) * 2 # check ch2!?
|
||||
with pytest.raises(Repository.StorageQuotaExceeded):
|
||||
self.repository.commit(compact=False)
|
||||
assert self.repository.storage_quota_use == (41 + 8) * 2
|
||||
assert self.repository.storage_quota_use == len(ch1) + len(ch2) + (41 + 8) * 2 # check ch2!?
|
||||
self.reopen()
|
||||
with self.repository:
|
||||
self.repository.storage_quota = 150
|
||||
# Open new transaction; hints and thus quota data is not loaded unless needed.
|
||||
self.repository.put(H(1), b"")
|
||||
self.repository.put(H(1), ch1)
|
||||
assert (
|
||||
self.repository.storage_quota_use == (41 + 8) * 2
|
||||
self.repository.storage_quota_use == len(ch1) * 2 + (41 + 8) * 2
|
||||
) # we have 2 puts for H(1) here and not yet compacted.
|
||||
self.repository.commit(compact=True)
|
||||
assert self.repository.storage_quota_use == 41 + 8 # now we have compacted.
|
||||
assert self.repository.storage_quota_use == len(ch1) + 41 + 8 # now we have compacted.
|
||||
|
||||
|
||||
class NonceReservation(RepositoryTestCaseBase):
|
||||
|
@ -586,13 +629,13 @@ class NonceReservation(RepositoryTestCaseBase):
|
|||
class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.close()
|
||||
|
||||
def do_commit(self):
|
||||
with self.repository:
|
||||
self.repository.put(H(0), b"fox")
|
||||
self.repository.put(H(0), fchunk(b"fox"))
|
||||
self.repository.commit(compact=False)
|
||||
|
||||
def test_corrupted_hints(self):
|
||||
|
@ -648,7 +691,7 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
|
|||
# Data corruption is detected due to mismatching checksums
|
||||
# and fixed by rebuilding the index.
|
||||
assert len(self.repository) == 1
|
||||
assert self.repository.get(H(0)) == b"foo"
|
||||
assert pdchunk(self.repository.get(H(0))) == b"foo"
|
||||
|
||||
def test_index_corrupted_without_integrity(self):
|
||||
self._corrupt_index()
|
||||
|
@ -684,17 +727,17 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
|
|||
with self.repository:
|
||||
# No issues accessing the repository
|
||||
assert len(self.repository) == 1
|
||||
assert self.repository.get(H(0)) == b"foo"
|
||||
assert pdchunk(self.repository.get(H(0))) == b"foo"
|
||||
|
||||
def _subtly_corrupted_hints_setup(self):
|
||||
with self.repository:
|
||||
self.repository.append_only = True
|
||||
assert len(self.repository) == 1
|
||||
assert self.repository.get(H(0)) == b"foo"
|
||||
self.repository.put(H(1), b"bar")
|
||||
self.repository.put(H(2), b"baz")
|
||||
assert pdchunk(self.repository.get(H(0))) == b"foo"
|
||||
self.repository.put(H(1), fchunk(b"bar"))
|
||||
self.repository.put(H(2), fchunk(b"baz"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.put(H(2), b"bazz")
|
||||
self.repository.put(H(2), fchunk(b"bazz"))
|
||||
self.repository.commit(compact=False)
|
||||
|
||||
hints_path = os.path.join(self.repository.path, "hints.5")
|
||||
|
@ -711,14 +754,14 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
|
|||
self._subtly_corrupted_hints_setup()
|
||||
with self.repository:
|
||||
self.repository.append_only = False
|
||||
self.repository.put(H(3), b"1234")
|
||||
self.repository.put(H(3), fchunk(b"1234"))
|
||||
# Do a compaction run. Succeeds, since the failed checksum prompted a rebuild of the index+hints.
|
||||
self.repository.commit(compact=True)
|
||||
|
||||
assert len(self.repository) == 4
|
||||
assert self.repository.get(H(0)) == b"foo"
|
||||
assert self.repository.get(H(1)) == b"bar"
|
||||
assert self.repository.get(H(2)) == b"bazz"
|
||||
assert pdchunk(self.repository.get(H(0))) == b"foo"
|
||||
assert pdchunk(self.repository.get(H(1))) == b"bar"
|
||||
assert pdchunk(self.repository.get(H(2))) == b"bazz"
|
||||
|
||||
def test_subtly_corrupted_hints_without_integrity(self):
|
||||
self._subtly_corrupted_hints_setup()
|
||||
|
@ -726,7 +769,7 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
|
|||
os.unlink(integrity_path)
|
||||
with self.repository:
|
||||
self.repository.append_only = False
|
||||
self.repository.put(H(3), b"1234")
|
||||
self.repository.put(H(3), fchunk(b"1234"))
|
||||
# Do a compaction run. Fails, since the corrupted refcount was not detected and leads to an assertion failure.
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
self.repository.commit(compact=True)
|
||||
|
@ -748,12 +791,12 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
|
|||
|
||||
def get_objects(self, *ids):
|
||||
for id_ in ids:
|
||||
self.repository.get(H(id_))
|
||||
pdchunk(self.repository.get(H(id_)))
|
||||
|
||||
def add_objects(self, segments):
|
||||
for ids in segments:
|
||||
for id_ in ids:
|
||||
self.repository.put(H(id_), b"data")
|
||||
self.repository.put(H(id_), fchunk(b"data"))
|
||||
self.repository.commit(compact=False)
|
||||
|
||||
def get_head(self):
|
||||
|
@ -859,8 +902,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
|
|||
self.assert_equal({1, 2, 3, 4, 5, 6}, self.list_objects())
|
||||
|
||||
def test_crash_before_compact(self):
|
||||
self.repository.put(H(0), b"data")
|
||||
self.repository.put(H(0), b"data2")
|
||||
self.repository.put(H(0), fchunk(b"data"))
|
||||
self.repository.put(H(0), fchunk(b"data2"))
|
||||
# Simulate a crash before compact
|
||||
with patch.object(Repository, "compact_segments") as compact:
|
||||
self.repository.commit(compact=True)
|
||||
|
@ -868,12 +911,12 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
|
|||
self.reopen()
|
||||
with self.repository:
|
||||
self.check(repair=True)
|
||||
self.assert_equal(self.repository.get(H(0)), b"data2")
|
||||
self.assert_equal(pdchunk(self.repository.get(H(0))), b"data2")
|
||||
|
||||
|
||||
class RepositoryHintsTestCase(RepositoryTestCaseBase):
|
||||
def test_hints_persistence(self):
|
||||
self.repository.put(H(0), b"data")
|
||||
self.repository.put(H(0), fchunk(b"data"))
|
||||
self.repository.delete(H(0))
|
||||
self.repository.commit(compact=False)
|
||||
shadow_index_expected = self.repository.shadow_index
|
||||
|
@ -884,7 +927,7 @@ class RepositoryHintsTestCase(RepositoryTestCaseBase):
|
|||
self.reopen()
|
||||
with self.repository:
|
||||
# see also do_compact()
|
||||
self.repository.put(H(42), b"foobar") # this will call prepare_txn() and load the hints data
|
||||
self.repository.put(H(42), fchunk(b"foobar")) # this will call prepare_txn() and load the hints data
|
||||
# check if hints persistence worked:
|
||||
self.assert_equal(shadow_index_expected, self.repository.shadow_index)
|
||||
self.assert_equal(compact_expected, self.repository.compact)
|
||||
|
@ -892,7 +935,7 @@ class RepositoryHintsTestCase(RepositoryTestCaseBase):
|
|||
self.assert_equal(segments_expected, self.repository.segments)
|
||||
|
||||
def test_hints_behaviour(self):
|
||||
self.repository.put(H(0), b"data")
|
||||
self.repository.put(H(0), fchunk(b"data"))
|
||||
self.assert_equal(self.repository.shadow_index, {})
|
||||
assert len(self.repository.compact) == 0
|
||||
self.repository.delete(H(0))
|
||||
|
@ -901,7 +944,7 @@ class RepositoryHintsTestCase(RepositoryTestCaseBase):
|
|||
self.assert_in(H(0), self.repository.shadow_index)
|
||||
self.assert_equal(len(self.repository.shadow_index[H(0)]), 1)
|
||||
self.assert_in(0, self.repository.compact) # segment 0 can be compacted
|
||||
self.repository.put(H(42), b"foobar") # see also do_compact()
|
||||
self.repository.put(H(42), fchunk(b"foobar")) # see also do_compact()
|
||||
self.repository.commit(compact=True, threshold=0.0) # compact completely!
|
||||
# nothing to compact any more! no info left about stuff that does not exist any more:
|
||||
self.assert_not_in(H(0), self.repository.shadow_index)
|
||||
|
@ -1041,13 +1084,13 @@ class RemoteLegacyFree(RepositoryTestCaseBase):
|
|||
|
||||
def test_legacy_free(self):
|
||||
# put
|
||||
self.repository.put(H(0), b"foo")
|
||||
self.repository.put(H(0), fchunk(b"foo"))
|
||||
self.repository.commit(compact=False)
|
||||
self.repository.close()
|
||||
# replace
|
||||
self.repository = self.open()
|
||||
with self.repository:
|
||||
self.repository.put(H(0), b"bar")
|
||||
self.repository.put(H(0), fchunk(b"bar"))
|
||||
self.repository.commit(compact=False)
|
||||
# delete
|
||||
self.repository = self.open()
|
||||
|
|
Loading…
Reference in New Issue