
Hashindex header work, fixes #6960 (#7064)

support reading the new, improved hashindex header format, fixes #6960

It was a bit of a pain to work with that code:
- it is C code,
- it still needs to be able to read the old hashindex file format,
- while also supporting the new file format.
- the hash computed while reading the file causes additional problems, because
  it expects every part of the file to be read exactly once and in sequential order.
  I solved this by separately opening the file in the Python part of the code and
  checking the magic there (see the sketch below).
  BORG_IDX means the legacy file format and the legacy layout of the hashtable,
  BORG2IDX means the new file format and the new layout of the hashtable.
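
For illustration, a minimal Python sketch of that magic check (the helper name is
hypothetical; the real implementation is hashindex_variant() in hashindex.pyx, see
the diff further down):

    def peek_hashindex_magic(path):
        """Read only the first MAGIC_LEN (8) bytes to decide which format we have."""
        with open(path, 'rb') as f:
            magic = f.read(8)
        if magic == b'BORG_IDX':
            return 1  # legacy file format / legacy hashtable layout
        if magic == b'BORG2IDX':
            return 2  # new file format / new hashtable layout
        raise ValueError(f'unknown hashindex magic: {magic!r}')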

Done:
- added a version int32 directly after the magic and set it to 2 (like borg 2).
  the old header had no version info, but it could be treated as version 1 in case
  we ever need that (currently the code decides based on the magic).
- added num_empty, as indicated by a TODO in count_empty, so determining the number
  of empty buckets no longer needs a full hashtable scan.
- to keep it simpler, I just filled the HashHeader struct up with a
  `char reserved[1024 - 32];`
  1024 being the desired overall header size and 32 being the currently used size.
  this alignment might be useful in case we mmap() the hashindex file one day
  (see the header-layout sketch after this list).
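
As a size sanity check, here is a hedged Python sketch that unpacks the new 1024-byte
header; the field order and little-endian encoding follow the HashHeader struct in the
diff below, while the helper name itself is made up:

    import struct

    # 8-byte magic, then six little-endian int32 fields (version, num_entries,
    # num_buckets, num_empty, key_size, value_size) = 8 + 6 * 4 = 32 bytes used,
    # plus 1024 - 32 = 992 reserved filler bytes.
    HEADER2 = struct.Struct('<8s6i992s')
    assert HEADER2.size == 1024

    def parse_hashheader2(buf):
        magic, version, num_entries, num_buckets, num_empty, key_size, value_size, _reserved = \
            HEADER2.unpack(buf)
        if magic != b'BORG2IDX' or version != 2:
            raise ValueError('not a BORG2IDX version 2 header')
        return dict(num_entries=num_entries, num_buckets=num_buckets,
                    num_empty=num_empty, key_size=key_size, value_size=value_size)
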
Authored by TW on 2022-10-02 14:35:21 +02:00, committed via GitHub
parent 5edc53d4d4
commit c29d4a096b
5 changed files with 268 additions and 84 deletions


@ -19,7 +19,8 @@
# define BORG_PACKED(x) x __attribute__((packed))
#endif
#define MAGIC "BORG_IDX"
#define MAGIC "BORG2IDX"
#define MAGIC1 "BORG_IDX" // legacy
#define MAGIC_LEN 8
#define DEBUG 0
@ -39,6 +40,18 @@ typedef struct {
int32_t num_buckets;
int8_t key_size;
int8_t value_size;
}) HashHeader1;
BORG_PACKED(
typedef struct {
char magic[MAGIC_LEN];
int32_t version;
int32_t num_entries;
int32_t num_buckets;
int32_t num_empty;
int32_t key_size;
int32_t value_size;
char reserved[1024 - 32]; // filler to 1024 bytes total
}) HashHeader;
typedef struct {
@ -110,8 +123,8 @@ static int hash_sizes[] = {
#define EPRINTF_PATH(path, msg, ...) fprintf(stderr, "hashindex: %s: " msg " (%s)\n", path, ##__VA_ARGS__, strerror(errno))
#ifndef BORG_NO_PYTHON
static HashIndex *hashindex_read(PyObject *file_py, int permit_compact);
static void hashindex_write(HashIndex *index, PyObject *file_py);
static HashIndex *hashindex_read(PyObject *file_py, int permit_compact, int legacy);
static void hashindex_write(HashIndex *index, PyObject *file_py, int legacy);
#endif
static uint64_t hashindex_compact(HashIndex *index);
@ -265,9 +278,7 @@ int shrink_size(int current){
int
count_empty(HashIndex *index)
{ /* count empty (never used) buckets. this does NOT include deleted buckets (tombstones).
* TODO: if we ever change HashHeader, save the count there so we do not need this function.
*/
{ /* count empty (never used) buckets. this does NOT include deleted buckets (tombstones). */
int i, count = 0, capacity = index->num_buckets;
for(i = 0; i < capacity; i++) {
if(BUCKET_IS_EMPTY(index, i))
@ -276,19 +287,16 @@ count_empty(HashIndex *index)
return count;
}
/* Public API */
#ifndef BORG_NO_PYTHON
static HashIndex *
hashindex_read(PyObject *file_py, int permit_compact)
HashIndex *
read_hashheader1(PyObject *file_py)
{
Py_ssize_t length, buckets_length, bytes_read;
Py_ssize_t bytes_read, length, buckets_length;
Py_buffer header_buffer;
PyObject *header_bytes, *length_object, *bucket_bytes, *tmp;
HashHeader *header;
PyObject *header_bytes, *length_object, *tmp;
HashIndex *index = NULL;
HashHeader1 *header;
header_bytes = PyObject_CallMethod(file_py, "read", "n", (Py_ssize_t)sizeof(HashHeader));
header_bytes = PyObject_CallMethod(file_py, "read", "n", (Py_ssize_t)sizeof(*header));
if(!header_bytes) {
assert(PyErr_Occurred());
goto fail;
@ -299,11 +307,11 @@ hashindex_read(PyObject *file_py, int permit_compact)
/* TypeError, not a bytes() object */
goto fail_decref_header;
}
if(bytes_read != sizeof(HashHeader)) {
if(bytes_read != sizeof(*header)) {
/* Truncated file */
/* Note: %zd is the format for Py_ssize_t, %zu is for size_t */
PyErr_Format(PyExc_ValueError, "Could not read header (expected %zu, but read %zd bytes)",
sizeof(HashHeader), bytes_read);
sizeof(*header), bytes_read);
goto fail_decref_header;
}
@ -334,7 +342,111 @@ hashindex_read(PyObject *file_py, int permit_compact)
goto fail_decref_header;
}
tmp = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)sizeof(HashHeader), SEEK_SET);
tmp = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)sizeof(*header), SEEK_SET);
Py_XDECREF(tmp);
if(PyErr_Occurred()) {
goto fail_decref_header;
}
/* Set up the in-memory header */
if(!(index = malloc(sizeof(HashIndex)))) {
PyErr_NoMemory();
goto fail_decref_header;
}
PyObject_GetBuffer(header_bytes, &header_buffer, PyBUF_SIMPLE);
if(PyErr_Occurred()) {
goto fail_free_index;
}
header = (HashHeader1*) header_buffer.buf;
if(memcmp(header->magic, MAGIC1, MAGIC_LEN)) {
PyErr_Format(PyExc_ValueError, "Unknown MAGIC in header");
goto fail_release_header_buffer;
}
buckets_length = (Py_ssize_t)_le32toh(header->num_buckets) * (header->key_size + header->value_size);
if((Py_ssize_t)length != (Py_ssize_t)sizeof(*header) + buckets_length) {
PyErr_Format(PyExc_ValueError, "Incorrect file length (expected %zd, got %zd)",
sizeof(*header) + buckets_length, length);
goto fail_release_header_buffer;
}
index->num_entries = _le32toh(header->num_entries);
index->num_buckets = _le32toh(header->num_buckets);
index->num_empty = -1; // unknown, needs counting
index->key_size = header->key_size;
index->value_size = header->value_size;
fail_release_header_buffer:
PyBuffer_Release(&header_buffer);
fail_free_index:
if(PyErr_Occurred()) {
free(index);
index = NULL;
}
fail_decref_header:
Py_DECREF(header_bytes);
fail:
return index;
}
HashIndex *
read_hashheader(PyObject *file_py)
{
Py_ssize_t bytes_read, length, buckets_length;
Py_buffer header_buffer;
PyObject *header_bytes, *length_object, *tmp;
HashIndex *index = NULL;
HashHeader *header;
header_bytes = PyObject_CallMethod(file_py, "read", "n", (Py_ssize_t)sizeof(*header));
if(!header_bytes) {
assert(PyErr_Occurred());
goto fail;
}
bytes_read = PyBytes_Size(header_bytes);
if(PyErr_Occurred()) {
/* TypeError, not a bytes() object */
goto fail_decref_header;
}
if(bytes_read != sizeof(*header)) {
/* Truncated file */
/* Note: %zd is the format for Py_ssize_t, %zu is for size_t */
PyErr_Format(PyExc_ValueError, "Could not read header (expected %zu, but read %zd bytes)",
sizeof(*header), bytes_read);
goto fail_decref_header;
}
/*
* Hash the header
* If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory)
*/
tmp = PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader");
Py_XDECREF(tmp);
if(PyErr_Occurred()) {
if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
/* Be able to work with regular file objects which do not have a hash_part method. */
PyErr_Clear();
} else {
goto fail_decref_header;
}
}
/* Find length of file */
length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END);
if(PyErr_Occurred()) {
goto fail_decref_header;
}
length = PyNumber_AsSsize_t(length_object, PyExc_OverflowError);
Py_DECREF(length_object);
if(PyErr_Occurred()) {
/* This shouldn't generally happen; but can if seek() returns something that's not a number */
goto fail_decref_header;
}
tmp = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)sizeof(*header), SEEK_SET);
Py_XDECREF(tmp);
if(PyErr_Occurred()) {
goto fail_decref_header;
@ -357,17 +469,58 @@ hashindex_read(PyObject *file_py, int permit_compact)
goto fail_release_header_buffer;
}
buckets_length = (Py_ssize_t)_le32toh(header->num_buckets) * (header->key_size + header->value_size);
if((Py_ssize_t)length != (Py_ssize_t)sizeof(HashHeader) + buckets_length) {
buckets_length = (Py_ssize_t)_le32toh(header->num_buckets) *
(_le32toh(header->key_size) + _le32toh(header->value_size));
if ((Py_ssize_t)length != (Py_ssize_t)sizeof(*header) + buckets_length) {
PyErr_Format(PyExc_ValueError, "Incorrect file length (expected %zd, got %zd)",
sizeof(HashHeader) + buckets_length, length);
sizeof(*header) + buckets_length, length);
goto fail_release_header_buffer;
}
index->num_entries = _le32toh(header->num_entries);
index->num_buckets = _le32toh(header->num_buckets);
index->key_size = header->key_size;
index->value_size = header->value_size;
index->num_empty = _le32toh(header->num_empty);
index->key_size = _le32toh(header->key_size);
index->value_size = _le32toh(header->value_size);
int header_version = _le32toh(header->version);
if (header_version != 2) {
PyErr_Format(PyExc_ValueError, "Unsupported header version (expected %d, got %d)",
2, header_version);
goto fail_release_header_buffer;
}
fail_release_header_buffer:
PyBuffer_Release(&header_buffer);
fail_free_index:
if(PyErr_Occurred()) {
free(index);
index = NULL;
}
fail_decref_header:
Py_DECREF(header_bytes);
fail:
return index;
}
/* Public API */
#ifndef BORG_NO_PYTHON
static HashIndex *
hashindex_read(PyObject *file_py, int permit_compact, int legacy)
{
Py_ssize_t buckets_length, bytes_read;
PyObject *bucket_bytes;
HashIndex *index = NULL;
if (legacy)
index = read_hashheader1(file_py);
else
index = read_hashheader(file_py);
if (!index)
goto fail;
index->bucket_size = index->key_size + index->value_size;
index->lower_limit = get_lower_limit(index->num_buckets);
index->upper_limit = get_upper_limit(index->num_buckets);
@ -381,10 +534,11 @@ hashindex_read(PyObject *file_py, int permit_compact)
* will issue multiple underlying reads if necessary. This supports indices
* >2 GB on Linux. We also compare lengths later.
*/
buckets_length = (Py_ssize_t)(index->num_buckets) * (index->key_size + index->value_size);
bucket_bytes = PyObject_CallMethod(file_py, "read", "n", buckets_length);
if(!bucket_bytes) {
assert(PyErr_Occurred());
goto fail_release_header_buffer;
goto fail_free_index;
}
bytes_read = PyBytes_Size(bucket_bytes);
if(PyErr_Occurred()) {
@ -404,7 +558,8 @@ hashindex_read(PyObject *file_py, int permit_compact)
if(!permit_compact) {
index->min_empty = get_min_empty(index->num_buckets);
index->num_empty = count_empty(index);
if (index->num_empty == -1) // we read a legacy index without num_empty value
index->num_empty = count_empty(index);
if(index->num_empty < index->min_empty) {
/* too many tombstones here / not enough empty buckets, do a same-size rebuild */
@ -426,15 +581,11 @@ fail_free_buckets:
}
fail_decref_buckets:
Py_DECREF(bucket_bytes);
fail_release_header_buffer:
PyBuffer_Release(&header_buffer);
fail_free_index:
if(PyErr_Occurred()) {
free(index);
index = NULL;
}
fail_decref_header:
Py_DECREF(header_bytes);
fail:
return index;
}
@ -481,33 +632,37 @@ hashindex_free(HashIndex *index)
free(index);
}
#ifndef BORG_NO_PYTHON
static void
hashindex_write(HashIndex *index, PyObject *file_py)
int
write_hashheader(HashIndex *index, PyObject *file_py)
{
PyObject *length_object, *buckets_view, *tmp;
PyObject *length_object, *tmp;
Py_ssize_t length;
Py_ssize_t buckets_length = (Py_ssize_t)index->num_buckets * index->bucket_size;
_Static_assert(sizeof(HashHeader) == 1024, "HashHeader struct should be exactly 1024 bytes in size");
HashHeader header = {
.magic = MAGIC,
.version = _htole32(2),
.num_entries = _htole32(index->num_entries),
.num_buckets = _htole32(index->num_buckets),
.key_size = index->key_size,
.value_size = index->value_size
.num_empty = _htole32(index->num_empty),
.key_size = _htole32(index->key_size),
.value_size = _htole32(index->value_size),
.reserved = {0}
};
length_object = PyObject_CallMethod(file_py, "write", "y#", &header, (Py_ssize_t)sizeof(HashHeader));
length_object = PyObject_CallMethod(file_py, "write", "y#", &header, (Py_ssize_t)sizeof(header));
if(PyErr_Occurred()) {
return;
return 0;
}
length = PyNumber_AsSsize_t(length_object, PyExc_OverflowError);
Py_DECREF(length_object);
if(PyErr_Occurred()) {
return;
return 0;
}
if(length != sizeof(HashHeader)) {
if(length != sizeof(header)) {
PyErr_SetString(PyExc_ValueError, "Failed to write header");
return;
return 0;
}
/*
@ -520,9 +675,24 @@ hashindex_write(HashIndex *index, PyObject *file_py)
/* Be able to work with regular file objects which do not have a hash_part method. */
PyErr_Clear();
} else {
return;
return 0;
}
}
return 1;
}
#ifndef BORG_NO_PYTHON
static void
hashindex_write(HashIndex *index, PyObject *file_py, int legacy)
{
PyObject *length_object, *buckets_view;
Py_ssize_t length;
Py_ssize_t buckets_length = (Py_ssize_t)index->num_buckets * index->bucket_size;
assert(!legacy); // we do not ever write legacy hashindexes
if(!write_hashheader(index, file_py))
return;
/* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */
buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ);
@ -698,6 +868,7 @@ hashindex_compact(HashIndex *index)
}
index->num_buckets = index->num_entries;
index->num_empty = 0;
return saved_size;
}


@ -17,12 +17,12 @@ cdef extern from "_hashindex.c":
uint32_t version
char hash[16]
HashIndex *hashindex_read(object file_py, int permit_compact) except *
HashIndex *hashindex_read(object file_py, int permit_compact, int legacy) except *
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
void hashindex_free(HashIndex *index)
int hashindex_len(HashIndex *index)
int hashindex_size(HashIndex *index)
void hashindex_write(HashIndex *index, object file_py) except *
void hashindex_write(HashIndex *index, object file_py, int legacy) except *
unsigned char *hashindex_get(HashIndex *index, unsigned char *key)
unsigned char *hashindex_next_key(HashIndex *index, unsigned char *key)
int hashindex_delete(HashIndex *index, unsigned char *key)
@ -75,21 +75,21 @@ assert _MAX_VALUE % 2 == 1
def hashindex_variant(fn):
"""peek into an index file and find out what it is"""
with open(fn, 'rb') as f:
hh = f.read(18) # len(HashHeader)
magic = hh[0:8]
magic = f.read(8) # MAGIC_LEN
if magic == b'BORG_IDX':
key_size = hh[16]
value_size = hh[17]
return f'k{key_size}_v{value_size}'
return 1 # legacy
if magic == b'BORG2IDX':
return 2
if magic == b'12345678': # used by unit tests
return 'k32_v16' # just return the current variant
raise ValueError(f'unknown hashindex format, magic: {magic!r}')
return 2 # just return the current variant
raise ValueError(f'unknown hashindex magic: {magic!r}')
@cython.internal
cdef class IndexBase:
cdef HashIndex *index
cdef int key_size
legacy = 0
_key_size = 32
@ -101,9 +101,9 @@ cdef class IndexBase:
if path:
if isinstance(path, (str, bytes)):
with open(path, 'rb') as fd:
self.index = hashindex_read(fd, permit_compact)
self.index = hashindex_read(fd, permit_compact, self.legacy)
else:
self.index = hashindex_read(path, permit_compact)
self.index = hashindex_read(path, permit_compact, self.legacy)
assert self.index, 'hashindex_read() returned NULL with no exception set'
else:
if usable is not None:
@ -123,9 +123,9 @@ cdef class IndexBase:
def write(self, path):
if isinstance(path, (str, bytes)):
with open(path, 'wb') as fd:
hashindex_write(self.index, fd)
hashindex_write(self.index, fd, self.legacy)
else:
hashindex_write(self.index, path)
hashindex_write(self.index, path, self.legacy)
def clear(self):
hashindex_free(self.index)
@ -314,6 +314,7 @@ cdef class NSKeyIterator:
cdef class NSIndex1(IndexBase): # legacy borg 1.x
legacy = 1
value_size = 8
def __getitem__(self, key):


@ -543,9 +543,9 @@ def open_index(self, transaction_id, auto_recover=True):
integrity_data = self._read_integrity(transaction_id, "index")
try:
with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
if variant == "k32_v16":
if variant == 2:
return NSIndex.read(fd)
if variant == "k32_v8": # legacy
if variant == 1: # legacy
return NSIndex1.read(fd)
except (ValueError, OSError, FileIntegrityError) as exc:
logger.warning("Repository index missing or corrupted, trying to recover from: %s", exc)


@ -108,15 +108,21 @@ def test_corrupted_ancillary(self, index, sync, data, error):
def make_index_with_refcount(self, refcount):
index_data = io.BytesIO()
index_data.write(b"BORG_IDX")
index_data.write(b"BORG2IDX")
# version
index_data.write((2).to_bytes(4, "little"))
# num_entries
index_data.write((1).to_bytes(4, "little"))
# num_buckets
index_data.write((1).to_bytes(4, "little"))
# num_empty
index_data.write((0).to_bytes(4, "little"))
# key_size
index_data.write((32).to_bytes(1, "little"))
index_data.write((32).to_bytes(4, "little"))
# value_size
index_data.write((3 * 4).to_bytes(1, "little"))
index_data.write((3 * 4).to_bytes(4, "little"))
# reserved
index_data.write(bytes(1024 - 32))
index_data.write(H(0))
index_data.write(refcount.to_bytes(4, "little"))


@ -86,12 +86,12 @@ def _generic_test(self, cls, make_value, sha):
def test_nsindex(self):
self._generic_test(
NSIndex, lambda x: (x, x, x), "7d70671d0b7e9d2f51b2691ecf35184b9f8ecc1202cceb2748c905c8fc04c256"
NSIndex, lambda x: (x, x, x), "0d7880dbe02b64f03c471e60e193a1333879b4f23105768b10c9222accfeac5e"
)
def test_chunkindex(self):
self._generic_test(
ChunkIndex, lambda x: (x, x), "85f72b036c692c8266e4f51ccf0cff2147204282b5e316ae508d30a448d88fef"
ChunkIndex, lambda x: (x, x), "5915fcf986da12e5f3ac68e05242b9c729e6101b0460b1d4e4a9e9f7cdf1b7da"
)
def test_resize(self):
@ -252,7 +252,7 @@ def test_chunk_indexer(self):
class HashIndexSizeTestCase(BaseTestCase):
def test_size_on_disk(self):
idx = ChunkIndex()
assert idx.size() == 18 + 1031 * (32 + 2 * 4)
assert idx.size() == 1024 + 1031 * (32 + 2 * 4)
def test_size_on_disk_accurate(self):
idx = ChunkIndex()
@ -368,12 +368,12 @@ def test_keyerror(self):
class HashIndexDataTestCase(BaseTestCase):
# This bytestring was created with borg2-pre 2022-06-10
# This bytestring was created with borg2-pre 2022-09-30
HASHINDEX = (
b"eJzt0LEJg1AYhdE/JqBjOEJMNhBBrAQrO9ewc+HsoG+CPMsEz1cfbnHbceqXoZvvEVE+IuoqMu2pnOE4"
b"juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4juM4"
b"juM4juM4juM4jruie36vuSVT5N0rzW0n9t7r5z9+4TiO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO"
b"4ziO4ziO4ziO4ziO4ziO437LHbSVHGw="
b"eJzt0DEKgwAMQNFoBXsMj9DqDUQoToKTR3Hzwr2DZi+0HS19HwIZHhnST/OjHYeljIhLTl1FVDlN7te"
b"Q9M/tGcdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHM"
b"dxHMdxHMdxHMdxHMdxHMdxHPfqbu+7F2nKz67Nc9sX97r1+Rt/4TiO4ziO4ziO4ziO4ziO4ziO4ziO4"
b"ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO487lDoRvHEk="
)
def _serialize_hashindex(self, idx):
@ -439,17 +439,23 @@ def test_integrity_checked_file(self):
class HashIndexCompactTestCase(HashIndexDataTestCase):
def index(self, num_entries, num_buckets):
def index(self, num_entries, num_buckets, num_empty):
index_data = io.BytesIO()
index_data.write(b"BORG_IDX")
index_data.write(b"BORG2IDX")
# version
index_data.write((2).to_bytes(4, "little"))
# num_entries
index_data.write(num_entries.to_bytes(4, "little"))
# num_buckets
index_data.write(num_buckets.to_bytes(4, "little"))
# num_empty
index_data.write(num_empty.to_bytes(4, "little"))
# key_size
index_data.write((32).to_bytes(1, "little"))
index_data.write((32).to_bytes(4, "little"))
# value_size
index_data.write((3 * 4).to_bytes(1, "little"))
index_data.write((3 * 4).to_bytes(4, "little"))
# reserved
index_data.write(bytes(1024 - 32))
self.index_data = index_data
@ -481,7 +487,7 @@ def write_deleted(self, key):
self.write_entry(key, 0xFFFFFFFE, 0, 0)
def test_simple(self):
self.index(num_entries=3, num_buckets=6)
self.index(num_entries=3, num_buckets=6, num_empty=2)
self.write_entry(H2(0), 1, 2, 3)
self.write_deleted(H2(1))
self.write_empty(H2(2))
@ -491,14 +497,14 @@ def test_simple(self):
compact_index = self.index_from_data_compact_to_data()
self.index(num_entries=3, num_buckets=3)
self.index(num_entries=3, num_buckets=3, num_empty=0)
self.write_entry(H2(0), 1, 2, 3)
self.write_entry(H2(3), 5, 6, 7)
self.write_entry(H2(4), 8, 9, 10)
assert compact_index == self.index_data.getvalue()
def test_first_empty(self):
self.index(num_entries=3, num_buckets=6)
self.index(num_entries=3, num_buckets=6, num_empty=2)
self.write_deleted(H2(1))
self.write_entry(H2(0), 1, 2, 3)
self.write_empty(H2(2))
@ -508,14 +514,14 @@ def test_first_empty(self):
compact_index = self.index_from_data_compact_to_data()
self.index(num_entries=3, num_buckets=3)
self.index(num_entries=3, num_buckets=3, num_empty=0)
self.write_entry(H2(0), 1, 2, 3)
self.write_entry(H2(3), 5, 6, 7)
self.write_entry(H2(4), 8, 9, 10)
assert compact_index == self.index_data.getvalue()
def test_last_used(self):
self.index(num_entries=3, num_buckets=6)
self.index(num_entries=3, num_buckets=6, num_empty=2)
self.write_deleted(H2(1))
self.write_entry(H2(0), 1, 2, 3)
self.write_empty(H2(2))
@ -525,14 +531,14 @@ def test_last_used(self):
compact_index = self.index_from_data_compact_to_data()
self.index(num_entries=3, num_buckets=3)
self.index(num_entries=3, num_buckets=3, num_empty=0)
self.write_entry(H2(0), 1, 2, 3)
self.write_entry(H2(3), 5, 6, 7)
self.write_entry(H2(4), 8, 9, 10)
assert compact_index == self.index_data.getvalue()
def test_too_few_empty_slots(self):
self.index(num_entries=3, num_buckets=6)
self.index(num_entries=3, num_buckets=6, num_empty=2)
self.write_deleted(H2(1))
self.write_entry(H2(0), 1, 2, 3)
self.write_entry(H2(3), 5, 6, 7)
@ -542,14 +548,14 @@ def test_too_few_empty_slots(self):
compact_index = self.index_from_data_compact_to_data()
self.index(num_entries=3, num_buckets=3)
self.index(num_entries=3, num_buckets=3, num_empty=0)
self.write_entry(H2(0), 1, 2, 3)
self.write_entry(H2(3), 5, 6, 7)
self.write_entry(H2(4), 8, 9, 10)
assert compact_index == self.index_data.getvalue()
def test_empty(self):
self.index(num_entries=0, num_buckets=6)
self.index(num_entries=0, num_buckets=6, num_empty=3)
self.write_deleted(H2(1))
self.write_empty(H2(0))
self.write_deleted(H2(3))
@ -559,7 +565,7 @@ def test_empty(self):
compact_index = self.index_from_data_compact_to_data()
self.index(num_entries=0, num_buckets=0)
self.index(num_entries=0, num_buckets=0, num_empty=0)
assert compact_index == self.index_data.getvalue()
def test_merge(self):
@ -569,7 +575,7 @@ def test_merge(self):
idx1[H(2)] = 2, 200
idx1[H(3)] = 3, 300
idx1.compact()
assert idx1.size() == 18 + 3 * (32 + 2 * 4)
assert idx1.size() == 1024 + 3 * (32 + 2 * 4)
master.merge(idx1)
assert master[H(1)] == (1, 100)
@ -612,7 +618,7 @@ def HH(x, y, z):
for y in range(700): # stay below max load to not trigger resize
idx[HH(0, y, 0)] = (0, y, 0)
assert idx.size() == 1031 * 48 + 18 # 1031 buckets + header
assert idx.size() == 1024 + 1031 * 48 # header + 1031 buckets
# delete lots of the collisions, creating lots of tombstones
for y in range(400): # stay above min load to not trigger resize