mirror of https://github.com/borgbackup/borg.git
hashindex: add tests, misc. minor fixes/changes
test hashtable expansion/rebuild.

hashindex_lookup:
- return -2 for a compact / completely full hashtable
- return -1 and (via start_idx pointer) the deleted/tombstone bucket index.

fix size assertion (we add 1 element to trigger rebuild).

fix upper_limit check - since we'll be adding 1 to num_entries below, the condition should be >=.

hashindex_compact: set min_empty/upper_limit.

Co-authored-by: Dan Christensen <jdc+github@uwo.ca>
This commit is contained in:
parent 1ed8ac0408
commit 3d57dc0590

2 changed files with 39 additions and 8 deletions
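To make the new lookup contract concrete before the hunks: the sketch below is a minimal toy, not borg's code. toy_lookup, Bucket, TOY_BUCKETS and the fixed probe start at bucket 0 are all invented for this example; borg's real hashindex_lookup hashes the key and works on its own bucket layout, but the found / -1-plus-start_idx / -2 convention is the one the commit message describes.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define TOY_BUCKETS 8
enum { EMPTY = 0, USED, DELETED };

typedef struct {
    int state;
    const char *key;
} Bucket;

/* Return convention (mirroring the commit message):
 * >= 0: bucket index of the key.
 * -1:   key absent; *start_idx is the first tombstone seen, otherwise the
 *       empty bucket that ended the probe (either is a usable insert slot).
 * -2:   a full pass found neither the key, nor an empty bucket, nor a
 *       tombstone - e.g. a compacted table; the caller must grow/rebuild. */
static int toy_lookup(const Bucket *table, const char *key, int *start_idx)
{
    int start = 0;                  /* a real table would hash the key here */
    int idx = start, didx = -1;
    for (;;) {
        if (table[idx].state == EMPTY)
            break;                  /* probe chain ends: key is absent */
        if (table[idx].state == DELETED) {
            if (didx == -1)
                didx = idx;         /* remember the first tombstone */
        } else if (strcmp(table[idx].key, key) == 0) {
            return idx;             /* found */
        }
        idx = (idx + 1) % TOY_BUCKETS;
        if (idx == start) {         /* wrapped: no empty bucket anywhere */
            if (didx != -1)
                break;              /* but a tombstone is reusable */
            return -2;              /* completely full table */
        }
    }
    if (start_idx != NULL)
        *start_idx = (didx != -1) ? didx : idx;
    return -1;
}

int main(void)
{
    Bucket table[TOY_BUCKETS] = {0};    /* state 0 == EMPTY everywhere */
    int slot;

    /* empty table: absent key, insert slot is the first empty bucket */
    assert(toy_lookup(table, "k", &slot) == -1 && slot == 0);

    /* fill every bucket: no empty, no tombstone -> -2 */
    for (int i = 0; i < TOY_BUCKETS; i++) {
        table[i].state = USED;
        table[i].key = "other";
    }
    assert(toy_lookup(table, "k", &slot) == -2);

    /* one tombstone: absent key, but the tombstone index is reported */
    table[3].state = DELETED;
    assert(toy_lookup(table, "k", &slot) == -1 && slot == 3);

    printf("toy lookup behaves as described\n");
    return 0;
}

The three asserts in main trace exactly the three outcomes the diff below handles: an insertable empty bucket, a completely full table (-2), and tombstone reuse.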
@@ -191,11 +191,14 @@ hashindex_lookup(HashIndex *index, const unsigned char *key, int *start_idx)
         /* When idx == start, we have done a full pass over all buckets.
          * - We did not find a bucket with the key we searched for.
          * - We did not find an empty bucket either.
-         * So all buckets are either full or deleted/tombstones.
-         * This is an invalid state we never should get into, see
-         * upper_limit and min_empty.
+         * - We may have found a deleted/tombstone bucket, though.
+         *   This can easily happen if we have a compact hashtable.
          */
-        assert(idx != start);
+        if(idx == start) {
+            if(didx != -1)
+                break;  /* we have found a deleted/tombstone bucket at least */
+            return -2;  /* HT is completely full, no empty or deleted buckets. */
+        }
     }
     /* we get here if we did not find a bucket with the key we searched for. */
     if (start_idx != NULL) {
@@ -745,8 +748,8 @@ hashindex_set(HashIndex *index, const unsigned char *key, const void *value)
     uint8_t *ptr;
     if(idx < 0)
     {
-        if(index->num_entries > index->upper_limit) {
-            /* hashtable too full, grow it! */
+        if(index->num_entries >= index->upper_limit || idx == -2) {
+            /* hashtable too full or even a compact hashtable, grow/rebuild it! */
             if(!hashindex_resize(index, grow_size(index->num_buckets))) {
                 return 0;
             }
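A note on the condition change (a reading of the diff, not text from the commit): the check runs before the new entry is counted, so with the old ">" a table holding exactly upper_limit entries would still accept one more and reach upper_limit + 1 before any resize; ">=" grows it first. The "idx == -2" arm covers the compacted case, where lookup can hand back neither an empty nor a tombstone bucket.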
@@ -754,7 +757,7 @@ hashindex_set(HashIndex *index, const unsigned char *key, const void *value)
          * so we only have EMPTY or USED buckets, but no DELETED ones any more.
          */
         idx = hashindex_lookup(index, key, &start_idx);
-        assert(idx < 0);
+        assert(idx == -1);
         assert(BUCKET_IS_EMPTY(index, start_idx));
     }
     idx = start_idx;
@@ -768,7 +771,7 @@ hashindex_set(HashIndex *index, const unsigned char *key, const void *value)
          * so we only have EMPTY or USED buckets, but no DELETED ones any more.
          */
         idx = hashindex_lookup(index, key, &start_idx);
-        assert(idx < 0);
+        assert(idx == -1);
         assert(BUCKET_IS_EMPTY(index, start_idx));
         idx = start_idx;
     }
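These two hunks tighten the same assertion, and both sit right after a resize: as the retained comment says, a rebuild leaves only EMPTY or USED buckets. A failed lookup therefore cannot return -2 (that requires a table with no empty buckets at all), so -1 with an EMPTY bucket in start_idx is the exact postcondition, and asserting idx == -1 is strictly stronger than the old idx < 0.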
@@ -879,6 +882,8 @@ hashindex_compact(HashIndex *index)
 
     index->num_buckets = index->num_entries;
     index->num_empty = 0;
+    index->min_empty = 0;
+    index->upper_limit = index->num_entries;  /* triggers a resize/rebuild when a new entry is added */
     return saved_size;
 }
 
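Why these two assignments (an inference from the diff; min_empty's exact role is not spelled out in this commit): num_empty = 0 states a fact about the compacted layout, since no empty buckets remain, and min_empty = 0, presumably the minimum number of empty buckets the table normally keeps to bound probe chains, is set to match. upper_limit = num_entries then makes the very next hashindex_set take the num_entries >= upper_limit grow/rebuild branch above, so nothing is ever inserted into the compact table directly.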
@@ -40,3 +40,29 @@ def test_hashindex_stress():
     #define HASH_MAX_EFF_LOAD .999
     """
     make_hashtables(entries=10000, loops=1000)  # we do quite some assertions while making them
+
+
+def test_hashindex_compact():
+    """test that we do not lose or corrupt data by the compaction nor by expanding/rebuilding"""
+    idx, kv = make_hashtables(entries=5000, loops=5)
+    size_noncompact = idx.size()
+    # compact the hashtable (remove empty/tombstone buckets)
+    saved_space = idx.compact()
+    # did we actually compact (reduce space usage)?
+    size_compact = idx.size()
+    assert saved_space > 0
+    assert size_noncompact - size_compact == saved_space
+    # did we lose anything?
+    for k, v in kv.items():
+        assert k in idx and idx[k] == (v, v, v)
+    assert len(idx) == len(kv)
+    # now expand the hashtable again. trigger a resize/rebuild by adding an entry.
+    k = b"x" * 32
+    idx[k] = (0, 0, 0)
+    kv[k] = 0
+    size_rebuilt = idx.size()
+    assert size_rebuilt > size_compact + 1
+    # did we lose anything?
+    for k, v in kv.items():
+        assert k in idx and idx[k] == (v, v, v)
+    assert len(idx) == len(kv)
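The new test walks the full cycle the C changes enable: build tables with make_hashtables (the helper already used by test_hashindex_stress above), compact, verify every key/value survived, then insert one extra entry to force the grow/rebuild path, and verify again. The assertion size_rebuilt > size_compact + 1 checks that the single insert triggered a real expansion of the bucket array, not just one bucket's worth of growth.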