
misc. hash table tuning

BUCKET_UPPER_LIMIT: a 90% load factor degrades hash table performance severely,
so I lowered it to 75% (which is a usual value - Java uses 75%, Python uses about 66%).
I chose the higher of the two values because we also should not consume too much
memory, considering that RAM usage is already rather high.
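
A minimal sketch of what the 75% cap means operationally; the names here
(needs_resize, num_entries, num_buckets) are illustrative only, not the
identifiers used in the real index:

#include <stdio.h>

#define BUCKET_UPPER_LIMIT .75

/* grow before the load factor exceeds 75%: beyond that, the probe
   chains of an open-addressing table get long and lookups slow down */
static int needs_resize(int num_entries, int num_buckets)
{
    return num_entries > num_buckets * BUCKET_UPPER_LIMIT;
}

int main(void)
{
    printf("%d\n", needs_resize(769, 1024));  /* 1: load 75.1%, grow */
    printf("%d\n", needs_resize(768, 1024));  /* 0: load exactly 75% */
    return 0;
}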

MIN_BUCKETS: I can't explain why, but benchmarks showed that choosing 2^N as the
table size severely degrades performance (by 3 orders of magnitude!). Starting from a
prime improves this a lot, even if we later still use the grow-by-2x algorithm
(starting from a prime, the doubled sizes 1031 * 2^k never become powers of two).
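
A plausible explanation, offered here as an assumption rather than something the
benchmarks verified: if the bucket index is computed as hash % num_buckets, a
power-of-two bucket count keeps only the low bits of the hash, so any regularity
in those bits maps many keys to the same bucket, while a prime count mixes in all
bits. A self-contained demonstration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* hashes whose low 12 bits are all zero - a regular pattern */
    uint32_t hashes[] = { 0x1000, 0x2000, 0x3000, 0x4000 };
    for (int i = 0; i < 4; i++) {
        printf("h=0x%04x  %% 1024 -> %4u  %% 1031 -> %4u\n",
               (unsigned)hashes[i],
               (unsigned)(hashes[i] % 1024),
               (unsigned)(hashes[i] % 1031));
    }
    /* % 1024 maps all four hashes to bucket 0 (pure collisions);
       % 1031 spreads them to buckets 1003, 975, 947 and 919 */
    return 0;
}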

hashindex_resize: removed the hashindex_get() call, as we already know that the
value is stored at the key + key_size address.
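
The reason this is safe: the buckets form a flat array in which each bucket is
the key immediately followed by its value, so a key pointer handed out by
hashindex_next_key() also locates the value. A toy illustration of that layout
(the sizes are made up, not borg's real key/value sizes):

#include <stdio.h>
#include <string.h>

int main(void)
{
    enum { KEY_SIZE = 4, VALUE_SIZE = 4 };  /* toy sizes */
    unsigned char bucket[KEY_SIZE + VALUE_SIZE];
    memcpy(bucket, "abcd", KEY_SIZE);              /* key */
    memcpy(bucket + KEY_SIZE, "WXYZ", VALUE_SIZE); /* value right behind it */

    void *key = bucket;                   /* what an iterator would return */
    void *value = (char *)key + KEY_SIZE; /* no separate lookup needed */
    printf("%.4s -> %.4s\n", (char *)key, (char *)value);
    return 0;
}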

hashindex_init: do not calloc X*Y elements of size 1, but rather X elements of size Y.
This makes the code simpler; I am not sure whether it affects performance.
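
Both forms request the same capacity * (key_size + value_size) zeroed bytes;
shown side by side below with made-up sizes. One note in favor of the new form:
common calloc implementations check the count * size multiplication for overflow
and return NULL cleanly, which the hand-computed off_t product does not get
automatically:

#include <stdlib.h>
#include <sys/types.h>

int main(void)
{
    int capacity = 1031, key_size = 32, value_size = 8;  /* made-up sizes */

    /* before: byte count computed by hand, calloc sees N elements of 1 byte */
    off_t buckets_length = (off_t)capacity * (key_size + value_size);
    void *a = calloc(buckets_length, 1);

    /* after: calloc multiplies count by element size itself */
    void *b = calloc(capacity, key_size + value_size);

    free(a);
    free(b);
    return 0;
}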

The tests needed fixing: due to the above changes, the resulting hashtable blob is
now of course different, so its expected SHA-256 hash changed.
Commit 610300c1ce by Thomas Waldmann, 2015-12-01 21:18:58 +01:00 (parent a8227aeda0)
2 changed files with 8 additions and 9 deletions


@@ -44,8 +44,8 @@ typedef struct {
 #define DELETED _htole32(0xfffffffe)
 #define MAX_BUCKET_SIZE 512
 #define BUCKET_LOWER_LIMIT .25
-#define BUCKET_UPPER_LIMIT .90
-#define MIN_BUCKETS 1024
+#define BUCKET_UPPER_LIMIT .75 /* don't go higher than 0.75, otherwise performance severely suffers! */
+#define MIN_BUCKETS 1031 /* must be prime, otherwise performance breaks down! */
 #define MAX(x, y) ((x) > (y) ? (x): (y))
 #define BUCKET_ADDR(index, idx) (index->buckets + (idx * index->bucket_size))
 
@@ -113,12 +113,13 @@ hashindex_resize(HashIndex *index, int capacity)
 {
     HashIndex *new;
     void *key = NULL;
+    int32_t key_size = index->key_size;
 
-    if(!(new = hashindex_init(capacity, index->key_size, index->value_size))) {
+    if(!(new = hashindex_init(capacity, key_size, index->value_size))) {
         return 0;
     }
     while((key = hashindex_next_key(index, key))) {
-        hashindex_set(new, key, hashindex_get(index, key));
+        hashindex_set(new, key, key + key_size);
     }
     free(index->buckets);
     index->buckets = new->buckets;
@@ -218,7 +219,6 @@ fail:
 static HashIndex *
 hashindex_init(int capacity, int key_size, int value_size)
 {
-    off_t buckets_length;
     HashIndex *index;
     int i;
     capacity = MAX(MIN_BUCKETS, capacity);
@@ -227,8 +227,7 @@ hashindex_init(int capacity, int key_size, int value_size)
         EPRINTF("malloc header failed");
         return NULL;
     }
-    buckets_length = (off_t)capacity * (key_size + value_size);
-    if(!(index->buckets = calloc(buckets_length, 1))) {
+    if(!(index->buckets = calloc(capacity, key_size + value_size))) {
         EPRINTF("malloc buckets failed");
         free(index);
         return NULL;


@@ -51,11 +51,11 @@ def _generic_test(self, cls, make_value, sha):
 
     def test_nsindex(self):
         self._generic_test(NSIndex, lambda x: (x, x),
-                           '861d6d60069ea45e39d36bed2bdc1d0c07981e0641955f897ac6848be429abac')
+                           '80fba5b40f8cf12f1486f1ba33c9d852fb2b41a5b5961d3b9d1228cf2aa9c4c9')
 
     def test_chunkindex(self):
         self._generic_test(ChunkIndex, lambda x: (x, x, x),
-                           '69464bd0ebbc5866b9f95d838bc48617d21bfe3dcf294682a5c21a2ef6b9dc0b')
+                           '1d71865e72e3c3af18d3c7216b6fa7b014695eaa3ed7f14cf9cd02fba75d1c95')
 
     def test_resize(self):
         n = 2000  # Must be >= MIN_BUCKETS