mirror of https://github.com/borgbackup/borg.git
reimplement the chunk index merging in C
the Python code could take rather long, and most of that time was likely spent converting values from Python to C and back.
This commit is contained in:
parent 7e21d95ded
commit a1e039ba21
@@ -385,3 +385,22 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
     *total_unique_chunks = unique_chunks;
     *total_chunks = chunks;
 }
+
+static void
+hashindex_merge(HashIndex *index, HashIndex *other)
+{
+    int32_t key_size = index->key_size;
+    const int32_t *other_values;
+    int32_t *my_values;
+    void *key = NULL;
+
+    while((key = hashindex_next_key(other, key))) {
+        other_values = key + key_size;
+        my_values = hashindex_get(index, key);
+        if(my_values == NULL) {
+            hashindex_set(index, key, other_values);
+        } else {
+            *my_values += *other_values;
+        }
+    }
+}
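In pure-Python terms, the new hashindex_merge() behaves like the sketch below (illustration only, using a plain dict in place of the hash index; since *my_values += *other_values touches only the first of the three stored int32 values, only the reference count is summed while size and csize keep their existing values):

def merge(index, other):
    # index, other: dict mapping a 32-byte chunk id -> [count, size, csize]
    for key, other_values in other.items():
        my_values = index.get(key)
        if my_values is None:
            # key only present in `other`: copy its values over
            index[key] = list(other_values)
        else:
            # key present in both: sum the refcounts, keep size/csize as-is
            my_values[0] += other_values[0]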
@@ -309,8 +309,7 @@ class Cache:
             tf_in.extract(archive_id_hex, tmp_dir)
             chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
             archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
-            for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
-                add(chunk_idx, chunk_id, size, csize, incr=count)
+            chunk_idx.merge(archive_chunk_idx)
             os.unlink(chunk_idx_path)
 
         self.begin_txn()
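The cache change above is where the speedup comes from: the old loop crossed the Python/C boundary once per chunk (building a Python tuple for every entry, then handing the values back to C), while the new code makes a single call into C. A hypothetical micro-benchmark sketch, assuming the module is importable as borg.hashindex and using only calls that appear in this diff (plain item assignment stands in for the removed add() helper):

import timeit

from borg.hashindex import ChunkIndex

def H(x):
    # same helper as in the test further down: a 32-byte key derived from x
    return bytes('%-0.32d' % x, 'ascii')

archive = ChunkIndex()
for i in range(100000):
    archive[H(i)] = 1, 100, 100      # count, size, csize

def merge_in_python():
    # old approach: iterate in Python, store each entry individually
    idx = ChunkIndex()
    for chunk_id, (count, size, csize) in archive.iteritems():
        idx[chunk_id] = count, size, csize

def merge_in_c():
    # new approach: one call, all work happens in _hashindex.c
    idx = ChunkIndex()
    idx.merge(archive)

print('python loop:', timeit.timeit(merge_in_python, number=3))
print('C merge:', timeit.timeit(merge_in_c, number=3))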
@@ -14,6 +14,7 @@ cdef extern from "_hashindex.c":
     void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
                              long long *unique_size, long long *unique_csize,
                              long long *total_unique_chunks, long long *total_chunks)
+    void hashindex_merge(HashIndex *index, HashIndex *other)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -190,6 +191,9 @@ cdef class ChunkIndex(IndexBase):
                              &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def merge(self, ChunkIndex other):
+        hashindex_merge(self.index, other.index)
+
 
 cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
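With the wrapper declared above, merging one chunk index into another from Python is a single method call. A minimal usage sketch (hypothetical key and values, indexes created the same way as in the test below):

from borg.hashindex import ChunkIndex

master = ChunkIndex()
archive = ChunkIndex()
archive[b'x' * 32] = 1, 100, 100    # count, size, csize

master.merge(archive)               # runs entirely in C
assert master[b'x' * 32] == (1, 100, 100)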
@@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex
 from . import BaseTestCase
 
 
+def H(x):
+    # make some 32byte long thing that depends on x
+    return bytes('%-0.32d' % x, 'ascii')
+
+
 class HashIndexTestCase(BaseTestCase):
 
     def _generic_test(self, cls, make_value, sha):
@@ -78,3 +83,20 @@ class HashIndexTestCase(BaseTestCase):
         second_half = list(idx.iteritems(marker=all[49][0]))
         self.assert_equal(len(second_half), 50)
         self.assert_equal(second_half, all[50:])
+
+    def test_chunkindex_merge(self):
+        idx1 = ChunkIndex()
+        idx1[H(1)] = 1, 100, 100
+        idx1[H(2)] = 2, 200, 200
+        idx1[H(3)] = 3, 300, 300
+        # no H(4) entry
+        idx2 = ChunkIndex()
+        idx2[H(1)] = 4, 100, 100
+        idx2[H(2)] = 5, 200, 200
+        # no H(3) entry
+        idx2[H(4)] = 6, 400, 400
+        idx1.merge(idx2)
+        assert idx1[H(1)] == (5, 100, 100)
+        assert idx1[H(2)] == (7, 200, 200)
+        assert idx1[H(3)] == (3, 300, 300)
+        assert idx1[H(4)] == (6, 400, 400)