From 720fc49498f1dbe8a1b142e93ca3283a6142a806 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Mon, 7 Dec 2015 19:13:58 +0100
Subject: [PATCH] hashindex_add C implementation

this was also the loop contents of hashindex_merge, but we also need it
callable from Cython/Python code.

this saves some cycles, esp. if the key is already present in the index.
---
 borg/_hashindex.c  | 21 ++++++++++++---------
 borg/cache.py      | 13 +++----------
 borg/hashindex.pyx |  9 +++++++++
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/borg/_hashindex.c b/borg/_hashindex.c
index a61c644b2..16adbdfc4 100644
--- a/borg/_hashindex.c
+++ b/borg/_hashindex.c
@@ -390,21 +390,24 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_chunks = chunks;
 }
 
+static void
+hashindex_add(HashIndex *index, const void *key, int32_t *other_values)
+{
+    int32_t *my_values = (int32_t *)hashindex_get(index, key);
+    if(my_values == NULL) {
+        hashindex_set(index, key, other_values);
+    } else {
+        *my_values += *other_values;
+    }
+}
+
 static void
 hashindex_merge(HashIndex *index, HashIndex *other)
 {
     int32_t key_size = index->key_size;
-    const int32_t *other_values;
-    int32_t *my_values;
     void *key = NULL;
 
     while((key = hashindex_next_key(other, key))) {
-        other_values = key + key_size;
-        my_values = (int32_t *)hashindex_get(index, key);
-        if(my_values == NULL) {
-            hashindex_set(index, key, other_values);
-        } else {
-            *my_values += *other_values;
-        }
+        hashindex_add(index, key, key + key_size);
     }
 }
diff --git a/borg/cache.py b/borg/cache.py
index c911283db..eaefc990e 100644
--- a/borg/cache.py
+++ b/borg/cache.py
@@ -255,18 +255,11 @@ def cleanup_outdated(ids):
             for id in ids:
                 os.unlink(mkpath(id))
 
-        def add(chunk_idx, id, size, csize, incr=1):
-            try:
-                count, size, csize = chunk_idx[id]
-                chunk_idx[id] = count + incr, size, csize
-            except KeyError:
-                chunk_idx[id] = incr, size, csize
-
         def fetch_and_build_idx(archive_id, repository, key):
             chunk_idx = ChunkIndex()
             cdata = repository.get(archive_id)
             data = key.decrypt(archive_id, cdata)
-            add(chunk_idx, archive_id, len(data), len(cdata))
+            chunk_idx.add(archive_id, 1, len(data), len(cdata))
             archive = msgpack.unpackb(data)
             if archive[b'version'] != 1:
                 raise Exception('Unknown archive metadata version')
@@ -274,7 +267,7 @@ def fetch_and_build_idx(archive_id, repository, key):
             unpacker = msgpack.Unpacker()
             for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
                 data = key.decrypt(item_id, chunk)
-                add(chunk_idx, item_id, len(data), len(chunk))
+                chunk_idx.add(item_id, 1, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
                     if not isinstance(item, dict):
@@ -282,7 +275,7 @@ def fetch_and_build_idx(archive_id, repository, key):
                         continue
                     if b'chunks' in item:
                         for chunk_id, size, csize in item[b'chunks']:
-                            add(chunk_idx, chunk_id, size, csize)
+                            chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
                 fn_tmp = mkpath(archive_id, suffix='.tmp')
diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx
index 0b4dc2605..5fc8d6e4b 100644
--- a/borg/hashindex.pyx
+++ b/borg/hashindex.pyx
@@ -15,6 +15,7 @@ cdef extern from "_hashindex.c":
                              long long *unique_size, long long *unique_csize,
                              long long *total_unique_chunks, long long *total_chunks)
     void hashindex_merge(HashIndex *index, HashIndex *other)
+    void hashindex_add(HashIndex *index, void *key, void *value)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -196,6 +197,14 @@ cdef class ChunkIndex(IndexBase):
                             &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def add(self, key, refs, size, csize):
+        assert len(key) == self.key_size
+        cdef int[3] data
+        data[0] = _htole32(refs)
+        data[1] = _htole32(size)
+        data[2] = _htole32(csize)
+        hashindex_add(self.index, <char *>key, data)
+
     def merge(self, ChunkIndex other):
         hashindex_merge(self.index, other.index)