hashindex_add C implementation

This factors out the loop body of hashindex_merge into its own function, so it is also callable from Cython/Python code.

Doing the update in a single C call saves some cycles, especially if the key is already present in the index.
Thomas Waldmann 2015-12-07 19:13:58 +01:00
parent 2e5baa6ec2
commit 720fc49498
3 changed files with 24 additions and 19 deletions

_hashindex.c

@@ -390,21 +390,24 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
     *total_chunks = chunks;
 }
 
+static void
+hashindex_add(HashIndex *index, const void *key, int32_t *other_values)
+{
+    int32_t *my_values = (int32_t *)hashindex_get(index, key);
+    if(my_values == NULL) {
+        hashindex_set(index, key, other_values);
+    } else {
+        *my_values += *other_values;
+    }
+}
+
 static void
 hashindex_merge(HashIndex *index, HashIndex *other)
 {
     int32_t key_size = index->key_size;
-    const int32_t *other_values;
-    int32_t *my_values;
     void *key = NULL;
 
     while((key = hashindex_next_key(other, key))) {
-        other_values = key + key_size;
-        my_values = (int32_t *)hashindex_get(index, key);
-        if(my_values == NULL) {
-            hashindex_set(index, key, other_values);
-        } else {
-            *my_values += *other_values;
-        }
+        hashindex_add(index, key, key + key_size);
     }
 }
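
Note on the hunk above: *my_values += *other_values dereferences only the first int32_t of the three-value payload, so when the key already exists only the reference count is summed; the stored size/csize keep their original values. A rough pure-Python model of hashindex_add, for illustration only (a plain dict stands in for the open-addressing hash table; this is not borg code):

    def hashindex_add_model(index, key, other_values):
        # index maps key -> [refs, size, csize], mirroring the 3 x int32 payload
        my_values = index.get(key)
        if my_values is None:
            index[key] = list(other_values)   # new key: store all three values
        else:
            my_values[0] += other_values[0]   # existing key: bump refcount only

    idx = {}
    hashindex_add_model(idx, b'id', [1, 4096, 1234])
    hashindex_add_model(idx, b'id', [1, 4096, 1234])
    assert idx[b'id'] == [2, 4096, 1234]      # refs summed, size/csize unchanged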

cache.py

@@ -255,18 +255,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         for id in ids:
             os.unlink(mkpath(id))
 
-        def add(chunk_idx, id, size, csize, incr=1):
-            try:
-                count, size, csize = chunk_idx[id]
-                chunk_idx[id] = count + incr, size, csize
-            except KeyError:
-                chunk_idx[id] = incr, size, csize
-
         def fetch_and_build_idx(archive_id, repository, key):
             chunk_idx = ChunkIndex()
             cdata = repository.get(archive_id)
             data = key.decrypt(archive_id, cdata)
-            add(chunk_idx, archive_id, len(data), len(cdata))
+            chunk_idx.add(archive_id, 1, len(data), len(cdata))
             archive = msgpack.unpackb(data)
             if archive[b'version'] != 1:
                 raise Exception('Unknown archive metadata version')
@@ -274,7 +267,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             unpacker = msgpack.Unpacker()
             for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
                 data = key.decrypt(item_id, chunk)
-                add(chunk_idx, item_id, len(data), len(chunk))
+                chunk_idx.add(item_id, 1, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
                     if not isinstance(item, dict):
@@ -282,7 +275,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                         continue
                     if b'chunks' in item:
                         for chunk_id, size, csize in item[b'chunks']:
-                            add(chunk_idx, chunk_id, size, csize)
+                            chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
                 fn_tmp = mkpath(archive_id, suffix='.tmp')
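
Note: each call site now passes the refcount increment (1) explicitly as the second argument. The net effect equals the removed add() helper with incr=1; the difference is that the read-modify-write happens in a single C call instead of a Python try/except per chunk. Old vs. new, side by side (illustrative; chunk_id, size, csize as in the hunk above):

    # old: two index accesses plus exception handling per chunk; the helper
    # re-stored the size/csize it read back (shadowing its parameters), so an
    # existing entry effectively only had its count bumped
    try:
        count, size, csize = chunk_idx[chunk_id]
        chunk_idx[chunk_id] = count + 1, size, csize
    except KeyError:
        chunk_idx[chunk_id] = 1, size, csize

    # new: one call with the same net effect, done in C
    chunk_idx.add(chunk_id, 1, size, csize)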

hashindex.pyx

@@ -15,6 +15,7 @@ cdef extern from "_hashindex.c":
                             long long *unique_size, long long *unique_csize,
                             long long *total_unique_chunks, long long *total_chunks)
     void hashindex_merge(HashIndex *index, HashIndex *other)
+    void hashindex_add(HashIndex *index, void *key, void *value)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -196,6 +197,14 @@ cdef class ChunkIndex(IndexBase):
                              &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def add(self, key, refs, size, csize):
+        assert len(key) == self.key_size
+        cdef int[3] data
+        data[0] = _htole32(refs)
+        data[1] = _htole32(size)
+        data[2] = _htole32(csize)
+        hashindex_add(self.index, <char *>key, data)
+
     def merge(self, ChunkIndex other):
         hashindex_merge(self.index, other.index)
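
Note: _htole32 converts the values from host to little-endian byte order before they are stored, so the index keeps one fixed on-disk format regardless of host endianness. A minimal usage sketch of the new method, assuming the compiled borg.hashindex extension from this commit and the default 32-byte key size that ChunkIndex uses in cache.py above:

    from borg.hashindex import ChunkIndex

    idx = ChunkIndex()
    key = b'\x00' * 32              # len(key) must equal self.key_size
    idx.add(key, 1, 4096, 1234)     # new key: stores (refs=1, size, csize)
    idx.add(key, 1, 4096, 1234)     # existing key: hashindex_add bumps refs to 2
    assert idx[key] == (2, 4096, 1234)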