generalize hashindex code for any key length

currently, we only use sha256 hashes as key, so key length is always 32.
but instead of hardcoding 32 everywhere, using key_length is just better
readable and also more flexible for the future.
This commit is contained in:
Thomas Waldmann 2015-08-16 14:51:15 +02:00
parent 608c0935e0
commit b180158876
1 changed files with 24 additions and 19 deletions

View File

@ -32,9 +32,10 @@ cimport cython
@cython.internal
cdef class IndexBase:
cdef HashIndex *index
key_size = 32
cdef int key_size
def __cinit__(self, capacity=0, path=None):
def __cinit__(self, capacity=0, path=None, key_size=32):
self.key_size = key_size
if path:
self.index = hashindex_read(os.fsencode(path))
if not self.index:
@ -67,7 +68,7 @@ cdef class IndexBase:
self[key] = value
def __delitem__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
if not hashindex_delete(self.index, <char *>key):
raise Exception('hashindex_delete failed')
@ -96,14 +97,14 @@ cdef class NSIndex(IndexBase):
value_size = 8
def __getitem__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return _le32toh(data[0]), _le32toh(data[1])
def __setitem__(self, key, value):
assert len(key) == 32
assert len(key) == self.key_size
cdef int[2] data
data[0] = _htole32(value[0])
data[1] = _htole32(value[1])
@ -111,20 +112,20 @@ cdef class NSIndex(IndexBase):
raise Exception('hashindex_set failed')
def __contains__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL
def iteritems(self, marker=None):
cdef const void *key
iter = NSKeyIterator()
iter = NSKeyIterator(self.key_size)
iter.idx = self
iter.index = self.index
if marker:
key = hashindex_get(self.index, <char *>marker)
if marker is None:
raise IndexError
iter.key = key - 32
iter.key = key - self.key_size
return iter
@ -132,9 +133,11 @@ cdef class NSKeyIterator:
cdef NSIndex idx
cdef HashIndex *index
cdef const void *key
cdef int key_size
def __cinit__(self):
def __cinit__(self, key_size):
self.key = NULL
self.key_size = key_size
def __iter__(self):
return self
@ -143,8 +146,8 @@ cdef class NSKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + 32)
return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
cdef class ChunkIndex(IndexBase):
@ -152,14 +155,14 @@ cdef class ChunkIndex(IndexBase):
value_size = 12
def __getitem__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
def __setitem__(self, key, value):
assert len(key) == 32
assert len(key) == self.key_size
cdef int[3] data
data[0] = _htole32(value[0])
data[1] = _htole32(value[1])
@ -168,20 +171,20 @@ cdef class ChunkIndex(IndexBase):
raise Exception('hashindex_set failed')
def __contains__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL
def iteritems(self, marker=None):
cdef const void *key
iter = ChunkKeyIterator()
iter = ChunkKeyIterator(self.key_size)
iter.idx = self
iter.index = self.index
if marker:
key = hashindex_get(self.index, <char *>marker)
if marker is None:
raise IndexError
iter.key = key - 32
iter.key = key - self.key_size
return iter
def summarize(self):
@ -199,9 +202,11 @@ cdef class ChunkKeyIterator:
cdef ChunkIndex idx
cdef HashIndex *index
cdef const void *key
cdef int key_size
def __cinit__(self):
def __cinit__(self, key_size):
self.key = NULL
self.key_size = key_size
def __iter__(self):
return self
@ -210,5 +215,5 @@ cdef class ChunkKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + 32)
return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))