2010-12-16 19:23:22 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2013-06-29 12:22:05 +00:00
|
|
|
import os
|
|
|
|
|
2014-07-10 13:32:12 +00:00
|
|
|
# Integer version tag exported by this module.
# NOTE(review): presumably compared by importers to detect a stale compiled
# extension -- confirm against the callers, which are not visible here.
API_VERSION = 2
|
2014-03-18 21:04:08 +00:00
|
|
|
|
2010-12-16 19:23:22 +00:00
|
|
|
|
2013-05-28 12:35:55 +00:00
|
|
|
cdef extern from "_hashindex.c":
    # Opaque handle: the table is only ever manipulated through the
    # functions declared below, never by touching struct fields from here.
    ctypedef struct HashIndex:
        pass

    # --- lifecycle / persistence -------------------------------------------
    # Callers in this file treat a NULL return from init/read and a zero
    # return from write as failure.
    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
    HashIndex *hashindex_read(char *path)
    int hashindex_write(HashIndex *index, char *path)
    void hashindex_free(HashIndex *index)

    # --- lookup / mutation / traversal -------------------------------------
    # hashindex_get returns a pointer to the stored value, or NULL if the key
    # is absent.  hashindex_next_key is called with NULL to obtain the first
    # key and returns NULL once there are no more keys.  set/delete return
    # zero on failure (as checked by the callers in this file).
    void *hashindex_get(HashIndex *index, void *key)
    void *hashindex_next_key(HashIndex *index, void *key)
    int hashindex_set(HashIndex *index, void *key, void *value)
    int hashindex_delete(HashIndex *index, void *key)
    int hashindex_get_size(HashIndex *index)

    # --- statistics ---------------------------------------------------------
    # Results are written through the four output pointers.
    void hashindex_summarize(
        HashIndex *index,
        long long *total_size,
        long long *total_csize,
        long long *unique_size,
        long long *unique_csize)

    # --- 32-bit conversion helpers -----------------------------------------
    # Applied to every 32-bit value written to / read from the table.
    # NOTE(review): names suggest host <-> little-endian conversion; the
    # implementation lives in _hashindex.c and is not visible here.
    int _htole32(int v)
    int _le32toh(int v)
|
2010-12-16 19:23:22 +00:00
|
|
|
|
2013-05-28 12:35:55 +00:00
|
|
|
|
2011-06-18 09:26:20 +00:00
|
|
|
# Unique sentinel used as the default for IndexBase.pop()'s ``default``
# parameter, so that "no default supplied" can be told apart from an explicit
# ``default=None``.
_NoDefault = object()
|
2010-12-16 19:23:22 +00:00
|
|
|
|
|
|
|
cdef class IndexBase:
    """Shared wrapper around a C ``HashIndex`` table keyed by 32-byte keys.

    This base class owns the C table (allocation, loading, saving, freeing)
    and provides the dict-like helpers that do not depend on the value
    layout.  Subclasses must define the class attribute ``value_size`` and
    implement ``__getitem__``, ``__setitem__`` and ``__contains__``.
    """

    # Pointer to the underlying C table; NULL only if construction failed.
    cdef HashIndex *index

    # Length in bytes of every key.
    key_size = 32

    def __cinit__(self, capacity=0, path=None):
        # Either load an existing table from ``path`` or create an empty one
        # sized by ``capacity``.  Raises Exception if the C layer returns NULL.
        if path:
            self.index = hashindex_read(<bytes>os.fsencode(path))
            if not self.index:
                raise Exception('hashindex_read failed')
        else:
            self.index = hashindex_init(capacity, self.key_size, self.value_size)
            if not self.index:
                raise Exception('hashindex_init failed')

    def __dealloc__(self):
        # self.index may be NULL when __cinit__ (or clear) failed part-way.
        if self.index:
            hashindex_free(self.index)

    @classmethod
    def read(cls, path):
        """Return a new index loaded from the file at ``path``."""
        return cls(path=path)

    def write(self, path):
        """Write the table to ``path``; raise Exception if the C call fails."""
        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
            raise Exception('hashindex_write failed')

    def clear(self):
        """Drop all entries by replacing the C table with a fresh, empty one.

        Raises Exception if the replacement table cannot be allocated.
        """
        hashindex_free(self.index)
        self.index = hashindex_init(0, self.key_size, self.value_size)
        if not self.index:
            raise Exception('hashindex_init failed')

    def setdefault(self, key, value):
        """Store ``value`` under ``key`` unless ``key`` is already present."""
        if key not in self:
            self[key] = value

    def __delitem__(self, key):
        assert len(key) == 32
        if not hashindex_delete(self.index, <char *>key):
            raise Exception('hashindex_delete failed')

    def get(self, key, default=None):
        """Return the value stored for ``key``, or ``default`` if absent."""
        try:
            return self[key]
        except KeyError:
            return default

    def pop(self, key, default=_NoDefault):
        """Remove ``key`` and return its value.

        If ``key`` is absent, return ``default`` when one was supplied,
        otherwise re-raise the KeyError.
        """
        try:
            value = self[key]
            del self[key]
            return value
        except KeyError:
            # Identity test: the sentinel must never be compared with ``!=``,
            # which would invoke the caller-supplied default's own __ne__.
            if default is not _NoDefault:
                return default
            raise

    def __len__(self):
        return hashindex_get_size(self.index)
|
|
|
|
|
|
|
|
|
|
|
|
cdef class NSIndex(IndexBase):
    """Index mapping 32-byte keys to a pair of 32-bit integers."""

    # Two 32-bit integers per entry.
    value_size = 8

    def __getitem__(self, key):
        # Returns the stored pair as a 2-tuple; raises KeyError if absent.
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        if not data:
            raise KeyError
        return _le32toh(data[0]), _le32toh(data[1])

    def __setitem__(self, key, value):
        # ``value`` must be indexable with at least two integer items.
        assert len(key) == 32
        cdef int[2] data
        data[0] = _htole32(value[0])
        data[1] = _htole32(value[1])
        if not hashindex_set(self.index, <char *>key, data):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        return data != NULL

    def iteritems(self, marker=None):
        """Return an iterator over ``(key, (v0, v1))`` entries.

        If ``marker`` is given, iteration is positioned at that key instead
        of at the beginning of the table.

        Raises:
            IndexError: if ``marker`` is not present in the index.
        """
        cdef const void *key
        it = NSKeyIterator()
        # Hand the iterator a reference to this object alongside the raw
        # table pointer it walks.
        it.idx = self
        it.index = self.index
        if marker:
            key = hashindex_get(self.index, <char *>marker)
            # The lookup result, not ``marker``, tells us whether the key
            # exists; doing pointer arithmetic on NULL would hand the
            # iterator a wild pointer.
            if key == NULL:
                raise IndexError
            # hashindex_get returns the value pointer; the 32-byte key sits
            # immediately before it (the iterator reads values at key + 32).
            it.key = key - 32
        return it
|
|
|
|
|
|
|
|
|
|
|
|
cdef class NSKeyIterator:
    """Iterator over the entries of an ``NSIndex``.

    Yields ``(key_bytes, (v0, v1))`` tuples.  Instances are created and
    configured by ``NSIndex.iteritems``.
    """

    # Owning index object (set by NSIndex.iteritems).
    # NOTE(review): presumably held so the C table outlives the iterator.
    cdef NSIndex idx
    # Raw table being walked.
    cdef HashIndex *index
    # Current position; NULL means "start from the first key".
    cdef const void *key
    # Set once the end of the table has been reached.
    cdef bint exhausted

    def __cinit__(self):
        self.key = NULL
        self.exhausted = False

    def __iter__(self):
        return self

    def __next__(self):
        cdef int *value
        # Without this flag, self.key == NULL after exhaustion would be
        # indistinguishable from a fresh iterator and the walk would restart,
        # violating the iterator protocol.
        if self.exhausted:
            raise StopIteration
        self.key = hashindex_next_key(self.index, <char *>self.key)
        if not self.key:
            self.exhausted = True
            raise StopIteration
        # The value is stored directly after the 32-byte key.
        value = <int *>(self.key + 32)
        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
|
2010-12-16 19:23:22 +00:00
|
|
|
|
|
|
|
|
2011-07-30 19:13:48 +00:00
|
|
|
cdef class ChunkIndex(IndexBase):
    """Index mapping 32-byte keys to a triple of 32-bit integers."""

    # Three 32-bit integers per entry.
    value_size = 12

    def __getitem__(self, key):
        # Returns the stored triple as a 3-tuple; raises KeyError if absent.
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        if not data:
            raise KeyError
        return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])

    def __setitem__(self, key, value):
        # ``value`` must be indexable with at least three integer items.
        assert len(key) == 32
        cdef int[3] data
        data[0] = _htole32(value[0])
        data[1] = _htole32(value[1])
        data[2] = _htole32(value[2])
        if not hashindex_set(self.index, <char *>key, data):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        return data != NULL

    def iteritems(self, marker=None):
        """Return an iterator over ``(key, (v0, v1, v2))`` entries.

        If ``marker`` is given, iteration is positioned at that key instead
        of at the beginning of the table.

        Raises:
            IndexError: if ``marker`` is not present in the index.
        """
        cdef const void *key
        it = ChunkKeyIterator()
        # Hand the iterator a reference to this object alongside the raw
        # table pointer it walks.
        it.idx = self
        it.index = self.index
        if marker:
            key = hashindex_get(self.index, <char *>marker)
            # The lookup result, not ``marker``, tells us whether the key
            # exists; doing pointer arithmetic on NULL would hand the
            # iterator a wild pointer.
            if key == NULL:
                raise IndexError
            # hashindex_get returns the value pointer; the 32-byte key sits
            # immediately before it (the iterator reads values at key + 32).
            it.key = key - 32
        return it

    def summarize(self):
        """Return ``(total_size, total_csize, unique_size, unique_csize)``.

        The four totals are computed by the C helper ``hashindex_summarize``.
        """
        cdef long long total_size, total_csize, unique_size, unique_csize
        hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
        return total_size, total_csize, unique_size, unique_csize
|
|
|
|
|
2011-07-30 19:13:48 +00:00
|
|
|
|
|
|
|
cdef class ChunkKeyIterator:
    """Iterator over the entries of a ``ChunkIndex``.

    Yields ``(key_bytes, (v0, v1, v2))`` tuples.  Instances are created and
    configured by ``ChunkIndex.iteritems``.
    """

    # Owning index object (set by ChunkIndex.iteritems).
    # NOTE(review): presumably held so the C table outlives the iterator.
    cdef ChunkIndex idx
    # Raw table being walked.
    cdef HashIndex *index
    # Current position; NULL means "start from the first key".
    cdef const void *key
    # Set once the end of the table has been reached.
    cdef bint exhausted

    def __cinit__(self):
        self.key = NULL
        self.exhausted = False

    def __iter__(self):
        return self

    def __next__(self):
        cdef int *value
        # Without this flag, self.key == NULL after exhaustion would be
        # indistinguishable from a fresh iterator and the walk would restart,
        # violating the iterator protocol.
        if self.exhausted:
            raise StopIteration
        self.key = hashindex_next_key(self.index, <char *>self.key)
        if not self.key:
            self.exhausted = True
            raise StopIteration
        # The value is stored directly after the 32-byte key.
        value = <int *>(self.key + 32)
        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
|