2022-03-02 01:37:00 +00:00
|
|
|
import zlib
|
|
|
|
|
2022-03-16 23:24:49 +00:00
|
|
|
from .platformflags import is_darwin
|
|
|
|
from .helpers import bin_to_hex
|
2016-12-20 16:54:46 +00:00
|
|
|
|
|
|
|
from libc.stdint cimport uint32_t
|
|
|
|
from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
|
2017-05-31 19:47:07 +00:00
|
|
|
from cpython.bytes cimport PyBytes_FromStringAndSize
|
2016-12-20 16:54:46 +00:00
|
|
|
|
|
|
|
|
2022-02-26 21:18:12 +00:00
|
|
|
cdef extern from "xxhash.h":
    # ---- types -------------------------------------------------------------
    # 64-bit hash value as returned by XXH64() / XXH64_digest().
    ctypedef unsigned long long XXH64_hash_t

    # Status code returned by the state-manipulating functions below.
    ctypedef enum XXH_errorcode:
        XXH_OK,
        XXH_ERROR

    # Streaming hash state; only ever handled through a pointer here.
    ctypedef struct XXH64_state_t:
        pass  # opaque

    # 8-byte buffer that XXH64_canonicalFromHash() fills from a hash value.
    ctypedef struct XXH64_canonical_t:
        char digest[8]

    # ---- one-shot hashing --------------------------------------------------
    XXH64_hash_t XXH64(const void* input, size_t length, unsigned long long seed)

    # ---- streaming hashing -------------------------------------------------
    XXH64_state_t* XXH64_createState()
    XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
    XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
    XXH_errorcode XXH64_update(XXH64_state_t* statePtr, const void* input, size_t length)
    XXH64_hash_t XXH64_digest(const XXH64_state_t* statePtr)

    # ---- hash value <-> canonical byte representation ----------------------
    void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
    XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
|
2017-05-31 19:47:07 +00:00
|
|
|
|
|
|
|
|
2016-12-20 16:54:46 +00:00
|
|
|
cdef Py_buffer ro_buffer(object data) except *:
    # Request a PyBUF_SIMPLE buffer view of `data` and hand it back by value.
    # Any exception set by PyObject_GetBuffer propagates to the caller
    # (hence `except *`). The caller owns the returned view and has to call
    # PyBuffer_Release() on it when done.
    cdef Py_buffer buf
    PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
    return buf
|
|
|
|
|
|
|
|
|
2022-07-04 18:26:42 +00:00
|
|
|
# New-style (borg 2.0) repositories no longer run crc32 over large amounts of
# data, so crc32 throughput is not critical any more: plain zlib.crc32 is
# good enough and is simply re-exported here.
crc32 = zlib.crc32
|
2017-05-31 19:47:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
def xxh64(data, seed=0):
    """Hash *data* in one shot with XXH64 and return the digest as 8 bytes.

    *data* is read through a buffer view obtained via ro_buffer();
    *seed* is converted to an unsigned 64-bit integer and passed to XXH64().
    """
    cdef unsigned long long c_seed = seed
    cdef XXH64_hash_t value
    cdef XXH64_canonical_t canonical
    cdef Py_buffer src = ro_buffer(data)
    try:
        value = XXH64(src.buf, src.len, c_seed)
    finally:
        # always give the buffer view back, even if hashing raised
        PyBuffer_Release(&src)
    # turn the integer hash into its 8-byte canonical form
    XXH64_canonicalFromHash(&canonical, value)
    return PyBytes_FromStringAndSize(<const char*> canonical.digest, 8)
|
|
|
|
|
|
|
|
|
|
|
|
cdef class StreamingXXH64:
    """Incremental XXH64 hasher.

    Feed data piecewise with update() and obtain the result with digest()
    (8 bytes) or hexdigest() (digest() passed through bin_to_hex()).
    """

    # xxhash streaming state; allocated in __cinit__, freed in __dealloc__.
    cdef XXH64_state_t* state

    def __cinit__(self, seed=0):
        # Convert the seed first, so a bad seed raises before anything is
        # allocated.
        cdef unsigned long long _seed = seed
        self.state = XXH64_createState()
        # XXH64_createState() returns NULL when the allocation fails; passing
        # NULL on to XXH64_reset() would dereference it.
        if self.state is NULL:
            raise MemoryError('XXH64_createState failed')
        if XXH64_reset(self.state, _seed) != XXH_OK:
            raise Exception('XXH64_reset failed')

    def __dealloc__(self):
        # __dealloc__ also runs when __cinit__ raised, so state may be NULL.
        if self.state is not NULL:
            XXH64_freeState(self.state)
            self.state = NULL

    def update(self, data):
        """Feed the bytes of *data* (any buffer-providing object) into the hash.

        Raises Exception if XXH64_update() does not return XXH_OK.
        """
        cdef Py_buffer data_buf = ro_buffer(data)
        try:
            if XXH64_update(self.state, data_buf.buf, data_buf.len) != XXH_OK:
                raise Exception('XXH64_update failed')
        finally:
            # release the buffer view on success and on error alike
            PyBuffer_Release(&data_buf)

    def digest(self):
        """Return the digest of all data fed so far as 8 bytes."""
        cdef XXH64_hash_t value
        cdef XXH64_canonical_t canonical
        value = XXH64_digest(self.state)
        # convert the integer hash into its 8-byte canonical form
        XXH64_canonicalFromHash(&canonical, value)
        return PyBytes_FromStringAndSize(<const char*> canonical.digest, 8)

    def hexdigest(self):
        """Return bin_to_hex() applied to the current digest()."""
        return bin_to_hex(self.digest())
|