add a wrapper around liblz4

This commit is contained in:
Thomas Waldmann 2015-08-01 15:07:54 +02:00
parent 3be55bedd3
commit 27de1b0a43
5 changed files with 87 additions and 4 deletions

1
.gitignore vendored
View File

@ -6,6 +6,7 @@ env
.tox
hashindex.c
chunker.c
compress.c
crypto.c
platform_darwin.c
platform_freebsd.c

67
borg/compress.pyx Normal file
View File

@ -0,0 +1,67 @@
"""
A thin liblz4 wrapper for raw LZ4 compression / decompression.
Features:
- lz4 is super fast
- wrapper releases CPython's GIL to support multithreaded code
- helper buffer only allocated once at instance creation and then reused
But beware:
- this is not very generic, you MUST know the maximum uncompressed input
data size you will feed into the compressor / get from the decompressor!
- you must not do method calls to the same LZ4 instance from different
threads at the same time - create one LZ4 instance per thread!
- compress returns raw compressed data without adding any frame metadata
(like checksums, magics, length of data, etc.)
- decompress expects such raw compressed data as input
"""
from libc.stdlib cimport malloc, free
# C declarations of the liblz4 functions used by the LZ4 class in this module.
cdef extern from "lz4.h":
# Result is used below as the worst case output buffer size for not compressible data.
int LZ4_compressBound(int inputSize)
# Declared nogil so it can be called inside a "with nogil" block.
int LZ4_compress(const char* source, char* dest, int inputSize) nogil
# Declared nogil as well; a negative return value is treated as failure by the caller.
int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
cdef class LZ4:
    """
    Raw LZ4 compressor / decompressor using one reusable output buffer.

    The buffer is allocated once in __cinit__ (sized via LZ4_compressBound for
    max_isize) and released in __dealloc__. Both compress() and decompress()
    write into that buffer and return a copy of the produced bytes, so one
    instance must not be used by multiple threads at the same time.
    """
    cdef char *buffer  # helper buffer for (de)compression output
    cdef int bufsize  # size of this buffer
    cdef int max_isize  # maximum compressor input size safe for this bufsize

    def __cinit__(self, int max_isize):
        """
        Allocate the helper buffer.

        :param max_isize: maximum uncompressed data size fed into compress()
        :raises ValueError: if liblz4 rejects max_isize (negative or too large)
        :raises MemoryError: if the buffer can not be allocated
        """
        self.max_isize = max_isize
        # compute worst case bufsize for not compressible data:
        self.bufsize = LZ4_compressBound(max_isize)
        if self.bufsize <= 0:
            # LZ4_compressBound returns 0 for an invalid input size; fail early
            # instead of doing a malloc(0) and ending up with an unusable buffer.
            raise ValueError('lz4 max_isize is invalid (negative or too large): %d' % max_isize)
        self.buffer = <char *>malloc(self.bufsize)
        if not self.buffer:
            raise MemoryError

    def __dealloc__(self):
        # buffer is still NULL if __cinit__ failed early, free(NULL) is a no-op
        free(self.buffer)

    def compress(self, idata):
        """
        Compress bytes idata and return the raw compressed data as bytes.

        :raises Exception: if idata is longer than max_isize or liblz4 fails
        """
        cdef int isize = len(idata)
        cdef int osize
        cdef char *source
        cdef char *dest = self.buffer
        if isize > self.max_isize:
            raise Exception('lz4 buffer might be too small, increase max_isize!')
        source = idata
        # the GIL is not needed while liblz4 works on the raw C buffers
        with nogil:
            osize = LZ4_compress(source, dest, isize)
        if osize <= 0:
            raise Exception('lz4 compress failed')
        # slicing the char pointer copies osize bytes into a new bytes object
        return dest[:osize]

    def decompress(self, idata):
        """
        Decompress raw compressed bytes idata and return the original data as bytes.

        The output is limited to the size of the helper buffer.

        :raises Exception: if liblz4 reports a failure (negative return value)
        """
        cdef int isize = len(idata)
        cdef int osize = self.bufsize
        cdef char *source = idata  # <-- does not work for memoryview idata, wants bytes
        cdef char *dest = self.buffer
        with nogil:
            osize = LZ4_decompress_safe(source, dest, isize, osize)
        if osize < 0:
            # malformed input data, buffer too small, ...
            raise Exception('lz4 decompress failed')
        return dest[:osize]

View File

@ -13,6 +13,7 @@
.. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
.. _ACL: https://en.wikipedia.org/wiki/Access_control_list
.. _libacl: http://savannah.nongnu.org/projects/acl/
.. _liblz4: https://github.com/Cyan4973/lz4
.. _OpenSSL: https://www.openssl.org/
.. _Python: http://www.python.org/
.. _Buzhash: https://en.wikipedia.org/wiki/Buzhash

View File

@ -9,6 +9,7 @@ Installation
* Python_ >= 3.2
* OpenSSL_ >= 1.0.0
* libacl_
* liblz4_
* some python dependencies, see install_requires in setup.py
General notes
@ -59,6 +60,9 @@ Some of the steps detailed below might be useful also for non-git installs.
# ACL support Headers + Library
apt-get install libacl1-dev libacl1
# lz4 super fast compression support Headers + Library
apt-get install liblz4-dev liblz4-1
# if you do not have gcc / make / etc. yet
apt-get install build-essential
@ -107,13 +111,16 @@ Some of the steps detailed below might be useful also for non-git installs.
# ACL support Headers + Library
sudo dnf install libacl-devel libacl
# lz4 super fast compression support Headers + Library
sudo dnf install lz4-devel lz4
# optional: lowlevel FUSE py binding - to mount backup archives
sudo dnf install python3-llfuse fuse
# optional: for unit testing
sudo dnf install fakeroot
# get |project_name| from github, install it
git clone |git_url|
@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these):
gcc-core
git
libopenssl
liblz4_1 liblz4-devel # from cygwinports.org
make
openssh
openssl-devel

View File

@ -19,6 +19,7 @@ if sys.version_info < min_python:
from setuptools import setup, Extension
compress_source = 'borg/compress.pyx'
crypto_source = 'borg/crypto.pyx'
chunker_source = 'borg/chunker.pyx'
hashindex_source = 'borg/hashindex.pyx'
@ -38,6 +39,7 @@ try:
def make_distribution(self):
self.filelist.extend([
'borg/compress.c',
'borg/crypto.c',
'borg/chunker.c', 'borg/_chunker.c',
'borg/hashindex.c', 'borg/_hashindex.c',
@ -52,6 +54,7 @@ except ImportError:
def __init__(self, *args, **kwargs):
raise Exception('Cython is required to run sdist')
compress_source = compress_source.replace('.pyx', '.c')
crypto_source = crypto_source.replace('.pyx', '.c')
chunker_source = chunker_source.replace('.pyx', '.c')
hashindex_source = hashindex_source.replace('.pyx', '.c')
@ -59,7 +62,9 @@ except ImportError:
platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
from distutils.command.build_ext import build_ext
if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
if not all(os.path.exists(path) for path in [
compress_source, crypto_source, chunker_source, hashindex_source,
platform_linux_source, platform_freebsd_source]):
raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
ext_modules = [
Extension('borg.compress', [compress_source], libraries=['lz4']),
Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
Extension('borg.chunker', [chunker_source]),
Extension('borg.hashindex', [hashindex_source])