From 27de1b0a438d0b63299ee6b9aa973d07d5922021 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 1 Aug 2015 15:07:54 +0200 Subject: [PATCH] add a wrapper around liblz4 --- .gitignore | 1 + borg/compress.pyx | 67 +++++++++++++++++++++++++++++++++++++++++++ docs/global.rst.inc | 1 + docs/installation.rst | 14 +++++++-- setup.py | 8 +++++- 5 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 borg/compress.pyx diff --git a/.gitignore b/.gitignore index f3564a42..f6b10cf7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ env .tox hashindex.c chunker.c +compress.c crypto.c platform_darwin.c platform_freebsd.c diff --git a/borg/compress.pyx b/borg/compress.pyx new file mode 100644 index 00000000..5bd5fdfc --- /dev/null +++ b/borg/compress.pyx @@ -0,0 +1,67 @@ +""" +A thin liblz4 wrapper for raw LZ4 compression / decompression. + +Features: + - lz4 is super fast + - wrapper releases CPython's GIL to support multithreaded code + - helper buffer only allocated once at instance creation and then reused + +But beware: + - this is not very generic, you MUST know the maximum uncompressed input + data size you will feed into the compressor / get from the decompressor! + - you must not do method calls to the same LZ4 instance from different + threads at the same time - create one LZ4 instance per thread! + - compress returns raw compressed data without adding any frame metadata + (like checksums, magics, length of data, etc.) + - decompress expects such raw compressed data as input +""" + +from libc.stdlib cimport malloc, free + + +cdef extern from "lz4.h": + int LZ4_compressBound(int inputSize) + int LZ4_compress(const char* source, char* dest, int inputSize) nogil + int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil + + +cdef class LZ4: + cdef char *buffer # helper buffer for (de)compression output + cdef int bufsize # size of this buffer + cdef int max_isize # maximum compressor input size safe for this bufsize + + def __cinit__(self, int max_isize): + self.max_isize = max_isize + # compute worst case bufsize for not compressible data: + self.bufsize = LZ4_compressBound(max_isize) + self.buffer = malloc(self.bufsize) + if not self.buffer: + raise MemoryError + + def __dealloc__(self): + free(self.buffer) + + def compress(self, idata): + cdef int isize = len(idata) + if isize > self.max_isize: + raise Exception('lz4 buffer might be too small, increase max_isize!') + cdef int osize + cdef char *source = idata + cdef char *dest = self.buffer + with nogil: + osize = LZ4_compress(source, dest, isize) + if not osize: + raise Exception('lz4 compress failed') + return dest[:osize] + + def decompress(self, idata): + cdef int isize = len(idata) + cdef int osize = self.bufsize + cdef char *source = idata # <-- does not work for memoryview idata, wants bytes + cdef char *dest = self.buffer + with nogil: + osize = LZ4_decompress_safe(source, dest, isize, osize) + if osize < 0: + # malformed input data, buffer too small, ... + raise Exception('lz4 decompress failed') + return dest[:osize] diff --git a/docs/global.rst.inc b/docs/global.rst.inc index c0629a14..c8c49049 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -13,6 +13,7 @@ .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _libacl: http://savannah.nongnu.org/projects/acl/ +.. _liblz4: https://github.com/Cyan4973/lz4 .. _OpenSSL: https://www.openssl.org/ .. _Python: http://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash diff --git a/docs/installation.rst b/docs/installation.rst index 90bd33f8..5a027b2c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -9,6 +9,7 @@ Installation * Python_ >= 3.2 * OpenSSL_ >= 1.0.0 * libacl_ +* liblz4_ * some python dependencies, see install_requires in setup.py General notes @@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library apt-get install libacl1-dev libacl1 + # lz4 super fast compression support Headers + Library + apt-get install liblz4-dev liblz4-1 + # if you do not have gcc / make / etc. yet apt-get install build-essential @@ -107,13 +111,16 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library sudo dnf install libacl-devel libacl - + + # lz4 super fast compression support Headers + Library + sudo dnf install lz4 + # optional: lowlevel FUSE py binding - to mount backup archives sudo dnf install python3-llfuse fuse - + # optional: for unit testing sudo dnf install fakeroot - + # get |project_name| from github, install it git clone |git_url| @@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these): gcc-core git libopenssl + liblz4_1 liblz4-devel # from cygwinports.org make openssh openssl-devel diff --git a/setup.py b/setup.py index edd75dc1..87de52b7 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ if sys.version_info < min_python: from setuptools import setup, Extension +compress_source = 'borg/compress.pyx' crypto_source = 'borg/crypto.pyx' chunker_source = 'borg/chunker.pyx' hashindex_source = 'borg/hashindex.pyx' @@ -38,6 +39,7 @@ try: def make_distribution(self): self.filelist.extend([ + 'borg/compress.c', 'borg/crypto.c', 'borg/chunker.c', 'borg/_chunker.c', 'borg/hashindex.c', 'borg/_hashindex.c', @@ -52,6 +54,7 @@ except ImportError: def __init__(self, *args, **kwargs): raise Exception('Cython is required to run sdist') + compress_source = compress_source.replace('.pyx', '.c') crypto_source = crypto_source.replace('.pyx', '.c') chunker_source = chunker_source.replace('.pyx', '.c') hashindex_source = hashindex_source.replace('.pyx', '.c') @@ -59,7 +62,9 @@ except ImportError: platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c') platform_darwin_source = platform_darwin_source.replace('.pyx', '.c') from distutils.command.build_ext import build_ext - if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]): + if not all(os.path.exists(path) for path in [ + compress_source, crypto_source, chunker_source, hashindex_source, + platform_linux_source, platform_freebsd_source]): raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version') @@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass() cmdclass.update({'build_ext': build_ext, 'sdist': Sdist}) ext_modules = [ + Extension('borg.compress', [compress_source], libraries=['lz4']), Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source])