add a wrapper around liblz4

This commit is contained in:
Thomas Waldmann 2015-08-01 15:07:54 +02:00
parent 3be55bedd3
commit 27de1b0a43
5 changed files with 87 additions and 4 deletions

1
.gitignore vendored
View File

@ -6,6 +6,7 @@ env
.tox .tox
hashindex.c hashindex.c
chunker.c chunker.c
compress.c
crypto.c crypto.c
platform_darwin.c platform_darwin.c
platform_freebsd.c platform_freebsd.c

67
borg/compress.pyx Normal file
View File

@ -0,0 +1,67 @@
"""
A thin liblz4 wrapper for raw LZ4 compression / decompression.
Features:
- lz4 is super fast
- wrapper releases CPython's GIL to support multithreaded code
- helper buffer only allocated once at instance creation and then reused
But beware:
- this is not very generic, you MUST know the maximum uncompressed input
data size you will feed into the compressor / get from the decompressor!
- you must not do method calls to the same LZ4 instance from different
threads at the same time - create one LZ4 instance per thread!
- compress returns raw compressed data without adding any frame metadata
(like checksums, magics, length of data, etc.)
- decompress expects such raw compressed data as input
"""
from libc.stdlib cimport malloc, free
# C declarations imported from liblz4's public header.
# The two functions marked `nogil` are the ones this module calls inside
# `with nogil:` blocks, i.e. while the GIL is released.
cdef extern from "lz4.h":
# Used below to size the output buffer for the worst case (incompressible data).
int LZ4_compressBound(int inputSize)
# Compresses inputSize bytes from source into dest; this module treats a
# return value of 0 as failure. The call takes no output-size argument.
int LZ4_compress(const char* source, char* dest, int inputSize) nogil
# Decompresses inputSize bytes from source into dest, given maxOutputSize as
# the destination capacity; this module treats a negative return as failure.
int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
cdef class LZ4:
    """Raw LZ4 compressor / decompressor that reuses one preallocated buffer.

    The output buffer is allocated once in ``__cinit__`` and sized for the
    worst case of ``max_isize`` bytes of incompressible input. Both
    ``compress`` and ``decompress`` write into that buffer and return a
    ``bytes`` copy of the produced data.

    An instance must not be used from several threads at the same time,
    because all calls share the same helper buffer.
    """
    cdef char *buffer  # helper buffer for (de)compression output
    cdef int bufsize  # size of this buffer
    cdef int max_isize  # maximum compressor input size safe for this bufsize

    def __cinit__(self, int max_isize):
        """Allocate the helper buffer for inputs of up to ``max_isize`` bytes.

        Raises:
            ValueError: if liblz4 reports no valid worst-case size for
                ``max_isize`` (negative or larger than liblz4 supports).
            MemoryError: if the buffer can not be allocated.
        """
        self.max_isize = max_isize
        # compute worst case bufsize for not compressible data:
        self.bufsize = LZ4_compressBound(max_isize)
        if self.bufsize <= 0:
            # LZ4_compressBound signals an unsupported input size with 0.
            # Continuing would leave us with a (possibly zero-sized) buffer
            # that the unbounded LZ4_compress call could overrun.
            raise ValueError('lz4 max_isize is out of range: %d' % max_isize)
        self.buffer = <char *>malloc(self.bufsize)
        if not self.buffer:
            raise MemoryError

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ raised
        # before the buffer was allocated.
        free(self.buffer)

    def compress(self, idata):
        """Compress ``idata`` and return the raw LZ4 block as ``bytes``.

        ``idata`` may be ``bytes``, ``bytearray`` or ``memoryview``; the latter
        two are copied into a ``bytes`` object first.

        Raises:
            Exception: if ``idata`` is longer than ``max_isize`` or if liblz4
                reports a compression failure.
        """
        cdef int isize
        cdef int osize
        cdef char *source
        cdef char *dest = self.buffer
        if isinstance(idata, (bytearray, memoryview)):
            # the char* coercion below wants bytes; this also makes len()
            # a true byte count for multi-byte memoryview formats
            idata = bytes(idata)
        isize = len(idata)
        if isize > self.max_isize:
            raise Exception('lz4 buffer might be too small, increase max_isize!')
        source = idata
        with nogil:
            osize = LZ4_compress(source, dest, isize)
        if osize <= 0:
            # 0 is the documented failure value; never slice with a negative size
            raise Exception('lz4 compress failed')
        return dest[:osize]

    def decompress(self, idata):
        """Decompress the raw LZ4 block ``idata`` and return it as ``bytes``.

        ``idata`` may be ``bytes``, ``bytearray`` or ``memoryview``; the latter
        two are copied into a ``bytes`` object first.

        Raises:
            Exception: if liblz4 rejects the input (malformed data, or the
                decompressed data does not fit into the helper buffer).
        """
        cdef int isize
        cdef int osize = self.bufsize
        cdef char *source
        cdef char *dest = self.buffer
        if isinstance(idata, (bytearray, memoryview)):
            # the char* coercion below does not work for memoryview, wants bytes
            idata = bytes(idata)
        isize = len(idata)
        source = idata
        with nogil:
            osize = LZ4_decompress_safe(source, dest, isize, osize)
        if osize < 0:
            # malformed input data, buffer too small, ...
            raise Exception('lz4 decompress failed')
        return dest[:osize]

View File

@ -13,6 +13,7 @@
.. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
.. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _ACL: https://en.wikipedia.org/wiki/Access_control_list
.. _libacl: http://savannah.nongnu.org/projects/acl/ .. _libacl: http://savannah.nongnu.org/projects/acl/
.. _liblz4: https://github.com/Cyan4973/lz4
.. _OpenSSL: https://www.openssl.org/ .. _OpenSSL: https://www.openssl.org/
.. _Python: http://www.python.org/ .. _Python: http://www.python.org/
.. _Buzhash: https://en.wikipedia.org/wiki/Buzhash .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash

View File

@ -9,6 +9,7 @@ Installation
* Python_ >= 3.2 * Python_ >= 3.2
* OpenSSL_ >= 1.0.0 * OpenSSL_ >= 1.0.0
* libacl_ * libacl_
* liblz4_
* some python dependencies, see install_requires in setup.py * some python dependencies, see install_requires in setup.py
General notes General notes
@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs.
# ACL support Headers + Library # ACL support Headers + Library
apt-get install libacl1-dev libacl1 apt-get install libacl1-dev libacl1
# lz4 super fast compression support Headers + Library
apt-get install liblz4-dev liblz4-1
# if you do not have gcc / make / etc. yet # if you do not have gcc / make / etc. yet
apt-get install build-essential apt-get install build-essential
@ -107,13 +111,16 @@ Some of the steps detailled below might be useful also for non-git installs.
# ACL support Headers + Library # ACL support Headers + Library
sudo dnf install libacl-devel libacl sudo dnf install libacl-devel libacl
# lz4 super fast compression support Headers + Library
sudo dnf install lz4-devel lz4
# optional: lowlevel FUSE py binding - to mount backup archives # optional: lowlevel FUSE py binding - to mount backup archives
sudo dnf install python3-llfuse fuse sudo dnf install python3-llfuse fuse
# optional: for unit testing # optional: for unit testing
sudo dnf install fakeroot sudo dnf install fakeroot
# get |project_name| from github, install it # get |project_name| from github, install it
git clone |git_url| git clone |git_url|
@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these):
gcc-core gcc-core
git git
libopenssl libopenssl
liblz4_1 liblz4-devel # from cygwinports.org
make make
openssh openssh
openssl-devel openssl-devel

View File

@ -19,6 +19,7 @@ if sys.version_info < min_python:
from setuptools import setup, Extension from setuptools import setup, Extension
compress_source = 'borg/compress.pyx'
crypto_source = 'borg/crypto.pyx' crypto_source = 'borg/crypto.pyx'
chunker_source = 'borg/chunker.pyx' chunker_source = 'borg/chunker.pyx'
hashindex_source = 'borg/hashindex.pyx' hashindex_source = 'borg/hashindex.pyx'
@ -38,6 +39,7 @@ try:
def make_distribution(self): def make_distribution(self):
self.filelist.extend([ self.filelist.extend([
'borg/compress.c',
'borg/crypto.c', 'borg/crypto.c',
'borg/chunker.c', 'borg/_chunker.c', 'borg/chunker.c', 'borg/_chunker.c',
'borg/hashindex.c', 'borg/_hashindex.c', 'borg/hashindex.c', 'borg/_hashindex.c',
@ -52,6 +54,7 @@ except ImportError:
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
raise Exception('Cython is required to run sdist') raise Exception('Cython is required to run sdist')
compress_source = compress_source.replace('.pyx', '.c')
crypto_source = crypto_source.replace('.pyx', '.c') crypto_source = crypto_source.replace('.pyx', '.c')
chunker_source = chunker_source.replace('.pyx', '.c') chunker_source = chunker_source.replace('.pyx', '.c')
hashindex_source = hashindex_source.replace('.pyx', '.c') hashindex_source = hashindex_source.replace('.pyx', '.c')
@ -59,7 +62,9 @@ except ImportError:
platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c') platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
platform_darwin_source = platform_darwin_source.replace('.pyx', '.c') platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
from distutils.command.build_ext import build_ext from distutils.command.build_ext import build_ext
if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]): if not all(os.path.exists(path) for path in [
compress_source, crypto_source, chunker_source, hashindex_source,
platform_linux_source, platform_freebsd_source]):
raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version') raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
cmdclass.update({'build_ext': build_ext, 'sdist': Sdist}) cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
ext_modules = [ ext_modules = [
Extension('borg.compress', [compress_source], libraries=['lz4']),
Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
Extension('borg.chunker', [chunker_source]), Extension('borg.chunker', [chunker_source]),
Extension('borg.hashindex', [hashindex_source]) Extension('borg.hashindex', [hashindex_source])