From 01078328e26295045055eac9822c8dcf7d3a1d45 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sun, 3 Dec 2017 05:38:23 +0100
Subject: [PATCH] zstd: use own Cython-based binding, remove python-zstandard dep

currently requires an externally available libzstd >= 1.3.0, no bundled zstd yet.

(cherry picked from commit aec36f64a2e7271ab04d70ed2cad2bceb33930d4)
---
 setup.py                             | 26 +++++++++-
 src/borg/algorithms/zstd-libselect.h |  5 ++
 src/borg/compress.pyx                | 73 ++++++++++++++++++++--------
 3 files changed, 81 insertions(+), 23 deletions(-)
 create mode 100644 src/borg/algorithms/zstd-libselect.h

diff --git a/setup.py b/setup.py
index 4307590cf..07740708e 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@ on_rtd = os.environ.get('READTHEDOCS')
 
 # msgpack pure python data corruption was fixed in 0.4.6.
 # Also, we might use some rather recent API features.
-install_requires = ['msgpack-python>=0.4.6', 'zstandard', ]
+install_requires = ['msgpack-python>=0.4.6', ]
 
 # note for package maintainers: if you package borgbackup for distribution,
 # please add llfuse as a *requirement* on all platforms that have a working
@@ -155,10 +155,20 @@ def detect_libb2(prefixes):
                     return prefix
 
 
+def detect_libzstd(prefixes):
+    for prefix in prefixes:
+        filename = os.path.join(prefix, 'include', 'zstd.h')
+        if os.path.exists(filename):
+            with open(filename, 'r') as fd:
+                if 'ZSTD_getFrameContentSize' in fd.read():
+                    return prefix
+
+
 include_dirs = []
 library_dirs = []
 define_macros = []
 crypto_libraries = ['crypto']
+compression_libraries = ['lz4']
 
 possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl',
                              '/usr/local/openssl', '/usr/local/borg', '/opt/local', '/opt/pkg', ]
@@ -194,6 +204,18 @@ if libb2_prefix:
     crypto_libraries.append('b2')
     define_macros.append(('BORG_USE_LIBB2', 'YES'))
 
+possible_libzstd_prefixes = ['/usr', '/usr/local', '/usr/local/opt/libzstd', '/usr/local/libzstd',
+                             '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+if os.environ.get('BORG_LIBZSTD_PREFIX'):
+    possible_libzstd_prefixes.insert(0, os.environ.get('BORG_LIBZSTD_PREFIX'))
+libzstd_prefix = detect_libzstd(possible_libzstd_prefixes)
+if libzstd_prefix:
+    print('Detected and preferring libzstd over bundled ZSTD')
+    include_dirs.append(os.path.join(libzstd_prefix, 'include'))
+    library_dirs.append(os.path.join(libzstd_prefix, 'lib'))
+    compression_libraries.append('zstd')
+    define_macros.append(('BORG_USE_LIBZSTD', 'YES'))
+
 
 with open('README.rst', 'r') as fd:
     long_description = fd.read()
@@ -754,7 +776,7 @@ cmdclass = {
 ext_modules = []
 if not on_rtd:
     ext_modules += [
-    Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
+    Extension('borg.compress', [compress_source], libraries=compression_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.crypto.low_level', [crypto_ll_source], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.hashindex', [hashindex_source]),
     Extension('borg.item', [item_source]),
diff --git a/src/borg/algorithms/zstd-libselect.h b/src/borg/algorithms/zstd-libselect.h
new file mode 100644
index 000000000..bb71553c1
--- /dev/null
+++ b/src/borg/algorithms/zstd-libselect.h
@@ -0,0 +1,5 @@
+#ifdef BORG_USE_LIBZSTD
+#include <zstd.h>
+#else
+#error "TODO"
+#endif
diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx
index 475f967da..be2c3c3d1 100644
--- a/src/borg/compress.pyx
+++ b/src/borg/compress.pyx
@@ -22,11 +22,6 @@ try:
 except ImportError:
     lzma = None
 
-try:
-    import zstd
-except ImportError:
-    zstd = None
-
 
 from .helpers import Buffer, DecompressionError
 
@@ -38,6 +33,17 @@ cdef extern from "lz4.h":
     int LZ4_compressBound(int inputSize) nogil
 
 
+cdef extern from "algorithms/zstd-libselect.h":
+    size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) nogil
+    size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t compressedSize) nogil
+    size_t ZSTD_compressBound(size_t srcSize) nogil
+    unsigned long long ZSTD_CONTENTSIZE_UNKNOWN
+    unsigned long long ZSTD_CONTENTSIZE_ERROR
+    unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) nogil
+    unsigned ZSTD_isError(size_t code) nogil
+    const char* ZSTD_getErrorName(size_t code) nogil
+
+
 buffer = Buffer(bytearray, size=0)
 
 
@@ -203,25 +209,50 @@ class ZSTD(CompressorBase):
     def __init__(self, level=3, **kwargs):
         super().__init__(**kwargs)
         self.level = level
-        if zstd is None:
-            raise ValueError('No zstd support found.')
 
-    def compress(self, data):
-        if not isinstance(data, bytes):
-            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
-        cctx = zstd.ZstdCompressor(level=self.level, write_content_size=True)
-        data = cctx.compress(data)
-        return super().compress(data)
+    def compress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        cdef int isize = len(idata)
+        cdef size_t osize
+        cdef char *source = idata
+        cdef char *dest
+        cdef int level = self.level
+        osize = ZSTD_compressBound(isize)
+        buf = buffer.get(osize)
+        dest = <char *> buf
+        with nogil:
+            osize = ZSTD_compress(dest, osize, source, isize, level)
+        if ZSTD_isError(osize):
+            raise Exception('zstd compress failed: %s' % ZSTD_getErrorName(osize))
+        return super().compress(dest[:osize])
 
-    def decompress(self, data):
-        if not isinstance(data, bytes):
-            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
-        dctx = zstd.ZstdDecompressor()
-        data = super().decompress(data)
+    def decompress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        idata = super().decompress(idata)
+        cdef int isize = len(idata)
+        cdef unsigned long long osize
+        cdef unsigned long long rsize
+        cdef char *source = idata
+        cdef char *dest
+        osize = ZSTD_getFrameContentSize(source, isize)
+        if osize == ZSTD_CONTENTSIZE_ERROR:
+            raise DecompressionError('zstd get size failed: data was not compressed by zstd')
+        if osize == ZSTD_CONTENTSIZE_UNKNOWN:
+            raise DecompressionError('zstd get size failed: original size unknown')
         try:
-            return dctx.decompress(data)
-        except zstd.ZstdError as e:
-            raise DecompressionError(str(e)) from None
+            buf = buffer.get(osize)
+        except MemoryError:
+            raise DecompressionError('MemoryError')
+        dest = <char *> buf
+        with nogil:
+            rsize = ZSTD_decompress(dest, osize, source, isize)
+        if ZSTD_isError(rsize):
+            raise DecompressionError('zstd decompress failed: %s' % ZSTD_getErrorName(rsize))
+        if rsize != osize:
+            raise DecompressionError('zstd decompress failed: size mismatch')
+        return dest[:osize]
 
 
 class ZLIB(CompressorBase):