From 6bad2395ddde38232c7669165105d47979632198 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Dec 2017 22:43:34 +0100 Subject: [PATCH] add zstd compression based on willyvmm's work in PR #3116, but some changes: - removed any multithreading changes - add zstandard in setup.py install_requires - tests - fix: minimum compression level is 1 (not 0) - use 3 for the default compression level - use ID 03 00 for zstd - only convert to bytes if we don't have bytes yet - move zstd code so that code blocks are ordered by ID - other cosmetic fixes (cherry picked from commit 11b2311e6ebc43546cef88a39bce744d67940c5a) --- setup.py | 2 +- src/borg/compress.pyx | 55 ++++++++++++++++++++++++++++++++-- src/borg/helpers.py | 2 +- src/borg/testsuite/compress.py | 33 +++++++++++++++++++- 4 files changed, 86 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 1cd2bd006..4307590cf 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ on_rtd = os.environ.get('READTHEDOCS') # msgpack pure python data corruption was fixed in 0.4.6. # Also, we might use some rather recent API features. 
-install_requires = ['msgpack-python>=0.4.6', ] +install_requires = ['msgpack-python>=0.4.6', 'zstandard', ] # note for package maintainers: if you package borgbackup for distribution, # please add llfuse as a *requirement* on all platforms that have a working diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 8e509213e..475f967da 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -22,9 +22,15 @@ try: except ImportError: lzma = None +try: + import zstd +except ImportError: + zstd = None + + from .helpers import Buffer, DecompressionError -API_VERSION = '1.1_03' +API_VERSION = '1.1_04' cdef extern from "lz4.h": int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil @@ -186,6 +192,38 @@ class LZMA(CompressorBase): raise DecompressionError(str(e)) from None +class ZSTD(CompressorBase): + """zstd compression / decompression (pypi: zstandard, gh: python-zstandard)""" + # This implementation is NOT THREAD SAFE. + # Only ONE python context must be created at a time. + # It should work flawlessly as long as borg runs ONLY ONE compression job at a time. 
+ ID = b'\x03\x00' + name = 'zstd' + + def __init__(self, level=3, **kwargs): + super().__init__(**kwargs) + self.level = level + if zstd is None: + raise ValueError('No zstd support found.') + + def compress(self, data): + if not isinstance(data, bytes): + data = bytes(data) # zstd < 0.9.0 does not work with memoryview + cctx = zstd.ZstdCompressor(level=self.level, write_content_size=True) + data = cctx.compress(data) + return super().compress(data) + + def decompress(self, data): + if not isinstance(data, bytes): + data = bytes(data) # zstd < 0.9.0 does not work with memoryview + dctx = zstd.ZstdDecompressor() + data = super().decompress(data) + try: + return dctx.decompress(data) + except zstd.ZstdError as e: + raise DecompressionError(str(e)) from None + + class ZLIB(CompressorBase): """ zlib compression / decompression (python stdlib) @@ -289,9 +327,10 @@ COMPRESSOR_TABLE = { ZLIB.name: ZLIB, LZMA.name: LZMA, Auto.name: Auto, + ZSTD.name: ZSTD, } # List of possible compression types. Does not include Auto, since it is a meta-Compressor. 
-COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ] # check fast stuff first +COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ] # check fast stuff first def get_compressor(name, **kwargs): cls = COMPRESSOR_TABLE[name] @@ -344,6 +383,16 @@ class CompressionSpec: else: raise ValueError self.level = level + elif self.name in ('zstd', ): + if count < 2: + level = 3 # default compression level in zstd + elif count == 2: + level = int(values[1]) + if not 1 <= level <= 22: + raise ValueError + else: + raise ValueError + self.level = level elif self.name == 'auto': if 2 <= count <= 3: compression = ','.join(values[1:]) @@ -357,7 +406,7 @@ class CompressionSpec: def compressor(self): if self.name in ('none', 'lz4', ): return get_compressor(self.name) - elif self.name in ('zlib', 'lzma', ): + elif self.name in ('zlib', 'lzma', 'zstd', ): return get_compressor(self.name, level=self.level) elif self.name == 'auto': return get_compressor(self.name, compressor=self.inner.compressor) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index cb3e10cac..3aea2833d 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -135,7 +135,7 @@ def check_extension_modules(): raise ExtensionModuleError if chunker.API_VERSION != '1.1_01': raise ExtensionModuleError - if compress.API_VERSION != '1.1_03': + if compress.API_VERSION != '1.1_04': raise ExtensionModuleError if borg.crypto.low_level.API_VERSION != '1.1_02': raise ExtensionModuleError diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py index f881ad2c7..b995b0ec3 100644 --- a/src/borg/testsuite/compress.py +++ b/src/borg/testsuite/compress.py @@ -5,9 +5,14 @@ try: except ImportError: lzma = None +try: + import zstd +except ImportError: + zstd = None + import pytest -from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto +from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, ZSTD, Auto buffer = bytes(2**16) @@ -69,6 +74,16 @@ def 
test_lzma(): assert data == Compressor(**params).decompress(cdata) # autodetect +def test_zstd(): + if zstd is None: + pytest.skip("No zstd support found.") + c = get_compressor(name='zstd') + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + def test_autodetect_invalid(): with pytest.raises(ValueError): Compressor(**params).decompress(b'\xff\xfftotalcrap') @@ -104,6 +119,12 @@ def test_compressor(): dict(name='lzma', level=6), # we do not test lzma on level 9 because of the huge memory needs ] + if zstd: + params_list += [ + dict(name='zstd', level=1), + dict(name='zstd', level=3), + # also avoiding high zstd levels, memory needs unclear + ] for params in params_list: c = Compressor(**params) assert data == c.decompress(c.compress(data)) @@ -154,6 +175,16 @@ def test_compression_specs(): assert isinstance(lzma, LZMA) assert lzma.level == 9 + zstd = CompressionSpec('zstd').compressor + assert isinstance(zstd, ZSTD) + assert zstd.level == 3 + zstd = CompressionSpec('zstd,1').compressor + assert isinstance(zstd, ZSTD) + assert zstd.level == 1 + zstd = CompressionSpec('zstd,22').compressor + assert isinstance(zstd, ZSTD) + assert zstd.level == 22 + with pytest.raises(ValueError): CompressionSpec('lzma,9,invalid') with pytest.raises(ValueError):