add zstd compression

based on willyvmm's work in PR #3116, but with some changes: - removed any multithreading changes - add zstandard in setup.py install_requires - tests - fix: minimum compression level is 1 (not 0) - use 3 for the default compression level - use ID 03 00 for zstd - only convert to bytes if we don't have bytes yet - move zstd code so that code blocks are ordered by ID - other cosmetic fixes (cherry picked from commit 11b2311e6e)
2017-12-02 22:43:34 +01:00 · 2017-12-02 22:43:34 +01:00 · 6bad2395dd
parent 4ac6ee221a
commit 6bad2395dd
4 changed files with 86 additions and 6 deletions
--- a/setup.py
+++ b/setup.py
@ -24,7 +24,7 @@ on_rtd = os.environ.get('READTHEDOCS')
 # msgpack pure python data corruption was fixed in 0.4.6.
 # Also, we might use some rather recent API features.
-install_requires = ['msgpack-python>=0.4.6', ]
+install_requires = ['msgpack-python>=0.4.6', 'zstandard', ]
 # note for package maintainers: if you package borgbackup for distribution,
 # please add llfuse as a *requirement* on all platforms that have a working
--- a/src/borg/compress.pyx
+++ b/src/borg/compress.pyx
@ -22,9 +22,15 @@ try:
 except ImportError:
    lzma = None
 try:
    import zstd
 except ImportError:
    zstd = None
 from .helpers import Buffer, DecompressionError
-API_VERSION = '1.1_03'
+API_VERSION = '1.1_04'
 cdef extern from "lz4.h":
    int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@ -186,6 +192,38 @@ class LZMA(CompressorBase):
            raise DecompressionError(str(e)) from None
 class ZSTD(CompressorBase):
    """
    zstd compression / decompression (pypi: zstandard, gh: python-zstandard)

    A fresh zstd compressor / decompressor object is created for every call.
    The compression level is stored on the instance as ``level``.
    """
    # This is a NOT THREAD SAFE implementation.
    # Only ONE python context must be created at a time.
    # It should work flawlessly as long as borg calls ONLY ONE compression job at a time.
    ID = b'\x03\x00'
    name = 'zstd'
    def __init__(self, level=3, **kwargs):
        """
        Remember the compression level and make sure the zstd module is usable.

        :param level: zstd compression level (default 3)
        :param kwargs: passed through unchanged to the base class initializer
        :raises ValueError: if the zstd module could not be imported
        """
        super().__init__(**kwargs)
        self.level = level
        if zstd is None:
            raise ValueError('No zstd support found.')
    def compress(self, data):
        """
        Compress *data* with zstd at ``self.level`` and hand the result to the
        base class ``compress`` (not visible here; presumably it adds the ID header).
        """
        # zstd < 0.9.0 does not work with memoryview, so make sure we have bytes
        payload = data if isinstance(data, bytes) else bytes(data)
        compressor = zstd.ZstdCompressor(level=self.level, write_content_size=True)
        return super().compress(compressor.compress(payload))
    def decompress(self, data):
        """
        Run *data* through the base class ``decompress`` first (not visible here;
        presumably it strips the ID header), then decompress the rest with zstd.

        :raises DecompressionError: if zstd reports an error while decompressing
        """
        # zstd < 0.9.0 does not work with memoryview, so make sure we have bytes
        raw = data if isinstance(data, bytes) else bytes(data)
        decompressor = zstd.ZstdDecompressor()
        body = super().decompress(raw)
        try:
            return decompressor.decompress(body)
        except zstd.ZstdError as err:
            # re-raise as borg's own error type, without the zstd traceback chain
            raise DecompressionError(str(err)) from None
 class ZLIB(CompressorBase):
    """
    zlib compression / decompression (python stdlib)
@ -289,9 +327,10 @@ COMPRESSOR_TABLE = {
    ZLIB.name: ZLIB,
    LZMA.name: LZMA,
    Auto.name: Auto,
    ZSTD.name: ZSTD,
 }
 # List of possible compression types. Does not include Auto, since it is a meta-Compressor.
-COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ]  # check fast stuff first
 def get_compressor(name, **kwargs):
    cls = COMPRESSOR_TABLE[name]
@ -344,6 +383,16 @@ class CompressionSpec:
            else:
                raise ValueError
            self.level = level
        elif self.name in ('zstd', ):
            if count < 2:
                level = 3  # default compression level in zstd
            elif count == 2:
                level = int(values[1])
                if not 1 <= level <= 22:
                    raise ValueError
            else:
                raise ValueError
            self.level = level
        elif self.name == 'auto':
            if 2 <= count <= 3:
                compression = ','.join(values[1:])
@ -357,7 +406,7 @@ class CompressionSpec:
    def compressor(self):
        if self.name in ('none', 'lz4', ):
            return get_compressor(self.name)
-        elif self.name in ('zlib', 'lzma', ):
+        elif self.name in ('zlib', 'lzma', 'zstd', ):
            return get_compressor(self.name, level=self.level)
        elif self.name == 'auto':
            return get_compressor(self.name, compressor=self.inner.compressor)
--- a/src/borg/helpers.py
+++ b/src/borg/helpers.py
@ -135,7 +135,7 @@ def check_extension_modules():
        raise ExtensionModuleError
    if chunker.API_VERSION != '1.1_01':
        raise ExtensionModuleError
-    if compress.API_VERSION != '1.1_03':
+    if compress.API_VERSION != '1.1_04':
        raise ExtensionModuleError
    if borg.crypto.low_level.API_VERSION != '1.1_02':
        raise ExtensionModuleError
--- a/src/borg/testsuite/compress.py
+++ b/src/borg/testsuite/compress.py
@ -5,9 +5,14 @@ try:
 except ImportError:
    lzma = None
 try:
    import zstd
 except ImportError:
    zstd = None
 import pytest
-from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto
+from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, ZSTD, Auto
 buffer = bytes(2**16)
@ -69,6 +74,16 @@ def test_lzma():
    assert data == Compressor(**params).decompress(cdata)  # autodetect
 def test_zstd():
    """Round-trip the test data through zstd, directly and via Compressor autodetection."""
    if zstd is None:
        pytest.skip("No zstd support found.")
    compressor = get_compressor(name='zstd')
    compressed = compressor.compress(data)
    # the test data must actually shrink
    assert len(compressed) < len(data)
    # decompressing with the same compressor object gives back the input
    assert data == compressor.decompress(compressed)
    assert data == Compressor(**params).decompress(compressed)  # autodetect
 def test_autodetect_invalid():
    with pytest.raises(ValueError):
        Compressor(**params).decompress(b'\xff\xfftotalcrap')
@ -104,6 +119,12 @@ def test_compressor():
            dict(name='lzma', level=6),
            # we do not test lzma on level 9 because of the huge memory needs
        ]
    if zstd:
        params_list += [
            dict(name='zstd', level=1),
            dict(name='zstd', level=3),
            # also avoiding high zstd levels, memory needs unclear
        ]
    for params in params_list:
        c = Compressor(**params)
        assert data == c.decompress(c.compress(data))
@ -154,6 +175,16 @@ def test_compression_specs():
    assert isinstance(lzma, LZMA)
    assert lzma.level == 9
    zstd = CompressionSpec('zstd').compressor
    assert isinstance(zstd, ZSTD)
    assert zstd.level == 3
    zstd = CompressionSpec('zstd,1').compressor
    assert isinstance(zstd, ZSTD)
    assert zstd.level == 1
    zstd = CompressionSpec('zstd,22').compressor
    assert isinstance(zstd, ZSTD)
    assert zstd.level == 22
    with pytest.raises(ValueError):
        CompressionSpec('lzma,9,invalid')
    with pytest.raises(ValueError):