add zstd compression

based on willyvmm's work in PR #3116, but with some changes:

- removed any multithreading changes
- add zstandard in setup.py install_requires
- tests
- fix: minimum compression level is 1 (not 0)
- use 3 for the default compression level
- use ID 03 00 for zstd
- only convert to bytes if we don't have bytes yet
- move zstd code so that code blocks are ordered by ID
- other cosmetic fixes

(cherry picked from commit 11b2311e6e)
This commit is contained in:
Thomas Waldmann 2017-12-02 22:43:34 +01:00
parent 4ac6ee221a
commit 6bad2395dd
4 changed files with 86 additions and 6 deletions

View File

@ -24,7 +24,7 @@ on_rtd = os.environ.get('READTHEDOCS')
# msgpack pure python data corruption was fixed in 0.4.6.
# Also, we might use some rather recent API features.
install_requires = ['msgpack-python>=0.4.6', ]
install_requires = ['msgpack-python>=0.4.6', 'zstandard', ]
# note for package maintainers: if you package borgbackup for distribution,
# please add llfuse as a *requirement* on all platforms that have a working

View File

@ -22,9 +22,15 @@ try:
except ImportError:
lzma = None
try:
import zstd
except ImportError:
zstd = None
from .helpers import Buffer, DecompressionError
API_VERSION = '1.1_03'
API_VERSION = '1.1_04'
cdef extern from "lz4.h":
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@ -186,6 +192,38 @@ class LZMA(CompressorBase):
raise DecompressionError(str(e)) from None
class ZSTD(CompressorBase):
    """
    zstd compression / decompression (pypi: zstandard, gh: python-zstandard)

    A fresh zstd (de)compression context is created for every call; the
    compression level chosen at construction time is used for compressing.
    """
    # This implementation is NOT THREAD SAFE.
    # Only ONE python context must be created at a time.
    # It should work flawlessly as long as borg runs ONLY ONE compression job at a time.
    ID = b'\x03\x00'
    name = 'zstd'

    def __init__(self, level=3, **kwargs):
        """
        Remember the compression *level* (default: 3).

        Raises ValueError if the zstd module could not be imported.
        """
        super().__init__(**kwargs)
        self.level = level
        if zstd is None:
            raise ValueError('No zstd support found.')

    def compress(self, data):
        """
        Compress *data* with zstd and hand the result to the base class.

        NOTE(review): the base class compress() presumably adds the ID header -
        confirm in CompressorBase.
        """
        # zstd < 0.9.0 does not work with memoryview, so only bytes are passed on
        payload = data if isinstance(data, bytes) else bytes(data)
        compressor = zstd.ZstdCompressor(level=self.level, write_content_size=True)
        return super().compress(compressor.compress(payload))

    def decompress(self, data):
        """
        Let the base class pre-process *data*, then zstd-decompress the result.

        Raises DecompressionError if zstd reports an error.
        """
        # zstd < 0.9.0 does not work with memoryview, so only bytes are passed on
        raw = data if isinstance(data, bytes) else bytes(data)
        decompressor = zstd.ZstdDecompressor()
        body = super().decompress(raw)
        try:
            return decompressor.decompress(body)
        except zstd.ZstdError as exc:
            raise DecompressionError(str(exc)) from None
class ZLIB(CompressorBase):
"""
zlib compression / decompression (python stdlib)
@ -289,9 +327,10 @@ COMPRESSOR_TABLE = {
ZLIB.name: ZLIB,
LZMA.name: LZMA,
Auto.name: Auto,
ZSTD.name: ZSTD,
}
# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ] # check fast stuff first
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ] # check fast stuff first
def get_compressor(name, **kwargs):
cls = COMPRESSOR_TABLE[name]
@ -344,6 +383,16 @@ class CompressionSpec:
else:
raise ValueError
self.level = level
elif self.name in ('zstd', ):
if count < 2:
level = 3 # default compression level in zstd
elif count == 2:
level = int(values[1])
if not 1 <= level <= 22:
raise ValueError
else:
raise ValueError
self.level = level
elif self.name == 'auto':
if 2 <= count <= 3:
compression = ','.join(values[1:])
@ -357,7 +406,7 @@ class CompressionSpec:
def compressor(self):
if self.name in ('none', 'lz4', ):
return get_compressor(self.name)
elif self.name in ('zlib', 'lzma', ):
elif self.name in ('zlib', 'lzma', 'zstd', ):
return get_compressor(self.name, level=self.level)
elif self.name == 'auto':
return get_compressor(self.name, compressor=self.inner.compressor)

View File

@ -135,7 +135,7 @@ def check_extension_modules():
raise ExtensionModuleError
if chunker.API_VERSION != '1.1_01':
raise ExtensionModuleError
if compress.API_VERSION != '1.1_03':
if compress.API_VERSION != '1.1_04':
raise ExtensionModuleError
if borg.crypto.low_level.API_VERSION != '1.1_02':
raise ExtensionModuleError

View File

@ -5,9 +5,14 @@ try:
except ImportError:
lzma = None
try:
import zstd
except ImportError:
zstd = None
import pytest
from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto
from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, ZSTD, Auto
buffer = bytes(2**16)
@ -69,6 +74,16 @@ def test_lzma():
assert data == Compressor(**params).decompress(cdata) # autodetect
def test_zstd():
    """Round-trip the module-level test data through the zstd compressor."""
    if zstd is None:
        pytest.skip("No zstd support found.")
    compressor = get_compressor(name='zstd')
    compressed = compressor.compress(data)
    # the compressed form must be smaller than the input
    assert len(compressed) < len(data)
    # explicit zstd compressor restores the input
    assert data == compressor.decompress(compressed)
    # autodetect
    assert data == Compressor(**params).decompress(compressed)
def test_autodetect_invalid():
with pytest.raises(ValueError):
Compressor(**params).decompress(b'\xff\xfftotalcrap')
@ -104,6 +119,12 @@ def test_compressor():
dict(name='lzma', level=6),
# we do not test lzma on level 9 because of the huge memory needs
]
if zstd:
params_list += [
dict(name='zstd', level=1),
dict(name='zstd', level=3),
# also avoiding high zstd levels, memory needs unclear
]
for params in params_list:
c = Compressor(**params)
assert data == c.decompress(c.compress(data))
@ -154,6 +175,16 @@ def test_compression_specs():
assert isinstance(lzma, LZMA)
assert lzma.level == 9
zstd = CompressionSpec('zstd').compressor
assert isinstance(zstd, ZSTD)
assert zstd.level == 3
zstd = CompressionSpec('zstd,1').compressor
assert isinstance(zstd, ZSTD)
assert zstd.level == 1
zstd = CompressionSpec('zstd,22').compressor
assert isinstance(zstd, ZSTD)
assert zstd.level == 22
with pytest.raises(ValueError):
CompressionSpec('lzma,9,invalid')
with pytest.raises(ValueError):