mirror of https://github.com/borgbackup/borg.git
add zstd compression
based on willyvmm's work in PR #3116, but with some changes:
- removed any multithreading changes
- add zstandard in setup.py install_requires
- tests
- fix: minimum compression level is 1 (not 0)
- use 3 for the default compression level
- use ID 03 00 for zstd
- only convert to bytes if we don't have bytes yet
- move zstd code so that code blocks are ordered by ID
- other cosmetic fixes
(cherry picked from commit 11b2311e6e)
This commit is contained in:
parent
4ac6ee221a
commit
6bad2395dd
2
setup.py
2
setup.py
|
@ -24,7 +24,7 @@ on_rtd = os.environ.get('READTHEDOCS')
|
||||||
|
|
||||||
# msgpack pure python data corruption was fixed in 0.4.6.
|
# msgpack pure python data corruption was fixed in 0.4.6.
|
||||||
# Also, we might use some rather recent API features.
|
# Also, we might use some rather recent API features.
|
||||||
install_requires = ['msgpack-python>=0.4.6', ]
|
install_requires = ['msgpack-python>=0.4.6', 'zstandard', ]
|
||||||
|
|
||||||
# note for package maintainers: if you package borgbackup for distribution,
|
# note for package maintainers: if you package borgbackup for distribution,
|
||||||
# please add llfuse as a *requirement* on all platforms that have a working
|
# please add llfuse as a *requirement* on all platforms that have a working
|
||||||
|
|
|
@ -22,9 +22,15 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
lzma = None
|
lzma = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import zstd
|
||||||
|
except ImportError:
|
||||||
|
zstd = None
|
||||||
|
|
||||||
|
|
||||||
from .helpers import Buffer, DecompressionError
|
from .helpers import Buffer, DecompressionError
|
||||||
|
|
||||||
API_VERSION = '1.1_03'
|
API_VERSION = '1.1_04'
|
||||||
|
|
||||||
cdef extern from "lz4.h":
|
cdef extern from "lz4.h":
|
||||||
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
|
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
|
||||||
|
@ -186,6 +192,38 @@ class LZMA(CompressorBase):
|
||||||
raise DecompressionError(str(e)) from None
|
raise DecompressionError(str(e)) from None
|
||||||
|
|
||||||
|
|
||||||
|
class ZSTD(CompressorBase):
|
||||||
|
"""zstd compression / decompression (pypi: zstandard, gh: python-zstandard)"""
|
||||||
|
# This is a NOT THREAD SAFE implementation.
|
||||||
|
# Only ONE python context must be created at a time.
|
||||||
|
# It should work flawlessly as long as borg calls ONLY ONE compression job at a time.
|
||||||
|
ID = b'\x03\x00'
|
||||||
|
name = 'zstd'
|
||||||
|
|
||||||
|
def __init__(self, level=3, **kwargs):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
self.level = level
|
||||||
|
if zstd is None:
|
||||||
|
raise ValueError('No zstd support found.')
|
||||||
|
|
||||||
|
def compress(self, data):
|
||||||
|
if not isinstance(data, bytes):
|
||||||
|
data = bytes(data) # zstd < 0.9.0 does not work with memoryview
|
||||||
|
cctx = zstd.ZstdCompressor(level=self.level, write_content_size=True)
|
||||||
|
data = cctx.compress(data)
|
||||||
|
return super().compress(data)
|
||||||
|
|
||||||
|
def decompress(self, data):
|
||||||
|
if not isinstance(data, bytes):
|
||||||
|
data = bytes(data) # zstd < 0.9.0 does not work with memoryview
|
||||||
|
dctx = zstd.ZstdDecompressor()
|
||||||
|
data = super().decompress(data)
|
||||||
|
try:
|
||||||
|
return dctx.decompress(data)
|
||||||
|
except zstd.ZstdError as e:
|
||||||
|
raise DecompressionError(str(e)) from None
|
||||||
|
|
||||||
|
|
||||||
class ZLIB(CompressorBase):
|
class ZLIB(CompressorBase):
|
||||||
"""
|
"""
|
||||||
zlib compression / decompression (python stdlib)
|
zlib compression / decompression (python stdlib)
|
||||||
|
@ -289,9 +327,10 @@ COMPRESSOR_TABLE = {
|
||||||
ZLIB.name: ZLIB,
|
ZLIB.name: ZLIB,
|
||||||
LZMA.name: LZMA,
|
LZMA.name: LZMA,
|
||||||
Auto.name: Auto,
|
Auto.name: Auto,
|
||||||
|
ZSTD.name: ZSTD,
|
||||||
}
|
}
|
||||||
# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
|
# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
|
||||||
COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ] # check fast stuff first
|
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ] # check fast stuff first
|
||||||
|
|
||||||
def get_compressor(name, **kwargs):
|
def get_compressor(name, **kwargs):
|
||||||
cls = COMPRESSOR_TABLE[name]
|
cls = COMPRESSOR_TABLE[name]
|
||||||
|
@ -344,6 +383,16 @@ class CompressionSpec:
|
||||||
else:
|
else:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
self.level = level
|
self.level = level
|
||||||
|
elif self.name in ('zstd', ):
|
||||||
|
if count < 2:
|
||||||
|
level = 3 # default compression level in zstd
|
||||||
|
elif count == 2:
|
||||||
|
level = int(values[1])
|
||||||
|
if not 1 <= level <= 22:
|
||||||
|
raise ValueError
|
||||||
|
else:
|
||||||
|
raise ValueError
|
||||||
|
self.level = level
|
||||||
elif self.name == 'auto':
|
elif self.name == 'auto':
|
||||||
if 2 <= count <= 3:
|
if 2 <= count <= 3:
|
||||||
compression = ','.join(values[1:])
|
compression = ','.join(values[1:])
|
||||||
|
@ -357,7 +406,7 @@ class CompressionSpec:
|
||||||
def compressor(self):
|
def compressor(self):
|
||||||
if self.name in ('none', 'lz4', ):
|
if self.name in ('none', 'lz4', ):
|
||||||
return get_compressor(self.name)
|
return get_compressor(self.name)
|
||||||
elif self.name in ('zlib', 'lzma', ):
|
elif self.name in ('zlib', 'lzma', 'zstd', ):
|
||||||
return get_compressor(self.name, level=self.level)
|
return get_compressor(self.name, level=self.level)
|
||||||
elif self.name == 'auto':
|
elif self.name == 'auto':
|
||||||
return get_compressor(self.name, compressor=self.inner.compressor)
|
return get_compressor(self.name, compressor=self.inner.compressor)
|
||||||
|
|
|
@ -135,7 +135,7 @@ def check_extension_modules():
|
||||||
raise ExtensionModuleError
|
raise ExtensionModuleError
|
||||||
if chunker.API_VERSION != '1.1_01':
|
if chunker.API_VERSION != '1.1_01':
|
||||||
raise ExtensionModuleError
|
raise ExtensionModuleError
|
||||||
if compress.API_VERSION != '1.1_03':
|
if compress.API_VERSION != '1.1_04':
|
||||||
raise ExtensionModuleError
|
raise ExtensionModuleError
|
||||||
if borg.crypto.low_level.API_VERSION != '1.1_02':
|
if borg.crypto.low_level.API_VERSION != '1.1_02':
|
||||||
raise ExtensionModuleError
|
raise ExtensionModuleError
|
||||||
|
|
|
@ -5,9 +5,14 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
lzma = None
|
lzma = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import zstd
|
||||||
|
except ImportError:
|
||||||
|
zstd = None
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto
|
from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, ZSTD, Auto
|
||||||
|
|
||||||
|
|
||||||
buffer = bytes(2**16)
|
buffer = bytes(2**16)
|
||||||
|
@ -69,6 +74,16 @@ def test_lzma():
|
||||||
assert data == Compressor(**params).decompress(cdata) # autodetect
|
assert data == Compressor(**params).decompress(cdata) # autodetect
|
||||||
|
|
||||||
|
|
||||||
|
def test_zstd():
|
||||||
|
if zstd is None:
|
||||||
|
pytest.skip("No zstd support found.")
|
||||||
|
c = get_compressor(name='zstd')
|
||||||
|
cdata = c.compress(data)
|
||||||
|
assert len(cdata) < len(data)
|
||||||
|
assert data == c.decompress(cdata)
|
||||||
|
assert data == Compressor(**params).decompress(cdata) # autodetect
|
||||||
|
|
||||||
|
|
||||||
def test_autodetect_invalid():
|
def test_autodetect_invalid():
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
Compressor(**params).decompress(b'\xff\xfftotalcrap')
|
Compressor(**params).decompress(b'\xff\xfftotalcrap')
|
||||||
|
@ -104,6 +119,12 @@ def test_compressor():
|
||||||
dict(name='lzma', level=6),
|
dict(name='lzma', level=6),
|
||||||
# we do not test lzma on level 9 because of the huge memory needs
|
# we do not test lzma on level 9 because of the huge memory needs
|
||||||
]
|
]
|
||||||
|
if zstd:
|
||||||
|
params_list += [
|
||||||
|
dict(name='zstd', level=1),
|
||||||
|
dict(name='zstd', level=3),
|
||||||
|
# also avoiding high zstd levels, memory needs unclear
|
||||||
|
]
|
||||||
for params in params_list:
|
for params in params_list:
|
||||||
c = Compressor(**params)
|
c = Compressor(**params)
|
||||||
assert data == c.decompress(c.compress(data))
|
assert data == c.decompress(c.compress(data))
|
||||||
|
@ -154,6 +175,16 @@ def test_compression_specs():
|
||||||
assert isinstance(lzma, LZMA)
|
assert isinstance(lzma, LZMA)
|
||||||
assert lzma.level == 9
|
assert lzma.level == 9
|
||||||
|
|
||||||
|
zstd = CompressionSpec('zstd').compressor
|
||||||
|
assert isinstance(zstd, ZSTD)
|
||||||
|
assert zstd.level == 3
|
||||||
|
zstd = CompressionSpec('zstd,1').compressor
|
||||||
|
assert isinstance(zstd, ZSTD)
|
||||||
|
assert zstd.level == 1
|
||||||
|
zstd = CompressionSpec('zstd,22').compressor
|
||||||
|
assert isinstance(zstd, ZSTD)
|
||||||
|
assert zstd.level == 22
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
CompressionSpec('lzma,9,invalid')
|
CompressionSpec('lzma,9,invalid')
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
|
|
Loading…
Reference in New Issue