mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-31 19:52:22 +00:00
auto compression: make sure expensive compression is actually better
if it is not significantly better compressed, we just store lz4
compressed data (which we already have computed anyway), because
that at least decompresses super fast.
(cherry picked from commit 011e0fd3fa)
This commit is contained in:
parent
ce1d3ec7e4
commit
5e3758fc7a
2 changed files with 29 additions and 8 deletions
|
@ -244,7 +244,7 @@ class Auto(CompressorBase):
|
|||
lz4_data = self.lz4.compress(data)
|
||||
ratio = len(lz4_data) / len(data)
|
||||
if ratio < 0.97:
|
||||
return self.compressor, None
|
||||
return self.compressor, lz4_data
|
||||
elif ratio < 1:
|
||||
return self.lz4, lz4_data
|
||||
else:
|
||||
|
@ -255,9 +255,24 @@ class Auto(CompressorBase):
|
|||
|
||||
def compress(self, data):
|
||||
compressor, lz4_data = self._decide(data)
|
||||
if lz4_data is None:
|
||||
return compressor.compress(data)
|
||||
if compressor is self.lz4:
|
||||
# we know that trying to compress with expensive compressor is likely pointless,
|
||||
# but lz4 managed to at least squeeze the data a bit.
|
||||
return lz4_data
|
||||
if compressor is self.none:
|
||||
# we know that trying to compress with expensive compressor is likely pointless
|
||||
# and also lz4 did not manage to squeeze the data (not even a bit).
|
||||
uncompressed_data = compressor.compress(data)
|
||||
return uncompressed_data
|
||||
# if we get here, the decider decided to try the expensive compressor.
|
||||
# we also know that lz4_data is smaller than uncompressed data.
|
||||
exp_compressed_data = compressor.compress(data)
|
||||
ratio = len(exp_compressed_data) / len(lz4_data)
|
||||
if ratio < 0.99:
|
||||
# the expensive compressor managed to squeeze the data significantly better than lz4.
|
||||
return exp_compressed_data
|
||||
else:
|
||||
# otherwise let's just store the lz4 data, which decompresses extremely fast.
|
||||
return lz4_data
|
||||
|
||||
def decompress(self, data):
|
||||
|
|
|
@ -110,12 +110,18 @@ def test_compressor():
|
|||
|
||||
|
||||
def test_auto():
|
||||
compressor = CompressionSpec('auto,zlib,9').compressor
|
||||
compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor
|
||||
compressor_lz4 = CompressionSpec('lz4').compressor
|
||||
compressor_zlib = CompressionSpec('zlib,9').compressor
|
||||
data = bytes(500)
|
||||
compressed_auto_zlib = compressor_auto_zlib.compress(data)
|
||||
compressed_lz4 = compressor_lz4.compress(data)
|
||||
compressed_zlib = compressor_zlib.compress(data)
|
||||
ratio = len(compressed_zlib) / len(compressed_lz4)
|
||||
# The conditional expression must be parenthesized: it binds looser than `==`,
# so without parentheses the else branch would merely assert the (always truthy)
# LZ4 class instead of comparing the detected compressor against it.
assert Compressor.detect(compressed_auto_zlib) == (ZLIB if ratio < 0.99 else LZ4)
|
||||
|
||||
compressed = compressor.compress(bytes(500))
|
||||
assert Compressor.detect(compressed) == ZLIB
|
||||
|
||||
compressed = compressor.compress(b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~')
|
||||
data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
|
||||
compressed = compressor_auto_zlib.compress(data)
|
||||
assert Compressor.detect(compressed) == CNONE
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue