1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-31 19:52:22 +00:00

auto compression: make sure expensive compression is actually better

if it is not significantly better compressed, we just store the
lz4-compressed data (which we have already computed anyway), because
that at least decompresses super fast.

(cherry picked from commit 011e0fd3fa)
This commit is contained in:
Thomas Waldmann 2017-10-03 21:11:43 +02:00
parent ce1d3ec7e4
commit 5e3758fc7a
2 changed files with 29 additions and 8 deletions

View file

@@ -244,7 +244,7 @@ class Auto(CompressorBase):
lz4_data = self.lz4.compress(data)
ratio = len(lz4_data) / len(data)
if ratio < 0.97:
return self.compressor, None
return self.compressor, lz4_data
elif ratio < 1:
return self.lz4, lz4_data
else:
@@ -255,9 +255,24 @@ class Auto(CompressorBase):
def compress(self, data):
compressor, lz4_data = self._decide(data)
if lz4_data is None:
return compressor.compress(data)
if compressor is self.lz4:
# we know that trying to compress with expensive compressor is likely pointless,
# but lz4 managed to at least squeeze the data a bit.
return lz4_data
if compressor is self.none:
# we know that trying to compress with expensive compressor is likely pointless
# and also lz4 did not manage to squeeze the data (not even a bit).
uncompressed_data = compressor.compress(data)
return uncompressed_data
# if we get here, the decider decided to try the expensive compressor.
# we also know that lz4_data is smaller than uncompressed data.
exp_compressed_data = compressor.compress(data)
ratio = len(exp_compressed_data) / len(lz4_data)
if ratio < 0.99:
# the expensive compressor managed to squeeze the data significantly better than lz4.
return exp_compressed_data
else:
# otherwise let's just store the lz4 data, which decompresses extremely fast.
return lz4_data
def decompress(self, data):

View file

@@ -110,12 +110,18 @@ def test_compressor():
def test_auto():
compressor = CompressionSpec('auto,zlib,9').compressor
compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor
compressor_lz4 = CompressionSpec('lz4').compressor
compressor_zlib = CompressionSpec('zlib,9').compressor
data = bytes(500)
compressed_auto_zlib = compressor_auto_zlib.compress(data)
compressed_lz4 = compressor_lz4.compress(data)
compressed_zlib = compressor_zlib.compress(data)
ratio = len(compressed_zlib) / len(compressed_lz4)
assert Compressor.detect(compressed_auto_zlib) == ZLIB if ratio < 0.99 else LZ4
compressed = compressor.compress(bytes(500))
assert Compressor.detect(compressed) == ZLIB
compressed = compressor.compress(b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~')
data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
compressed = compressor_auto_zlib.compress(data)
assert Compressor.detect(compressed) == CNONE