mirror of
https://github.com/borgbackup/borg.git
synced 2025-03-19 18:36:07 +00:00
auto compression: make sure expensive compression is actually better
If it is not significantly better compressed, we just store the lz4-compressed data (which we have already computed anyway), because that at least decompresses super fast.
This commit is contained in:
parent
35c042b97b
commit
011e0fd3fa
2 changed files with 29 additions and 8 deletions
|
@ -246,7 +246,7 @@ class Auto(CompressorBase):
|
||||||
lz4_data = self.lz4.compress(data)
|
lz4_data = self.lz4.compress(data)
|
||||||
ratio = len(lz4_data) / len(data)
|
ratio = len(lz4_data) / len(data)
|
||||||
if ratio < 0.97:
|
if ratio < 0.97:
|
||||||
return self.compressor, None
|
return self.compressor, lz4_data
|
||||||
elif ratio < 1:
|
elif ratio < 1:
|
||||||
return self.lz4, lz4_data
|
return self.lz4, lz4_data
|
||||||
else:
|
else:
|
||||||
|
@ -257,9 +257,24 @@ class Auto(CompressorBase):
|
||||||
|
|
||||||
def compress(self, data):
|
def compress(self, data):
|
||||||
compressor, lz4_data = self._decide(data)
|
compressor, lz4_data = self._decide(data)
|
||||||
if lz4_data is None:
|
if compressor is self.lz4:
|
||||||
return compressor.compress(data)
|
# we know that trying to compress with expensive compressor is likely pointless,
|
||||||
|
# but lz4 managed to at least squeeze the data a bit.
|
||||||
|
return lz4_data
|
||||||
|
if compressor is self.none:
|
||||||
|
# we know that trying to compress with expensive compressor is likely pointless
|
||||||
|
# and also lz4 did not manage to squeeze the data (not even a bit).
|
||||||
|
uncompressed_data = compressor.compress(data)
|
||||||
|
return uncompressed_data
|
||||||
|
# if we get here, the decider decided to try the expensive compressor.
|
||||||
|
# we also know that lz4_data is smaller than uncompressed data.
|
||||||
|
exp_compressed_data = compressor.compress(data)
|
||||||
|
ratio = len(exp_compressed_data) / len(lz4_data)
|
||||||
|
if ratio < 0.99:
|
||||||
|
# the expensive compressor managed to squeeze the data significantly better than lz4.
|
||||||
|
return exp_compressed_data
|
||||||
else:
|
else:
|
||||||
|
# otherwise let's just store the lz4 data, which decompresses extremely fast.
|
||||||
return lz4_data
|
return lz4_data
|
||||||
|
|
||||||
def decompress(self, data):
|
def decompress(self, data):
|
||||||
|
|
|
@ -110,12 +110,18 @@ def test_compressor():
|
||||||
|
|
||||||
|
|
||||||
def test_auto():
|
def test_auto():
|
||||||
compressor = CompressionSpec('auto,zlib,9').compressor
|
compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor
|
||||||
|
compressor_lz4 = CompressionSpec('lz4').compressor
|
||||||
|
compressor_zlib = CompressionSpec('zlib,9').compressor
|
||||||
|
data = bytes(500)
|
||||||
|
compressed_auto_zlib = compressor_auto_zlib.compress(data)
|
||||||
|
compressed_lz4 = compressor_lz4.compress(data)
|
||||||
|
compressed_zlib = compressor_zlib.compress(data)
|
||||||
|
ratio = len(compressed_zlib) / len(compressed_lz4)
|
||||||
|
assert Compressor.detect(compressed_auto_zlib) == ZLIB if ratio < 0.99 else LZ4
|
||||||
|
|
||||||
compressed = compressor.compress(bytes(500))
|
data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
|
||||||
assert Compressor.detect(compressed) == ZLIB
|
compressed = compressor_auto_zlib.compress(data)
|
||||||
|
|
||||||
compressed = compressor.compress(b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~')
|
|
||||||
assert Compressor.detect(compressed) == CNONE
|
assert Compressor.detect(compressed) == CNONE
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue