Merge pull request #3085 from ThomasWaldmann/compressed-not-larger

auto compression: make sure expensive compression is actually better
This commit is contained in:
TW 2017-10-07 13:52:38 +02:00 committed by GitHub
commit 5436a253b0
2 changed files with 29 additions and 8 deletions

View File

@ -246,7 +246,7 @@ class Auto(CompressorBase):
lz4_data = self.lz4.compress(data)
ratio = len(lz4_data) / len(data)
if ratio < 0.97:
return self.compressor, None
return self.compressor, lz4_data
elif ratio < 1:
return self.lz4, lz4_data
else:
@ -257,9 +257,24 @@ class Auto(CompressorBase):
def compress(self, data):
    """Compress *data* with whichever candidate the decider and a size check favour.

    ``self._decide(data)`` picks a compressor and may hand back the lz4
    output it produced while deciding.  Depending on that choice, return:

    * the lz4 output, when lz4 itself was chosen;
    * the output of ``self.none``, when that was chosen;
    * otherwise the expensive compressor's output, but only if it is
      noticeably smaller (ratio < 0.99) than the lz4 output; if it is not,
      the lz4 output is returned instead.
    """
    compressor, lz4_data = self._decide(data)
    if compressor is self.lz4:
        # we know that trying to compress with expensive compressor is likely pointless,
        # but lz4 managed to at least squeeze the data a bit.
        return lz4_data
    if compressor is self.none:
        # we know that trying to compress with expensive compressor is likely pointless
        # and also lz4 did not manage to squeeze the data (not even a bit).
        return compressor.compress(data)
    # if we get here, the decider decided to try the expensive compressor.
    exp_compressed_data = compressor.compress(data)
    if lz4_data is None:
        # the decider did not hand over any lz4 output to compare against,
        # so the expensive result is the only candidate.
        return exp_compressed_data
    ratio = len(exp_compressed_data) / len(lz4_data)
    if ratio < 0.99:
        # the expensive compressor managed to squeeze the data significantly better than lz4.
        return exp_compressed_data
    # otherwise let's just store the lz4 data, which decompresses extremely fast.
    return lz4_data
def decompress(self, data):

View File

@ -110,12 +110,18 @@ def test_compressor():
def test_auto():
    """Check which container format the 'auto,zlib,9' compressor ends up emitting.

    For highly compressible input the result must be ZLIB when zlib beats lz4
    by the same 0.99 ratio used in this test, and LZ4 otherwise.  For the
    short high-entropy sample below the result must be stored uncompressed
    (CNONE).
    """
    compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor
    compressor_lz4 = CompressionSpec('lz4').compressor
    compressor_zlib = CompressionSpec('zlib,9').compressor

    data = bytes(500)
    compressed_auto_zlib = compressor_auto_zlib.compress(data)
    compressed_lz4 = compressor_lz4.compress(data)
    compressed_zlib = compressor_zlib.compress(data)
    ratio = len(compressed_zlib) / len(compressed_lz4)
    # Parenthesise the expected value: without the parentheses the conditional
    # expression swallows the whole comparison, and the ``else`` arm would
    # assert the (always truthy) LZ4 name instead of comparing against it.
    expected = ZLIB if ratio < 0.99 else LZ4
    assert Compressor.detect(compressed_auto_zlib) == expected

    data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
    compressed = compressor_auto_zlib.compress(data)
    assert Compressor.detect(compressed) == CNONE