Merge pull request #3100 from ThomasWaldmann/compressed-not-larger-1.1

auto compression: make sure expensive compression is actually better
This commit is contained in:
TW 2017-10-07 16:31:44 +02:00 committed by GitHub
commit 5ba34634da
2 changed files with 29 additions and 8 deletions

View File

@@ -244,7 +244,7 @@ class Auto(CompressorBase):
lz4_data = self.lz4.compress(data) lz4_data = self.lz4.compress(data)
ratio = len(lz4_data) / len(data) ratio = len(lz4_data) / len(data)
if ratio < 0.97: if ratio < 0.97:
return self.compressor, None return self.compressor, lz4_data
elif ratio < 1: elif ratio < 1:
return self.lz4, lz4_data return self.lz4, lz4_data
else: else:
@@ -255,9 +255,24 @@ class Auto(CompressorBase):
def compress(self, data): def compress(self, data):
compressor, lz4_data = self._decide(data) compressor, lz4_data = self._decide(data)
if lz4_data is None: if compressor is self.lz4:
return compressor.compress(data) # we know that trying to compress with expensive compressor is likely pointless,
# but lz4 managed to at least squeeze the data a bit.
return lz4_data
if compressor is self.none:
# we know that trying to compress with expensive compressor is likely pointless
# and also lz4 did not manage to squeeze the data (not even a bit).
uncompressed_data = compressor.compress(data)
return uncompressed_data
# if we get here, the decider decided to try the expensive compressor.
# we also know that lz4_data is smaller than uncompressed data.
exp_compressed_data = compressor.compress(data)
ratio = len(exp_compressed_data) / len(lz4_data)
if ratio < 0.99:
# the expensive compressor managed to squeeze the data significantly better than lz4.
return exp_compressed_data
else: else:
# otherwise let's just store the lz4 data, which decompresses extremely fast.
return lz4_data return lz4_data
def decompress(self, data): def decompress(self, data):

View File

@@ -110,12 +110,18 @@ def test_compressor():
def test_auto(): def test_auto():
compressor = CompressionSpec('auto,zlib,9').compressor compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor
compressor_lz4 = CompressionSpec('lz4').compressor
compressor_zlib = CompressionSpec('zlib,9').compressor
data = bytes(500)
compressed_auto_zlib = compressor_auto_zlib.compress(data)
compressed_lz4 = compressor_lz4.compress(data)
compressed_zlib = compressor_zlib.compress(data)
ratio = len(compressed_zlib) / len(compressed_lz4)
assert Compressor.detect(compressed_auto_zlib) == ZLIB if ratio < 0.99 else LZ4
compressed = compressor.compress(bytes(500)) data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
assert Compressor.detect(compressed) == ZLIB compressed = compressor_auto_zlib.compress(data)
compressed = compressor.compress(b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~')
assert Compressor.detect(compressed) == CNONE assert Compressor.detect(compressed) == CNONE