diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx
index 6c1d4c31f..8e509213e 100644
--- a/src/borg/compress.pyx
+++ b/src/borg/compress.pyx
@@ -244,7 +244,7 @@ class Auto(CompressorBase):
         lz4_data = self.lz4.compress(data)
         ratio = len(lz4_data) / len(data)
         if ratio < 0.97:
-            return self.compressor, None
+            return self.compressor, lz4_data
         elif ratio < 1:
             return self.lz4, lz4_data
         else:
@@ -255,9 +255,24 @@ class Auto(CompressorBase):
 
     def compress(self, data):
         compressor, lz4_data = self._decide(data)
-        if lz4_data is None:
-            return compressor.compress(data)
+        if compressor is self.lz4:
+            # we know that trying to compress with expensive compressor is likely pointless,
+            # but lz4 managed to at least squeeze the data a bit.
+            return lz4_data
+        if compressor is self.none:
+            # we know that trying to compress with expensive compressor is likely pointless
+            # and also lz4 did not manage to squeeze the data (not even a bit).
+            uncompressed_data = compressor.compress(data)
+            return uncompressed_data
+        # if we get here, the decider decided to try the expensive compressor.
+        # we also know that lz4_data is smaller than uncompressed data.
+        exp_compressed_data = compressor.compress(data)
+        ratio = len(exp_compressed_data) / len(lz4_data)
+        if ratio < 0.99:
+            # the expensive compressor managed to squeeze the data significantly better than lz4.
+            return exp_compressed_data
         else:
+            # otherwise let's just store the lz4 data, which decompresses extremely fast.
             return lz4_data
 
     def decompress(self, data):
diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py
index ee6da55a1..f881ad2c7 100644
--- a/src/borg/testsuite/compress.py
+++ b/src/borg/testsuite/compress.py
@@ -110,12 +110,20 @@ def test_compressor():
 
 
 def test_auto():
-    compressor = CompressionSpec('auto,zlib,9').compressor
+    compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor
+    compressor_lz4 = CompressionSpec('lz4').compressor
+    compressor_zlib = CompressionSpec('zlib,9').compressor
+    data = bytes(500)
+    compressed_auto_zlib = compressor_auto_zlib.compress(data)
+    compressed_lz4 = compressor_lz4.compress(data)
+    compressed_zlib = compressor_zlib.compress(data)
+    ratio = len(compressed_zlib) / len(compressed_lz4)
+    # the parentheses are required: without them this parses as
+    # "assert (detect == ZLIB) if ratio < 0.99 else LZ4", and LZ4 is always truthy.
+    assert Compressor.detect(compressed_auto_zlib) == (ZLIB if ratio < 0.99 else LZ4)
 
-    compressed = compressor.compress(bytes(500))
-    assert Compressor.detect(compressed) == ZLIB
-
-    compressed = compressor.compress(b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~')
+    data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
+    compressed = compressor_auto_zlib.compress(data)
     assert Compressor.detect(compressed) == CNONE
 
 