From 5e3758fc7a78bf8e90b9385a202b625c71fe7f37 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 Oct 2017 21:11:43 +0200 Subject: [PATCH] auto compression: make sure expensive compression is actually better if it is not significantly better compressed, we just store lz4 compressed data (which we already have computed anyway), because that at least decompresses super fast. (cherry picked from commit 011e0fd3faf2730681a17403e8fd575bd3ea4b08) --- src/borg/compress.pyx | 21 ++++++++++++++++++--- src/borg/testsuite/compress.py | 16 +++++++++++----- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 6c1d4c31f..8e509213e 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -244,7 +244,7 @@ class Auto(CompressorBase): lz4_data = self.lz4.compress(data) ratio = len(lz4_data) / len(data) if ratio < 0.97: - return self.compressor, None + return self.compressor, lz4_data elif ratio < 1: return self.lz4, lz4_data else: @@ -255,9 +255,24 @@ class Auto(CompressorBase): def compress(self, data): compressor, lz4_data = self._decide(data) - if lz4_data is None: - return compressor.compress(data) + if compressor is self.lz4: + # we know that trying to compress with expensive compressor is likely pointless, + # but lz4 managed to at least squeeze the data a bit. + return lz4_data + if compressor is self.none: + # we know that trying to compress with expensive compressor is likely pointless + # and also lz4 did not manage to squeeze the data (not even a bit). + uncompressed_data = compressor.compress(data) + return uncompressed_data + # if we get here, the decider decided to try the expensive compressor. + # we also know that lz4_data is smaller than uncompressed data. + exp_compressed_data = compressor.compress(data) + ratio = len(exp_compressed_data) / len(lz4_data) + if ratio < 0.99: + # the expensive compressor managed to squeeze the data significantly better than lz4. 
+ return exp_compressed_data else: + # otherwise let's just store the lz4 data, which decompresses extremely fast. return lz4_data def decompress(self, data): diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py index ee6da55a1..f881ad2c7 100644 --- a/src/borg/testsuite/compress.py +++ b/src/borg/testsuite/compress.py @@ -110,12 +110,18 @@ def test_compressor(): def test_auto(): - compressor = CompressionSpec('auto,zlib,9').compressor + compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor + compressor_lz4 = CompressionSpec('lz4').compressor + compressor_zlib = CompressionSpec('zlib,9').compressor + data = bytes(500) + compressed_auto_zlib = compressor_auto_zlib.compress(data) + compressed_lz4 = compressor_lz4.compress(data) + compressed_zlib = compressor_zlib.compress(data) + ratio = len(compressed_zlib) / len(compressed_lz4) + assert Compressor.detect(compressed_auto_zlib) == ZLIB if ratio < 0.99 else LZ4 - compressed = compressor.compress(bytes(500)) - assert Compressor.detect(compressed) == ZLIB - - compressed = compressor.compress(b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~') + data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~' + compressed = compressor_auto_zlib.compress(data) assert Compressor.detect(compressed) == CNONE