
transfer: remove the zlib type bytes hack

Hack: see the docstring of the ZLIB_legacy class.

New, clean ZLIB class that works like every other compressor.

ZLIB ID: 0x0500, ZLIB_legacy ID: 0x.8..
Thomas Waldmann 2022-05-03 20:51:43 +02:00
parent ba1dbe6111
commit 01f72d15b4
4 changed files with 56 additions and 11 deletions
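
Background for the diffs below: legacy zlib chunks carry no separate compressor ID at all, they are recognized purely by the zlib stream header, whose first two bytes follow the 0x.8.. pattern. A minimal sketch of that header check (standard RFC 1950 layout; illustrative, not necessarily the exact borg implementation):

def looks_like_zlib(data):
    # zlib streams start with a CMF byte (low nibble 8 = deflate) and a FLG byte
    # chosen so that CMF * 256 + FLG is divisible by 31 -- the 0x.8.. pattern
    if len(data) < 2:
        return False
    cmf, flg = data[0], data[1]
    return (cmf & 0x0f) == 8 and (cmf * 256 + flg) % 31 == 0

The new ZLIB class instead writes its own two ID bytes (0x05 0x00) in front of the zlib stream, like every other compressor, so no header-pattern guessing is needed anymore.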

View file

@@ -44,7 +44,7 @@
from .archive import has_link
from .cache import Cache, assert_secure, SecurityManager
from .constants import * # NOQA
from .compress import CompressionSpec
from .compress import CompressionSpec, ZLIB, ZLIB_legacy
from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required
from .crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey, FlexiKey
from .crypto.keymanager import KeyManager
@@ -351,6 +351,11 @@ def upgrade_item(item):
    item.get_size(memorize=True)  # if not already present: compute+remember size for items with chunks
    return item

def upgrade_compressed_chunk(chunk):
    if ZLIB_legacy.detect(chunk):
        chunk = ZLIB.ID + chunk  # get rid of the attic legacy: prepend separate type bytes for zlib
    return chunk

dry_run = args.dry_run
args.consider_checkpoints = True
@@ -378,6 +383,7 @@ def upgrade_item(item):
cdata = other_repository.get(chunk_id)
# keep compressed payload same, avoid decompression / recompression
data = other_key.decrypt(chunk_id, cdata, decompress=False)
data = upgrade_compressed_chunk(data)
chunk_entry = cache.add_chunk(chunk_id, data, archive.stats, wait=False,
                              compress=False, size=size)
cache.repository.async_response(wait=False)
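
At the byte level the upgrade is just a prefix operation: the zlib payload stays exactly as it was, only the two new ID bytes are prepended, so the transfer never has to decompress or recompress. A small illustration (the b'\x05\x00' value is the ZLIB ID from this commit; the snippet is illustrative, not borg code):

import zlib

ZLIB_ID = b'\x05\x00'                                  # new separate zlib type bytes
legacy_chunk = zlib.compress(b'some file data' * 100)  # legacy chunk: bare zlib stream (0x78 0x9c ...)
upgraded_chunk = ZLIB_ID + legacy_chunk                # upgraded chunk: ID bytes + unchanged payload
assert upgraded_chunk[2:] == legacy_chunk              # payload identical, no recompression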

View file

@@ -331,14 +331,52 @@ class ZSTD(DecidingCompressor):
        return dest[:osize]


class ZLIB(CompressorBase):
class ZLIB(DecidingCompressor):
    """
    zlib compression / decompression (python stdlib)
    """
    ID = b'\x08\x00'  # not used here, see detect()
    # avoid all 0x.8.. IDs elsewhere!
    ID = b'\x05\x00'
    name = 'zlib'

    def __init__(self, level=6, **kwargs):
        super().__init__(**kwargs)
        self.level = level

    def _decide(self, data):
        """
        Decides what to do with *data*. Returns (compressor, zlib_data).

        *zlib_data* is the ZLIB result if *compressor* is ZLIB as well, otherwise it is None.
        """
        zlib_data = zlib.compress(data, self.level)
        if len(zlib_data) < len(data):
            return self, zlib_data
        else:
            return NONE_COMPRESSOR, None

    def decompress(self, data):
        data = super().decompress(data)
        try:
            return zlib.decompress(data)
        except zlib.error as e:
            raise DecompressionError(str(e)) from None


class ZLIB_legacy(CompressorBase):
    """
    zlib compression / decompression (python stdlib)

    Note: This is the legacy ZLIB support as used by borg < 1.3.
    It still suffers from attic *only* supporting zlib and not having separate
    ID bytes to differentiate between differently compressed chunks.
    This just works because zlib compressed stuff always starts with 0x.8.. bytes.
    Newer borg uses the ZLIB class that has separate ID bytes (as all the other
    compressors) and does not need this hack.
    """
    ID = b'\x08\x00'  # not used here, see detect()
    # avoid all 0x.8.. IDs elsewhere!
    name = 'zlib_legacy'

    @classmethod
    def detect(cls, data):
        # matches misc. patterns 0x.8.. used by zlib
@@ -502,13 +540,14 @@ COMPRESSOR_TABLE = {
    CNONE.name: CNONE,
    LZ4.name: LZ4,
    ZLIB.name: ZLIB,
    ZLIB_legacy.name: ZLIB_legacy,
    LZMA.name: LZMA,
    Auto.name: Auto,
    ZSTD.name: ZSTD,
    ObfuscateSize.name: ObfuscateSize,
}

# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ]  # check fast stuff first
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ]  # check fast stuff first


def get_compressor(name, **kwargs):
    cls = COMPRESSOR_TABLE[name]
@@ -554,7 +593,7 @@ class CompressionSpec:
        self.name = values[0]
        if self.name in ('none', 'lz4', ):
            return
        elif self.name in ('zlib', 'lzma', ):
        elif self.name in ('zlib', 'lzma', 'zlib_legacy'):  # zlib_legacy just for testing
            if count < 2:
                level = 6  # default compression level in py stdlib
            elif count == 2:
@@ -597,7 +636,7 @@ class CompressionSpec:
    def compressor(self):
        if self.name in ('none', 'lz4', ):
            return get_compressor(self.name)
        elif self.name in ('zlib', 'lzma', 'zstd', ):
        elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'):
            return get_compressor(self.name, level=self.level)
        elif self.name == 'auto':
            return get_compressor(self.name, compressor=self.inner.compressor)
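
Since the new ZLIB is a DecidingCompressor, it only keeps the zlib output when that output is actually smaller than the input; otherwise _decide() hands the data over to the 'none' compressor. A rough usage sketch, assuming CompressionSpec and get_compressor behave as shown in the hunks above (the exact compress() return format may differ between borg versions):

import os
from borg.compress import CompressionSpec

compressor = CompressionSpec('zlib,6').compressor   # 'zlib,6' -> ZLIB at level 6

well_compressible = b'x' * 10000     # zlib shrinks this a lot -> stored zlib-compressed
incompressible = os.urandom(10000)   # zlib output would be larger -> handed to 'none'

stored_small = compressor.compress(well_compressible)
stored_big = compressor.compress(incompressible)
assert len(stored_small) < len(well_compressible)
# stored_big carries the 'none' compressor's ID bytes, so almost no size overhead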

View file

@@ -2442,7 +2442,7 @@ def test_compression_none_uncompressible(self):
    def test_compression_zlib_compressible(self):
        size, csize = self._get_sizes('zlib', compressible=True)
        assert csize < size * 0.1
        assert csize == 35
        assert csize == 37

    def test_compression_zlib_uncompressible(self):
        size, csize = self._get_sizes('zlib', compressible=False)

@@ -2451,7 +2451,7 @@ def test_compression_zlib_uncompressible(self):
    def test_compression_auto_compressible(self):
        size, csize = self._get_sizes('auto,zlib', compressible=True)
        assert csize < size * 0.1
        assert csize == 35  # same as compression 'zlib'
        assert csize == 37  # same as compression 'zlib'

    def test_compression_auto_uncompressible(self):
        size, csize = self._get_sizes('auto,zlib', compressible=False)
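
The expected csize grows from 35 to 37 here, which matches the two extra ID bytes the new ZLIB class now stores in front of the zlib stream (the legacy format stored the bare stream without a separate ID); the compressed payload itself is unchanged:

legacy_csize = 35                            # bare zlib stream, as stored before this change
new_csize = legacy_csize + len(b'\x05\x00')  # plus the new 2-byte compressor ID
assert new_csize == 37                       # matches the updated test expectation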

View file

@@ -88,11 +88,11 @@ def test_autodetect_invalid():
        Compressor(**params).decompress(b'\x08\x00notreallyzlib')


def test_zlib_compat():
def test_zlib_legacy_compat():
    # for compatibility reasons, we do not add an extra header for zlib,
    # nor do we expect one when decompressing / autodetecting
    for level in range(10):
        c = get_compressor(name='zlib', level=level)
        c = get_compressor(name='zlib_legacy', level=level)
        cdata1 = c.compress(data)
        cdata2 = zlib.compress(data, level)
        assert cdata1 == cdata2