
transfer: remove the zlib type bytes hack

Hack: see the docstring of the ZLIB_legacy class.

New, clean ZLIB class that works like every other compressor.

ZLIB ID: 0x0500, ZLIB_legacy ID: 0x.8..
Thomas Waldmann 2022-05-03 20:51:43 +02:00
parent ba1dbe6111
commit 01f72d15b4
4 changed files with 56 additions and 11 deletions
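
Background for the diffs below: legacy zlib chunks carry no separate compressor ID at all, they are recognized purely by the zlib stream header, whose first two bytes follow the 0x.8.. pattern. A minimal sketch of that header check (standard RFC 1950 layout; illustrative, not necessarily the exact borg implementation):

def looks_like_zlib(data):
    # zlib streams start with a CMF byte (low nibble 8 = deflate) and a FLG byte
    # chosen so that CMF * 256 + FLG is divisible by 31 -- the 0x.8.. pattern
    if len(data) < 2:
        return False
    cmf, flg = data[0], data[1]
    return (cmf & 0x0f) == 8 and (cmf * 256 + flg) % 31 == 0

The new ZLIB class instead writes its own two ID bytes (0x05 0x00) in front of the zlib stream, like every other compressor, so no header-pattern guessing is needed anymore.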

View file

@@ -44,7 +44,7 @@
from .archive import has_link
from .cache import Cache, assert_secure, SecurityManager
from .constants import * # NOQA
from .compress import CompressionSpec
from .compress import CompressionSpec, ZLIB, ZLIB_legacy
from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required
from .crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey, FlexiKey
from .crypto.keymanager import KeyManager
@@ -351,6 +351,11 @@ def upgrade_item(item):
    item.get_size(memorize=True)  # if not already present: compute+remember size for items with chunks
    return item

def upgrade_compressed_chunk(chunk):
    if ZLIB_legacy.detect(chunk):
        chunk = ZLIB.ID + chunk  # get rid of the attic legacy: prepend separate type bytes for zlib
    return chunk

dry_run = args.dry_run
args.consider_checkpoints = True
@@ -378,6 +383,7 @@ def upgrade_item(item):
cdata = other_repository.get(chunk_id)
# keep compressed payload same, avoid decompression / recompression
data = other_key.decrypt(chunk_id, cdata, decompress=False)
data = upgrade_compressed_chunk(data)
chunk_entry = cache.add_chunk(chunk_id, data, archive.stats, wait=False,
                              compress=False, size=size)
cache.repository.async_response(wait=False)
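
At the byte level the upgrade is just a prefix operation: the zlib payload stays exactly as it was, only the two new ID bytes are prepended, so the transfer never has to decompress or recompress. A small illustration (the b'\x05\x00' value is the ZLIB ID from this commit; the snippet is illustrative, not borg code):

import zlib

ZLIB_ID = b'\x05\x00'                                  # new separate zlib type bytes
legacy_chunk = zlib.compress(b'some file data' * 100)  # legacy chunk: bare zlib stream (0x78 0x9c ...)
upgraded_chunk = ZLIB_ID + legacy_chunk                # upgraded chunk: ID bytes + unchanged payload
assert upgraded_chunk[2:] == legacy_chunk              # payload identical, no recompression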

View file

@@ -331,14 +331,52 @@ class ZSTD(DecidingCompressor):
        return dest[:osize]


class ZLIB(CompressorBase):
class ZLIB(DecidingCompressor):
    """
    zlib compression / decompression (python stdlib)
    """
    ID = b'\x08\x00'  # not used here, see detect()
    # avoid all 0x.8.. IDs elsewhere!
    ID = b'\x05\x00'
    name = 'zlib'

    def __init__(self, level=6, **kwargs):
        super().__init__(**kwargs)
        self.level = level

    def _decide(self, data):
        """
        Decides what to do with *data*. Returns (compressor, zlib_data).

        *zlib_data* is the ZLIB result if *compressor* is ZLIB as well, otherwise it is None.
        """
        zlib_data = zlib.compress(data, self.level)
        if len(zlib_data) < len(data):
            return self, zlib_data
        else:
            return NONE_COMPRESSOR, None

    def decompress(self, data):
        data = super().decompress(data)
        try:
            return zlib.decompress(data)
        except zlib.error as e:
            raise DecompressionError(str(e)) from None


class ZLIB_legacy(CompressorBase):
    """
    zlib compression / decompression (python stdlib)

    Note: This is the legacy ZLIB support as used by borg < 1.3.
    It still suffers from attic *only* supporting zlib and not having separate
    ID bytes to differentiate between differently compressed chunks.
    This just works because zlib compressed stuff always starts with 0x.8.. bytes.
    Newer borg uses the ZLIB class that has separate ID bytes (as all the other
    compressors) and does not need this hack.
    """
    ID = b'\x08\x00'  # not used here, see detect()
    # avoid all 0x.8.. IDs elsewhere!
    name = 'zlib_legacy'

    @classmethod
    def detect(cls, data):
        # matches misc. patterns 0x.8.. used by zlib
@@ -502,13 +540,14 @@ COMPRESSOR_TABLE = {
    CNONE.name: CNONE,
    LZ4.name: LZ4,
    ZLIB.name: ZLIB,
    ZLIB_legacy.name: ZLIB_legacy,
    LZMA.name: LZMA,
    Auto.name: Auto,
    ZSTD.name: ZSTD,
    ObfuscateSize.name: ObfuscateSize,
}

# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ]  # check fast stuff first
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ]  # check fast stuff first


def get_compressor(name, **kwargs):
    cls = COMPRESSOR_TABLE[name]
@@ -554,7 +593,7 @@ class CompressionSpec:
        self.name = values[0]
        if self.name in ('none', 'lz4', ):
            return
        elif self.name in ('zlib', 'lzma', ):
        elif self.name in ('zlib', 'lzma', 'zlib_legacy'):  # zlib_legacy just for testing
            if count < 2:
                level = 6  # default compression level in py stdlib
            elif count == 2:
@@ -597,7 +636,7 @@ class CompressionSpec:
    def compressor(self):
        if self.name in ('none', 'lz4', ):
            return get_compressor(self.name)
        elif self.name in ('zlib', 'lzma', 'zstd', ):
        elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'):
            return get_compressor(self.name, level=self.level)
        elif self.name == 'auto':
            return get_compressor(self.name, compressor=self.inner.compressor)
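
Since the new ZLIB is a DecidingCompressor, it only keeps the zlib output when that output is actually smaller than the input; otherwise _decide() hands the data over to the 'none' compressor. A rough usage sketch, assuming CompressionSpec and get_compressor behave as shown in the hunks above (the exact compress() return format may differ between borg versions):

import os
from borg.compress import CompressionSpec

compressor = CompressionSpec('zlib,6').compressor   # 'zlib,6' -> ZLIB at level 6

well_compressible = b'x' * 10000     # zlib shrinks this a lot -> stored zlib-compressed
incompressible = os.urandom(10000)   # zlib output would be larger -> handed to 'none'

stored_small = compressor.compress(well_compressible)
stored_big = compressor.compress(incompressible)
assert len(stored_small) < len(well_compressible)
# stored_big carries the 'none' compressor's ID bytes, so almost no size overhead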

View file

@@ -2442,7 +2442,7 @@ def test_compression_none_uncompressible(self):
    def test_compression_zlib_compressible(self):
        size, csize = self._get_sizes('zlib', compressible=True)
        assert csize < size * 0.1
        assert csize == 35
        assert csize == 37

    def test_compression_zlib_uncompressible(self):
        size, csize = self._get_sizes('zlib', compressible=False)

@@ -2451,7 +2451,7 @@ def test_compression_zlib_uncompressible(self):
    def test_compression_auto_compressible(self):
        size, csize = self._get_sizes('auto,zlib', compressible=True)
        assert csize < size * 0.1
        assert csize == 35  # same as compression 'zlib'
        assert csize == 37  # same as compression 'zlib'

    def test_compression_auto_uncompressible(self):
        size, csize = self._get_sizes('auto,zlib', compressible=False)
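
The expected csize grows from 35 to 37 here, which matches the two extra ID bytes the new ZLIB class now stores in front of the zlib stream (the legacy format stored the bare stream without a separate ID); the compressed payload itself is unchanged:

legacy_csize = 35                            # bare zlib stream, as stored before this change
new_csize = legacy_csize + len(b'\x05\x00')  # plus the new 2-byte compressor ID
assert new_csize == 37                       # matches the updated test expectation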

View file

@@ -88,11 +88,11 @@ def test_autodetect_invalid():
        Compressor(**params).decompress(b'\x08\x00notreallyzlib')


def test_zlib_compat():
def test_zlib_legacy_compat():
    # for compatibility reasons, we do not add an extra header for zlib,
    # nor do we expect one when decompressing / autodetecting
    for level in range(10):
        c = get_compressor(name='zlib', level=level)
        c = get_compressor(name='zlib_legacy', level=level)
        cdata1 = c.compress(data)
        cdata2 = zlib.compress(data, level)
        assert cdata1 == cdata2