mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-24 23:13:25 +00:00
transfer: remove the zlib type bytes hack
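About the hack: see the docstring of the ZLIB_legacy class. The new, clean ZLIB class works like every other compressor and has its own separate ID bytes. ZLIB ID: 0x0500. ZLIB_legacy has no separate ID bytes; it is detected by the 0x.8.. byte patterns that zlib-compressed data starts with.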
This commit is contained in:
parent
ba1dbe6111
commit
01f72d15b4
4 changed files with 56 additions and 11 deletions
|
@ -44,7 +44,7 @@
|
|||
from .archive import has_link
|
||||
from .cache import Cache, assert_secure, SecurityManager
|
||||
from .constants import * # NOQA
|
||||
from .compress import CompressionSpec
|
||||
from .compress import CompressionSpec, ZLIB, ZLIB_legacy
|
||||
from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required
|
||||
from .crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey, FlexiKey
|
||||
from .crypto.keymanager import KeyManager
|
||||
|
@ -351,6 +351,11 @@ def upgrade_item(item):
|
|||
item.get_size(memorize=True) # if not already present: compute+remember size for items with chunks
|
||||
return item
|
||||
|
||||
def upgrade_compressed_chunk(chunk):
|
||||
if ZLIB_legacy.detect(chunk):
|
||||
chunk = ZLIB.ID + chunk # get rid of the attic legacy: prepend separate type bytes for zlib
|
||||
return chunk
|
||||
|
||||
dry_run = args.dry_run
|
||||
|
||||
args.consider_checkpoints = True
|
||||
|
@ -378,6 +383,7 @@ def upgrade_item(item):
|
|||
cdata = other_repository.get(chunk_id)
|
||||
# keep compressed payload same, avoid decompression / recompression
|
||||
data = other_key.decrypt(chunk_id, cdata, decompress=False)
|
||||
data = upgrade_compressed_chunk(data)
|
||||
chunk_entry = cache.add_chunk(chunk_id, data, archive.stats, wait=False,
|
||||
compress=False, size=size)
|
||||
cache.repository.async_response(wait=False)
|
||||
|
|
|
@ -331,14 +331,52 @@ class ZSTD(DecidingCompressor):
|
|||
return dest[:osize]
|
||||
|
||||
|
||||
class ZLIB(CompressorBase):
|
||||
class ZLIB(DecidingCompressor):
|
||||
"""
|
||||
zlib compression / decompression (python stdlib)
|
||||
"""
|
||||
ID = b'\x08\x00' # not used here, see detect()
|
||||
# avoid all 0x.8.. IDs elsewhere!
|
||||
ID = b'\x05\x00'
|
||||
name = 'zlib'
|
||||
|
||||
def __init__(self, level=6, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.level = level
|
||||
|
||||
def _decide(self, data):
|
||||
"""
|
||||
Decides what to do with *data*. Returns (compressor, zlib_data).
|
||||
|
||||
*zlib_data* is the ZLIB result if *compressor* is ZLIB as well, otherwise it is None.
|
||||
"""
|
||||
zlib_data = zlib.compress(data, self.level)
|
||||
if len(zlib_data) < len(data):
|
||||
return self, zlib_data
|
||||
else:
|
||||
return NONE_COMPRESSOR, None
|
||||
|
||||
def decompress(self, data):
|
||||
data = super().decompress(data)
|
||||
try:
|
||||
return zlib.decompress(data)
|
||||
except zlib.error as e:
|
||||
raise DecompressionError(str(e)) from None
|
||||
|
||||
|
||||
class ZLIB_legacy(CompressorBase):
|
||||
"""
|
||||
zlib compression / decompression (python stdlib)
|
||||
|
||||
Note: This is the legacy ZLIB support as used by borg < 1.3.
|
||||
It still suffers from attic *only* supporting zlib and not having separate
|
||||
ID bytes to differentiate between differently compressed chunks.
|
||||
This just works because zlib compressed stuff always starts with 0x.8.. bytes.
|
||||
Newer borg uses the ZLIB class that has separate ID bytes (as all the other
|
||||
compressors) and does not need this hack.
|
||||
"""
|
||||
ID = b'\x08\x00' # not used here, see detect()
|
||||
# avoid all 0x.8.. IDs elsewhere!
|
||||
name = 'zlib_legacy'
|
||||
|
||||
@classmethod
|
||||
def detect(cls, data):
|
||||
# matches misc. patterns 0x.8.. used by zlib
|
||||
|
@ -502,13 +540,14 @@ COMPRESSOR_TABLE = {
|
|||
CNONE.name: CNONE,
|
||||
LZ4.name: LZ4,
|
||||
ZLIB.name: ZLIB,
|
||||
ZLIB_legacy.name: ZLIB_legacy,
|
||||
LZMA.name: LZMA,
|
||||
Auto.name: Auto,
|
||||
ZSTD.name: ZSTD,
|
||||
ObfuscateSize.name: ObfuscateSize,
|
||||
}
|
||||
# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
|
||||
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ] # check fast stuff first
|
||||
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ] # check fast stuff first
|
||||
|
||||
def get_compressor(name, **kwargs):
|
||||
cls = COMPRESSOR_TABLE[name]
|
||||
|
@ -554,7 +593,7 @@ class CompressionSpec:
|
|||
self.name = values[0]
|
||||
if self.name in ('none', 'lz4', ):
|
||||
return
|
||||
elif self.name in ('zlib', 'lzma', ):
|
||||
elif self.name in ('zlib', 'lzma', 'zlib_legacy'): # zlib_legacy just for testing
|
||||
if count < 2:
|
||||
level = 6 # default compression level in py stdlib
|
||||
elif count == 2:
|
||||
|
@ -597,7 +636,7 @@ class CompressionSpec:
|
|||
def compressor(self):
|
||||
if self.name in ('none', 'lz4', ):
|
||||
return get_compressor(self.name)
|
||||
elif self.name in ('zlib', 'lzma', 'zstd', ):
|
||||
elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'):
|
||||
return get_compressor(self.name, level=self.level)
|
||||
elif self.name == 'auto':
|
||||
return get_compressor(self.name, compressor=self.inner.compressor)
|
||||
|
|
|
@ -2442,7 +2442,7 @@ def test_compression_none_uncompressible(self):
|
|||
def test_compression_zlib_compressible(self):
|
||||
size, csize = self._get_sizes('zlib', compressible=True)
|
||||
assert csize < size * 0.1
|
||||
assert csize == 35
|
||||
assert csize == 37
|
||||
|
||||
def test_compression_zlib_uncompressible(self):
|
||||
size, csize = self._get_sizes('zlib', compressible=False)
|
||||
|
@ -2451,7 +2451,7 @@ def test_compression_zlib_uncompressible(self):
|
|||
def test_compression_auto_compressible(self):
|
||||
size, csize = self._get_sizes('auto,zlib', compressible=True)
|
||||
assert csize < size * 0.1
|
||||
assert csize == 35 # same as compression 'zlib'
|
||||
assert csize == 37 # same as compression 'zlib'
|
||||
|
||||
def test_compression_auto_uncompressible(self):
|
||||
size, csize = self._get_sizes('auto,zlib', compressible=False)
|
||||
|
|
|
@ -88,11 +88,11 @@ def test_autodetect_invalid():
|
|||
Compressor(**params).decompress(b'\x08\x00notreallyzlib')
|
||||
|
||||
|
||||
def test_zlib_compat():
|
||||
def test_zlib_legacy_compat():
|
||||
# for compatibility reasons, we do not add an extra header for zlib,
|
||||
# nor do we expect one when decompressing / autodetecting
|
||||
for level in range(10):
|
||||
c = get_compressor(name='zlib', level=level)
|
||||
c = get_compressor(name='zlib_legacy', level=level)
|
||||
cdata1 = c.compress(data)
|
||||
cdata2 = zlib.compress(data, level)
|
||||
assert cdata1 == cdata2
|
||||
|
|
Loading…
Reference in a new issue