compression: use the 2 bytes for type and level, fixes #6698

Adapt borg transfer: transferred chunks are set to compression level "unknown".
This commit is contained in:
Thomas Waldmann 2022-05-17 22:54:12 +02:00
parent 32a3601e4a
commit 6584a92c81
3 changed files with 42 additions and 29 deletions

View File

@ -379,8 +379,13 @@ class Archiver:
return new_item
def upgrade_compressed_chunk(chunk):
level = b'\xFF' # FF means unknown compression level
if ZLIB_legacy.detect(chunk):
chunk = ZLIB.ID + chunk # get rid of the attic legacy: prepend separate type bytes for zlib
ctype = ZLIB.ID
chunk = ctype + level + chunk # get rid of the attic legacy: prepend separate type/level bytes
else:
ctype = chunk[0:1]
chunk = ctype + level + chunk[2:] # keep type same, but set level
return chunk
dry_run = args.dry_run

View File

@ -56,16 +56,21 @@ cdef class CompressorBase:
also handles compression format auto detection and
adding/stripping the ID header (which enable auto detection).
"""
ID = b'\xFF\xFF' # reserved and not used
# overwrite with a unique 2-bytes bytestring in child classes
ID = b'\xFF' # reserved and not used
# overwrite with a unique 1-byte bytestring in child classes
name = 'baseclass'
@classmethod
def detect(cls, data):
return data.startswith(cls.ID)
def __init__(self, **kwargs):
pass
def __init__(self, level=255, **kwargs):
assert 0 <= level <= 255
if self.ID is not None:
self.id_level = self.ID + bytes((level, )) # level 255 means "unknown level"
assert len(self.id_level) == 2
else:
self.id_level = None
def decide(self, data):
"""
@ -85,8 +90,8 @@ cdef class CompressorBase:
Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor,
which is needed so that the correct decompressor can be used for decompression.
"""
# add ID bytes
return self.ID + data
# add id_level bytes
return self.id_level + data
def decompress(self, data):
"""
@ -96,7 +101,7 @@ cdef class CompressorBase:
Only handles input generated by _this_ Compressor - for a general purpose
decompression method see *Compressor.decompress*.
"""
# strip ID bytes
# strip id_level bytes
return data[2:]
cdef class DecidingCompressor(CompressorBase):
@ -106,8 +111,8 @@ cdef class DecidingCompressor(CompressorBase):
"""
name = 'decidebaseclass'
def __init__(self, **kwargs):
super().__init__(**kwargs)
def __init__(self, level=255, **kwargs):
super().__init__(level=level, **kwargs)
def _decide(self, data):
"""
@ -148,9 +153,12 @@ class CNONE(CompressorBase):
"""
none - no compression, just pass through data
"""
ID = b'\x00\x00'
ID = b'\x00'
name = 'none'
def __init__(self, level=255, **kwargs):
super().__init__(level=level, **kwargs) # no defined levels for CNONE, so just say "unknown"
def compress(self, data):
return super().compress(data)
@ -170,11 +178,11 @@ class LZ4(DecidingCompressor):
- wrapper releases CPython's GIL to support multithreaded code
- uses safe lz4 methods that never go beyond the end of the output buffer
"""
ID = b'\x01\x00'
ID = b'\x01'
name = 'lz4'
def __init__(self, **kwargs):
pass
def __init__(self, level=255, **kwargs):
super().__init__(level=level, **kwargs) # no defined levels for LZ4, so just say "unknown"
def _decide(self, idata):
"""
@ -235,11 +243,11 @@ class LZMA(DecidingCompressor):
"""
lzma compression / decompression
"""
ID = b'\x02\x00'
ID = b'\x02'
name = 'lzma'
def __init__(self, level=6, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level
if lzma is None:
raise ValueError('No lzma support found.')
@ -270,11 +278,11 @@ class ZSTD(DecidingCompressor):
# This is a NOT THREAD SAFE implementation.
# Only ONE python context must be created at a time.
# It should work flawlessly as long as borg will call ONLY ONE compression job at time.
ID = b'\x03\x00'
ID = b'\x03'
name = 'zstd'
def __init__(self, level=3, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level
def _decide(self, idata):
@ -335,11 +343,11 @@ class ZLIB(DecidingCompressor):
"""
zlib compression / decompression (python stdlib)
"""
ID = b'\x05\x00'
ID = b'\x05'
name = 'zlib'
def __init__(self, level=6, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level
def _decide(self, data):
@ -373,8 +381,8 @@ class ZLIB_legacy(CompressorBase):
Newer borg uses the ZLIB class that has separate ID bytes (as all the other
compressors) and does not need this hack.
"""
ID = b'\x08\x00' # not used here, see detect()
# avoid all 0x.8.. IDs elsewhere!
ID = b'\x08' # not used here, see detect()
# avoid all 0x.8 IDs elsewhere!
name = 'zlib_legacy'
@classmethod
@ -386,7 +394,7 @@ class ZLIB_legacy(CompressorBase):
return check_ok and is_deflate
def __init__(self, level=6, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level
def compress(self, data):
@ -478,14 +486,14 @@ class ObfuscateSize(CompressorBase):
"""
Meta-Compressor that obfuscates the compressed data size.
"""
ID = b'\x04\x00'
ID = b'\x04'
name = 'obfuscate'
header_fmt = Struct('>I')
header_len = len(header_fmt.pack(0))
def __init__(self, level=None, compressor=None):
super().__init__()
super().__init__(level=level) # data will be encrypted, so we can tell the level
self.compressor = compressor
if level is None:
pass # decompression

View File

@ -256,8 +256,8 @@ class TestKey:
plaintext = b'123456789'
id = key.id_hash(plaintext)
authenticated = key.encrypt(id, plaintext)
# 0x07 is the key TYPE, \x0000 identifies no compression.
assert authenticated == b'\x07\x00\x00' + plaintext
# 0x07 is the key TYPE, 0x00ff identifies no compression / unknown level.
assert authenticated == b'\x07\x00\xff' + plaintext
def test_blake2_authenticated_encrypt(self, monkeypatch):
monkeypatch.setenv('BORG_PASSPHRASE', 'test')
@ -267,8 +267,8 @@ class TestKey:
plaintext = b'123456789'
id = key.id_hash(plaintext)
authenticated = key.encrypt(id, plaintext)
# 0x06 is the key TYPE, 0x0000 identifies no compression.
assert authenticated == b'\x06\x00\x00' + plaintext
# 0x06 is the key TYPE, 0x00ff identifies no compression / unknown level.
assert authenticated == b'\x06\x00\xff' + plaintext
class TestTAM: