recreate: consider level for recompression, fixes #6698, fixes #3622

This commit is contained in:
Thomas Waldmann 2022-07-05 02:38:09 +02:00
parent 820a927b06
commit 0dc25000a9
4 changed files with 13 additions and 10 deletions

View File

@ -2080,8 +2080,9 @@ class ArchiveRecreater:
if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
# Check if this chunk is already compressed the way we want it
old_chunk = self.key.decrypt(chunk_id, self.repository.get(chunk_id), decompress=False)
if Compressor.detect(old_chunk).name == self.key.compressor.decide(data).name:
# Stored chunk has the same compression we wanted
compressor_cls, level = Compressor.detect(old_chunk)
if compressor_cls.name == self.key.compressor.decide(data).name and level == self.key.compressor.level:
# Stored chunk has the same compression method and level as we wanted
overwrite = False
chunk_entry = self.cache.add_chunk(chunk_id, data, target.stats, overwrite=overwrite, wait=False)
self.cache.repository.async_response(wait=False)

View File

@ -4809,9 +4809,8 @@ class Archiver:
help='recompress data chunks according to `MODE` and ``--compression``. '
'Possible modes are '
'`if-different`: recompress if current compression is with a different '
'compression algorithm (the level is not considered); '
'`always`: recompress even if current compression is with the same '
'compression algorithm (use this to change the compression level); and '
'compression algorithm or different level; '
'`always`: recompress unconditionally; and '
'`never`: do not recompress (use this option to explicitly prevent '
'recompression). '
'If no MODE is given, `if-different` will be used. '

View File

@ -66,6 +66,7 @@ cdef class CompressorBase:
def __init__(self, level=255, **kwargs):
assert 0 <= level <= 255
self.level = level
if self.ID is not None:
self.id_level = self.ID + bytes((level, )) # level 255 means "unknown level"
assert len(self.id_level) == 2
@ -539,7 +540,8 @@ class ObfuscateSize(CompressorBase):
compr_size = self.header_fmt.unpack(obfuscated_data[0:self.header_len])[0]
compressed_data = obfuscated_data[self.header_len:self.header_len+compr_size]
if self.compressor is None:
self.compressor = Compressor.detect(compressed_data)()
compressor_cls = Compressor.detect(compressed_data)[0]
self.compressor = compressor_cls()
return self.compressor.decompress(compressed_data) # decompress data
@ -578,15 +580,16 @@ class Compressor:
return self.compressor.compress(data)
def decompress(self, data):
    """Decompress *data* using the compressor class identified from its header.

    ``detect()`` yields a ``(compressor_cls, level)`` pair; only the class is
    needed to decompress, so the level is ignored here. A fresh compressor
    instance is created with ``self.params`` for every call.
    """
    compressor_cls = self.detect(data)[0]
    return compressor_cls(**self.params).decompress(data)
@staticmethod
def detect(data):
    """Identify the compressor class and level from a chunk's 2-byte header.

    Each class in COMPRESSOR_LIST is asked, in order, whether it recognizes
    the header; the first one that does wins.

    Returns a ``(compressor_cls, level)`` tuple. ``level`` is the second
    header byte, except for ``zlib_legacy`` where that byte is not a level
    and 255 ("unknown level") is returned instead.

    Raises ValueError if no class in COMPRESSOR_LIST recognizes the header.
    """
    hdr = bytes(data[:2])  # detect() does not work with memoryview
    level = hdr[1]  # usually the level, but not for zlib_legacy
    for cls in COMPRESSOR_LIST:
        if cls.detect(hdr):
            return cls, (255 if cls.name == 'zlib_legacy' else level)
    # Interpolate the header into the message; passing it as a second
    # ValueError argument would leave the '%r' placeholder unformatted.
    raise ValueError('No decompressor for this data found: %r.' % (hdr,))

View File

@ -133,11 +133,11 @@ def test_auto():
compressed_lz4 = compressor_lz4.compress(data)
compressed_zlib = compressor_zlib.compress(data)
ratio = len(compressed_zlib) / len(compressed_lz4)
assert Compressor.detect(compressed_auto_zlib) == ZLIB if ratio < 0.99 else LZ4
assert Compressor.detect(compressed_auto_zlib)[0] == ZLIB if ratio < 0.99 else LZ4
data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~'
compressed = compressor_auto_zlib.compress(data)
assert Compressor.detect(compressed) == CNONE
assert Compressor.detect(compressed)[0] == CNONE
def test_obfuscate():