diff --git a/src/borg/archive.py b/src/borg/archive.py index 490128f7d..d377871bc 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -2269,8 +2269,10 @@ class ArchiveRecreater: overwrite = self.recompress if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks: # Check if this chunk is already compressed the way we want it - _, old_chunk = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False) - compressor_cls, level = Compressor.detect(old_chunk) + old_meta, old_data = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False) + # TODO simplify code below + compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"])) + compressor_cls, level = Compressor.detect(compr_hdr) if ( compressor_cls.name == self.repo_objs.compressor.decide(data).name and level == self.repo_objs.compressor.level diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index 3979fa6a9..3f49c11c1 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -44,11 +44,12 @@ class RepoObj: data_compressed = self.compressor.compress(data) # TODO: compressor also adds compressor type/level bytes ctype = data_compressed[0] clevel = data_compressed[1] + data_compressed = data_compressed[2:] # strip the type/level bytes else: assert isinstance(size, int) assert isinstance(ctype, int) assert isinstance(clevel, int) - data_compressed = data # is already compressed + data_compressed = data # is already compressed, is NOT prefixed by type/level bytes meta["size"] = size meta["csize"] = len(data_compressed) meta["ctype"] = ctype @@ -94,10 +95,10 @@ class RepoObj: compr_hdr = bytes((ctype, clevel)) compressor_cls, compression_level = Compressor.detect(compr_hdr) compressor = compressor_cls(level=compression_level) - data = compressor.decompress(data_compressed) # TODO: decompressor still needs type/level bytes + data = compressor.decompress(compr_hdr + data_compressed) # TODO: decompressor still needs type/level bytes 
self.key.assert_id(id, data) else: - data = data_compressed + data = data_compressed # does not include the type/level bytes return meta, data @@ -125,7 +126,7 @@ class RepoObj1: # legacy data_compressed = self.compressor.compress(data) # TODO: compressor also adds compressor type/level bytes else: assert isinstance(size, int) - data_compressed = data # is already compressed + data_compressed = data # is already compressed, must include type/level bytes data_encrypted = self.key.encrypt(id, data_compressed) return data_encrypted diff --git a/src/borg/testsuite/remote.py b/src/borg/testsuite/remote.py index 95375dc09..35f2b6df3 100644 --- a/src/borg/testsuite/remote.py +++ b/src/borg/testsuite/remote.py @@ -191,7 +191,7 @@ class TestRepositoryCache: list(decrypted_cache.get_many([H1, H2, H3])) iterator = decrypted_cache.get_many([H1, H2, H3]) - assert next(iterator) == (6, b"1234") + assert next(iterator) == (4, b"1234") with open(decrypted_cache.key_filename(H2), "a+b") as fd: fd.seek(-1, io.SEEK_END) @@ -201,4 +201,4 @@ class TestRepositoryCache: fd.truncate() with pytest.raises(IntegrityError): - assert next(iterator) == (26, b"5678") + assert next(iterator) == (4, b"5678") diff --git a/src/borg/testsuite/repoobj.py b/src/borg/testsuite/repoobj.py index b48876c2d..b7b452bac 100644 --- a/src/borg/testsuite/repoobj.py +++ b/src/borg/testsuite/repoobj.py @@ -3,6 +3,7 @@ import pytest from ..crypto.key import PlaintextKey from ..repository import Repository from ..repoobj import RepoObj, RepoObj1 +from ..compress import LZ4 @pytest.fixture @@ -34,9 +35,8 @@ def test_format_parse_roundtrip(key): assert data == got_data edata = repo_objs.extract_crypted_data(cdata) - compressor = repo_objs.compressor key = repo_objs.key - assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level))) + assert edata.startswith(bytes((key.TYPE,))) def test_format_parse_roundtrip_borg1(key): # legacy @@ -57,3 +57,39 @@ def test_format_parse_roundtrip_borg1(key): # legacy 
def test_borg1_borg2_transition(key):
    """Re-pack a borg 1.x repo object as a borg 2 repo object without recompressing it.

    This follows what borg transfer does: read borg 1.x repo objects without
    decompressing them, then write borg 2 repo objects while handing over the
    already compressed payload, so that no compression takes place again.
    """
    payload = b"foobar" * 10
    payload_size = len(payload)

    # borg 1.x side: build a legacy object and parse it back *without* decompression.
    legacy_objs = RepoObj1(key)
    chunk_id = legacy_objs.id_hash(payload)
    # borg1 does not support this kind of metadata, hence the empty dict.
    legacy_cdata = legacy_objs.format(chunk_id, {}, payload)
    # borg transfer avoids (de)compression, so we ask for the compressed payload as-is.
    legacy_meta, legacy_compressed = legacy_objs.parse(chunk_id, legacy_cdata, decompress=False)

    # In borg 1, this metadata only becomes available after decrypting the whole chunk
    # (and there is no "size" entry here).
    assert legacy_meta["ctype"] == LZ4.ID[0]  # default compression
    assert legacy_meta["clevel"] == 0xFF  # lz4 does not know levels (yet?)
    assert legacy_meta["csize"] < payload_size  # lz4 should make it smaller

    def check_borg2_meta(borg2_meta):
        # Expectations shared by the metadata from parse() and from parse_meta().
        assert borg2_meta["ctype"] == LZ4.ID[0]
        assert borg2_meta["clevel"] == 0xFF
        # borg2 does not store the type/level bytes there, so csize is 2 bytes smaller.
        assert borg2_meta["csize"] == legacy_meta["csize"] - 2
        assert borg2_meta["size"] == payload_size

    # borg 2 side: feed the compressed payload minus its first two bytes into format().
    new_objs = RepoObj(key)
    headerless_payload = legacy_compressed[2:]
    # Note: as nothing was decompressed, "size" is unknown at this point and has to come
    # from somewhere else. Here the known payload length is used; for borg transfer,
    # the size is also known from another metadata source.
    new_cdata = new_objs.format(
        chunk_id,
        legacy_meta,
        headerless_payload,
        compress=False,
        size=payload_size,
        ctype=legacy_meta["ctype"],
        clevel=legacy_meta["clevel"],
    )

    # A full parse must give back the original plaintext plus consistent metadata.
    parsed_meta, parsed_data = new_objs.parse(chunk_id, new_cdata)
    assert parsed_data == payload
    check_borg2_meta(parsed_meta)

    # In borg 2, the metadata is nicely and separately decrypted
    # (no need to decrypt the whole chunk).
    check_borg2_meta(new_objs.parse_meta(chunk_id, new_cdata))