add a test for borg 1 -> 2 repo objects transformation

Thomas Waldmann 2022-09-04 22:34:58 +02:00
parent 754c583799
commit b6cbf045ff
4 changed files with 49 additions and 10 deletions


@@ -2269,8 +2269,10 @@ class ArchiveRecreater:
         overwrite = self.recompress
         if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
             # Check if this chunk is already compressed the way we want it
-            _, old_chunk = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False)
-            compressor_cls, level = Compressor.detect(old_chunk)
+            old_meta, old_data = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False)
+            # TODO simplify code below
+            compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
+            compressor_cls, level = Compressor.detect(compr_hdr)
             if (
                 compressor_cls.name == self.repo_objs.compressor.decide(data).name
                 and level == self.repo_objs.compressor.level
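
For context, a minimal sketch (not part of this commit) of the recompression decision the hunk above implements: the type/level bytes are no longer stored with the payload, so they are rebuilt from the parsed metadata before Compressor.detect identifies the old algorithm. The import path and the target_compressor parameter are assumptions for illustration; in the commit the target comes from self.repo_objs.compressor.decide(data).

# Hedged sketch: decide whether a chunk needs recompression, given `old_meta`
# as returned by RepoObj.parse(..., decompress=False).
from borg.compress import Compressor  # assumed import path (installed borg package)

def needs_recompression(old_meta, target_compressor):
    compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))  # rebuild the 2-byte header
    compressor_cls, level = Compressor.detect(compr_hdr)        # old compressor class + level
    # recompress only if the stored choice differs from the target (hypothetical parameter)
    return (compressor_cls.name, level) != (target_compressor.name, target_compressor.level)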


@@ -44,11 +44,12 @@ class RepoObj:
             data_compressed = self.compressor.compress(data)  # TODO: compressor also adds compressor type/level bytes
             ctype = data_compressed[0]
             clevel = data_compressed[1]
+            data_compressed = data_compressed[2:]  # strip the type/level bytes
         else:
             assert isinstance(size, int)
             assert isinstance(ctype, int)
             assert isinstance(clevel, int)
-            data_compressed = data  # is already compressed
+            data_compressed = data  # is already compressed, is NOT prefixed by type/level bytes
         meta["size"] = size
         meta["csize"] = len(data_compressed)
         meta["ctype"] = ctype
@@ -94,10 +95,10 @@ class RepoObj:
             compr_hdr = bytes((ctype, clevel))
             compressor_cls, compression_level = Compressor.detect(compr_hdr)
             compressor = compressor_cls(level=compression_level)
-            data = compressor.decompress(data_compressed)  # TODO: decompressor still needs type/level bytes
+            data = compressor.decompress(compr_hdr + data_compressed)  # TODO: decompressor still needs type/level bytes
             self.key.assert_id(id, data)
         else:
-            data = data_compressed
+            data = data_compressed  # does not include the type/level bytes
         return meta, data
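
Taken together, the two RepoObj hunks above mean that format() strips the compressor's 2-byte type/level header from the stored payload (recording ctype/clevel in the metadata instead), and parse() prepends that header again before decompressing. A hedged roundtrip sketch, using only calls that appear in this commit and assuming a `key` object such as the PlaintextKey fixture in the tests below:

# Hedged sketch, not part of this commit.
repo_objs = RepoObj(key)
data = b"some chunk payload"
id = repo_objs.id_hash(data)
cdata = repo_objs.format(id, {}, data)  # compresses, strips the type/level bytes, encrypts
meta, got = repo_objs.parse(id, cdata)  # decrypts, rebuilds the 2-byte header, decompresses
assert got == data
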
@@ -125,7 +126,7 @@ class RepoObj1:  # legacy
             data_compressed = self.compressor.compress(data)  # TODO: compressor also adds compressor type/level bytes
         else:
             assert isinstance(size, int)
-            data_compressed = data  # is already compressed
+            data_compressed = data  # is already compressed, must include type/level bytes
         data_encrypted = self.key.encrypt(id, data_compressed)
         return data_encrypted
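
By contrast, the legacy RepoObj1 payload keeps the type/level bytes inline, which is why the old recreate code could call Compressor.detect on the raw chunk. A hedged sketch of decompressing such a legacy payload by hand, assuming `compr_data` as returned by RepoObj1.parse(..., decompress=False) and an installed borg package for the import path:

from borg.compress import Compressor  # assumed import path

compressor_cls, level = Compressor.detect(compr_data[:2])   # the type/level bytes prefix the payload
data = compressor_cls(level=level).decompress(compr_data)   # the decompressor expects the prefixed form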


@@ -191,7 +191,7 @@ class TestRepositoryCache:
         list(decrypted_cache.get_many([H1, H2, H3]))
         iterator = decrypted_cache.get_many([H1, H2, H3])
-        assert next(iterator) == (6, b"1234")
+        assert next(iterator) == (4, b"1234")
         with open(decrypted_cache.key_filename(H2), "a+b") as fd:
             fd.seek(-1, io.SEEK_END)
@@ -201,4 +201,4 @@ class TestRepositoryCache:
             fd.truncate()
         with pytest.raises(IntegrityError):
-            assert next(iterator) == (26, b"5678")
+            assert next(iterator) == (4, b"5678")


@@ -3,6 +3,7 @@ import pytest
 from ..crypto.key import PlaintextKey
 from ..repository import Repository
 from ..repoobj import RepoObj, RepoObj1
+from ..compress import LZ4


 @pytest.fixture
@@ -34,9 +35,8 @@ def test_format_parse_roundtrip(key):
     assert data == got_data
     edata = repo_objs.extract_crypted_data(cdata)
-    compressor = repo_objs.compressor
     key = repo_objs.key
-    assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level)))
+    assert edata.startswith(bytes((key.TYPE,)))


 def test_format_parse_roundtrip_borg1(key):  # legacy
@@ -57,3 +57,39 @@ def test_format_parse_roundtrip_borg1(key):  # legacy
     compressor = repo_objs.compressor
     key = repo_objs.key
     assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level)))
+
+
+def test_borg1_borg2_transition(key):
+    # borg transfer reads borg 1.x repo objects (without decompressing them),
+    # writes borg 2 repo objects (giving already compressed data to avoid compression).
+    meta = {}  # borg1 does not support this kind of metadata
+    data = b"foobar" * 10
+    len_data = len(data)
+    repo_objs1 = RepoObj1(key)
+    id = repo_objs1.id_hash(data)
+    borg1_cdata = repo_objs1.format(id, meta, data)
+    meta1, compr_data1 = repo_objs1.parse(id, borg1_cdata, decompress=False)  # borg transfer avoids (de)compression
+    # in borg 1, we can only get this metadata after decrypting the whole chunk (and we do not have "size" here):
+    assert meta1["ctype"] == LZ4.ID[0]  # default compression
+    assert meta1["clevel"] == 0xFF  # lz4 does not know levels (yet?)
+    assert meta1["csize"] < len_data  # lz4 should make it smaller
+
+    repo_objs2 = RepoObj(key)
+    # note: as we did not decompress, we do not have "size" and we need to get it from somewhere else.
+    # here, we just use len_data. for borg transfer, we also know the size from another metadata source.
+    borg2_cdata = repo_objs2.format(
+        id, meta1, compr_data1[2:], compress=False, size=len_data, ctype=meta1["ctype"], clevel=meta1["clevel"]
+    )
+    meta2, data2 = repo_objs2.parse(id, borg2_cdata)
+    assert data2 == data
+    assert meta2["ctype"] == LZ4.ID[0]
+    assert meta2["clevel"] == 0xFF
+    assert meta2["csize"] == meta1["csize"] - 2  # borg2 does not store the type/level bytes there
+    assert meta2["size"] == len_data
+
+    meta2 = repo_objs2.parse_meta(id, borg2_cdata)
+    # now, in borg 2, we have nice and separately decrypted metadata (no need to decrypt the whole chunk):
+    assert meta2["ctype"] == LZ4.ID[0]
+    assert meta2["clevel"] == 0xFF
+    assert meta2["csize"] == meta1["csize"] - 2  # borg2 does not store the type/level bytes there
+    assert meta2["size"] == len_data
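
The new test above exercises, per chunk, the same conversion that borg transfer performs. A hedged distillation of that pattern (the convert_chunk name and the size parameter are illustrative, not borg API):

def convert_chunk(repo_objs1, repo_objs2, id, borg1_cdata, size):
    # read the borg 1.x object without decompressing it
    meta, compr_data = repo_objs1.parse(id, borg1_cdata, decompress=False)
    # write a borg 2 object: drop the 2-byte type/level prefix from the payload and
    # keep ctype/clevel (plus the separately known size) in the metadata instead
    return repo_objs2.format(
        id, meta, compr_data[2:], compress=False, size=size,
        ctype=meta["ctype"], clevel=meta["clevel"],
    )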