tests for borg transfer --upgrader=From12To20

Thomas Waldmann 2022-11-28 14:28:22 +01:00
parent 87e597011a
commit 6a82d01b35
GPG Key ID: 243ACFA951F78E01
3 changed files with 324 additions and 0 deletions

scripts/make-testdata/test_transfer_upgrade.sh

@ -0,0 +1,76 @@
# this script uses borg 1.2 to generate test data for "borg transfer --upgrader=From12To20"
BORG=./borg-1.2.2
# on macOS, gnu tar is available as gtar
TAR=gtar
SRC=/tmp/borgtest
ARCHIVE=`pwd`/src/borg/testsuite/archiver/repo12.tar.gz
export BORG_REPO=/tmp/repo12
META=$BORG_REPO/test_meta
export BORG_PASSPHRASE="waytooeasyonlyfortests"
export BORG_DELETE_I_KNOW_WHAT_I_AM_DOING=YES
$BORG init -e repokey 2> /dev/null
mkdir $META
# archive1
mkdir $SRC
pushd $SRC >/dev/null
mkdir directory
echo "content" > directory/no_hardlink
echo "hardlink content" > hardlink1
ln hardlink1 hardlink2
echo "symlinked content" > target
ln -s target symlink
ln -s doesnotexist broken_symlink
mkfifo fifo
touch without_xattrs
touch with_xattrs
xattr -w key1 value with_xattrs
xattr -w key2 "" with_xattrs
touch without_flags
touch with_flags
chflags nodump with_flags
popd >/dev/null
$BORG create ::archive1 $SRC
$BORG list ::archive1 --json-lines > $META/archive1_list.json
rm -rf $SRC
# archive2
mkdir $SRC
pushd $SRC >/dev/null
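# creating device nodes and chown'ing to an arbitrary uid/gid requires root, hence sudo below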
sudo mkdir root_stuff
sudo mknod root_stuff/bdev_12_34 b 12 34
sudo mknod root_stuff/cdev_34_56 c 34 56
sudo touch root_stuff/strange_uid_gid # no user name, no group name for this uid/gid!
sudo chown 54321:54321 root_stuff/strange_uid_gid
popd >/dev/null
$BORG create ::archive2 $SRC
$BORG list ::archive2 --json-lines > $META/archive2_list.json
sudo rm -rf $SRC/root_stuff
rm -rf $SRC
$BORG --version > $META/borg_version.txt
$BORG list :: --json > $META/repo_list.json
pushd $BORG_REPO >/dev/null
$TAR czf $ARCHIVE .
popd >/dev/null
$BORG delete :: 2> /dev/null

src/borg/testsuite/archiver/repo12.tar.gz (binary file not shown)


@ -1,6 +1,12 @@
import json
import os
import stat
import tarfile
from datetime import timezone
import unittest
from ...constants import * # NOQA
from ...helpers.time import parse_timestamp
from . import ArchiverTestCaseBase, RemoteArchiverTestCaseBase, ArchiverTestCaseBinaryBase, RK_ENCRYPTION, BORG_EXES
@ -31,11 +37,253 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(repo2, "transfer", other_repo1, "--dry-run")
check_repo(repo2)
def test_transfer_upgrade(self):
# test upgrading a borg 1.2 repo to borg 2
# testing using json is a bit problematic because parseformat (used for json dumping)
# already tweaks the values a bit for better printability (like e.g. using the empty
# string for attributes that are not present).
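# (illustrative, not from the original data): e.g. an item that has no "source" attribute
# is still dumped with "source": "", so "absent" and "empty" cannot be told apart in the json
# and the expectations below have to be adjusted accordingly.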
# borg 1.2 repo dir contents, created by: scripts/make-testdata/test_transfer_upgrade.sh
repo12_tar = os.path.join(os.path.dirname(__file__), "repo12.tar.gz")
repo12_tzoffset = "+01:00" # timezone used to create the repo/archives/json dumps inside the tar file
def local_to_utc(local_naive, tzoffset, tzinfo):
# local_naive was meant to be in tzoffset timezone (e.g. "+01:00"),
# but we want it non-naive in tzinfo time zone (e.g. timezone.utc).
ts = parse_timestamp(local_naive + tzoffset)
return ts.astimezone(tzinfo).isoformat(timespec="microseconds")
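# illustrative example (made-up values): local_to_utc("2022-11-28T14:28:22", "+01:00", timezone.utc)
# returns "2022-11-28T13:28:22.000000+00:00"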
dst_dir = f"{self.repository_location}1"
os.makedirs(dst_dir)
with tarfile.open(repo12_tar) as tf:
tf.extractall(dst_dir)
other_repo1 = f"--other-repo={self.repository_location}1"
repo2 = f"--repo={self.repository_location}2"
assert os.environ.get("BORG_PASSPHRASE") == "waytooeasyonlyfortests"
os.environ["BORG_TESTONLY_WEAKEN_KDF"] = "0" # must use the strong kdf here or it can't decrypt the key
self.cmd(repo2, "rcreate", RK_ENCRYPTION, other_repo1)
self.cmd(repo2, "transfer", other_repo1, "--upgrader=From12To20")
self.cmd(repo2, "check")
# check list of archives / manifest
rlist_json = self.cmd(repo2, "rlist", "--json")
got = json.loads(rlist_json)
with open(os.path.join(dst_dir, "test_meta", "repo_list.json")) as f:
expected = json.load(f)
for key in "encryption", "repository":
# some stuff obviously needs to be different, remove that!
del got[key]
del expected[key]
assert len(got["archives"]) == len(expected["archives"])
for got_archive, expected_archive in zip(got["archives"], expected["archives"]):
del got_archive["id"]
del expected_archive["id"]
# timestamps:
# borg 1.2 transformed to local time and had microseconds = 0, no tzoffset
# borg 2 uses a utc timestamp, with microseconds and with tzoffset
for key in "start", "time":
# fix expectation: local time meant +01:00, so we convert that to utc +00:00
expected_archive[key] = local_to_utc(expected_archive[key], repo12_tzoffset, timezone.utc)
# set microseconds to 0, so we can compare got with expected.
got_ts = parse_timestamp(got_archive[key])
got_archive[key] = got_ts.replace(microsecond=0).isoformat(timespec="microseconds")
assert got == expected
for archive in got["archives"]:
name = archive["name"]
# check archive contents
list_json = self.cmd(repo2, "list", "--json-lines", name)
got = [json.loads(line) for line in list_json.splitlines()]
with open(os.path.join(dst_dir, "test_meta", f"{name}_list.json")) as f:
lines = f.read()
expected = [json.loads(line) for line in lines.splitlines()]
hardlinks = {}
for g, e in zip(got, expected):
print(f"exp: {e}\ngot: {g}\n")
# borg 1.2 parseformat uses .get("bsdflags", 0) so the json has 0 even
# if there were no bsdflags stored in the item.
# borg 2 parseformat uses .get("bsdflags"), so the json has either an int
# (if the archived item has bsdflags) or None (if the item has no bsdflags).
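# illustrative json fragments for an item that never had bsdflags stored:
#   borg 1.2: "flags": 0        borg 2: "flags": null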
if e["flags"] == 0 and g["flags"] is None:
# this is expected behaviour, fix the expectation
e["flags"] = None
# borg2 parseformat falls back to str(item.uid) if it does not have item.user,
# same for str(item.gid) and no item.group.
# so user/group are always str type, even if it is just str(uid) or str(gid).
# fix expectation (borg1 used int type for user/group in that case):
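# illustrative example: for uid 54321 without a passwd entry, borg 1.2 dumped "user": 54321 (int),
# while borg 2 dumps "user": "54321" (str).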
if g["user"] == str(g["uid"]) == str(e["uid"]):
e["user"] = str(e["uid"])
if g["group"] == str(g["gid"]) == str(e["gid"]):
e["group"] = str(e["gid"])
for key in "mtime", "ctime", "atime":
if key in e:
e[key] = local_to_utc(e[key], repo12_tzoffset, timezone.utc)
# borg 1 used hardlink slaves linking back to their hardlink masters.
# borg 2 uses a symmetric approach: just normal items. if they are hardlinks,
# each item has normal attributes, including the chunks list and size. additionally,
# they have an hlid; the same hlid means the same inode / belonging to the same set of hardlinks.
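# illustrative example: borg 1.2 listed one of the two hardlinks as a "slave" item with a mode
# starting with "h", size 0 and source pointing at the other hardlink; borg 2 lists it as a
# regular file with its real size, its own chunks list and an hlid equal to the other one's hlid.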
hardlink = bool(g.get("hlid")) # note: json has "" as hlid if there is no hlid in the item
if hardlink:
hardlinks[g["path"]] = g["hlid"]
if e["mode"].startswith("h"):
# fix expectations: borg1 signalled a hardlink slave with "h"
# borg2 treats all hardlinks symmetrically as normal files
e["mode"] = g["mode"][0] + e["mode"][1:]
# borg1 used source/linktarget to link back to hardlink master
assert e["source"] != ""
assert e["linktarget"] != ""
# fix expectations: borg2 does not use source/linktarget any more for hardlinks
e["source"] = ""
e["linktarget"] = ""
# borg 1 has size == 0 for hardlink slaves, borg 2 has the real file size
assert e["size"] == 0
assert g["size"] >= 0
# fix expectation for size
e["size"] = g["size"]
# Note: size == 0 for all items without a size or chunks list (like e.g. directories)
# Note: healthy == True indicates the *absence* of the additional chunks_healthy list
del g["hlid"]
assert g == e
if name == "archive1":
# hardlinks referring to same inode have same hlid
assert hardlinks["tmp/borgtest/hardlink1"] == hardlinks["tmp/borgtest/hardlink2"]
self.repository_path = f"{self.repository_location}2"
for archive_name in ("archive1", "archive2"):
archive, repository = self.open_archive(archive_name)
with repository:
for item in archive.iter_items():
print(item)
# borg1 used to store some stuff with None values,
# borg2 just does not have the key if the value is not known.
item_dict = item.as_dict()
assert not any(value is None for value in item_dict.values()), f"found None value in {item_dict}"
# with borg2, all items with chunks must have a precomputed size
assert "chunks" not in item or "size" in item and item.size >= 0
if item.path.endswith("directory") or item.path.endswith("borgtest"):
assert stat.S_ISDIR(item.mode)
assert item.uid > 0
assert "hlid" not in item
elif item.path.endswith("no_hardlink") or item.path.endswith("target"):
assert stat.S_ISREG(item.mode)
assert item.uid > 0
assert "hlid" not in item
assert len(item.chunks) > 0
assert "bsdflags" not in item
elif item.path.endswith("hardlink1"):
assert stat.S_ISREG(item.mode)
assert item.uid > 0
assert "hlid" in item and len(item.hlid) == 32 # 256bit
hlid1 = item.hlid
assert len(item.chunks) > 0
chunks1 = item.chunks
size1 = item.size
assert "source" not in item
assert "hardlink_master" not in item
elif item.path.endswith("hardlink2"):
assert stat.S_ISREG(item.mode)
assert item.uid > 0
assert "hlid" in item and len(item.hlid) == 32 # 256bit
hlid2 = item.hlid
assert len(item.chunks) > 0
chunks2 = item.chunks
size2 = item.size
assert "source" not in item
assert "hardlink_master" not in item
elif item.path.endswith("broken_symlink"):
assert stat.S_ISLNK(item.mode)
assert item.source == "doesnotexist"
assert item.uid > 0
assert "hlid" not in item
elif item.path.endswith("symlink"):
assert stat.S_ISLNK(item.mode)
assert item.source == "target"
assert item.uid > 0
assert "hlid" not in item
elif item.path.endswith("fifo"):
assert stat.S_ISFIFO(item.mode)
assert item.uid > 0
assert "hlid" not in item
elif item.path.endswith("without_xattrs"):
assert stat.S_ISREG(item.mode)
assert "xattrs" not in item
elif item.path.endswith("with_xattrs"):
assert stat.S_ISREG(item.mode)
assert "xattrs" in item
assert len(item.xattrs) == 2
assert item.xattrs[b"key1"] == b"value"
assert item.xattrs[b"key2"] == b""
elif item.path.endswith("without_flags"):
assert stat.S_ISREG(item.mode)
# borg1 did not store a flags value of 0 ("nothing special")
# borg2 reflects this "i do not know" by not having the k/v pair
assert "bsdflags" not in item
elif item.path.endswith("with_flags"):
assert stat.S_ISREG(item.mode)
assert "bsdflags" in item
assert item.bsdflags == stat.UF_NODUMP
elif item.path.endswith("root_stuff"):
assert stat.S_ISDIR(item.mode)
assert item.uid == 0
assert item.gid == 0
assert "hlid" not in item
elif item.path.endswith("cdev_34_56"):
assert stat.S_ISCHR(item.mode)
# looks like we can't use os.major/minor with data coming from another platform,
# thus we only do a rather rough check here:
assert "rdev" in item and item.rdev != 0
assert item.uid == 0
assert item.gid == 0
assert item.user == "root"
assert item.group in ("root", "wheel")
assert "hlid" not in item
elif item.path.endswith("bdev_12_34"):
assert stat.S_ISBLK(item.mode)
# looks like we can't use os.major/minor with data coming from another platform,
# thus we only do a rather rough check here:
assert "rdev" in item and item.rdev != 0
assert item.uid == 0
assert item.gid == 0
assert item.user == "root"
assert item.group in ("root", "wheel")
assert "hlid" not in item
elif item.path.endswith("strange_uid_gid"):
assert stat.S_ISREG(item.mode)
assert item.uid == 54321
assert item.gid == 54321
assert "user" not in item
assert "group" not in item
else:
raise NotImplementedError(f"test missing for {item.path}")
if archive_name == "archive1":
assert hlid1 == hlid2
assert size1 == size2 == 16 + 1 # 16 text chars + \n
assert chunks1 == chunks2
class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
"""run the same tests, but with a remote repository"""
@unittest.skip("only works locally")
def test_transfer_upgrade(self):
pass
@unittest.skipUnless("binary" in BORG_EXES, "no borg.exe available")
class ArchiverTestCaseBinary(ArchiverTestCaseBinaryBase, ArchiverTestCase):
"""runs the same tests, but via the borg binary"""
@unittest.skip("only works locally")
def test_transfer_upgrade(self):
pass