repoobj: add a layer to format/parse repo objects

borg < 2:

obj = encrypted(compressed(data))

borg 2:

obj = enc_meta_len32 + encrypted(msgpacked(meta)) + encrypted(compressed(data))

handle compression / decompression in repoobj

move the assert_id call from decrypt to RepoObj.parse

also:
- for AEADKeyBase, add a dummy assert_id (not needed here)
- only test assert_id for other if not AEADKeyBase instance
- remove test_getting_wrong_chunk. assert_id is called elsewhere
  and is not needed any more anyway with the new AEAD crypto.
- only give manifest (includes key, repo, repo_objs)
- only return manifest from Manifest.load (includes key, repo, repo_objs)
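
For orientation (not part of this commit), here is a minimal self-contained sketch of the borg 2 object layout shown above: a little-endian 32-bit length of the encrypted metadata, then the encrypted msgpacked metadata, then the encrypted compressed data. fake_encrypt / fake_decrypt and zlib are stand-ins for the key's cipher and borg's Compressor; the real implementation is the new src/borg/repoobj.py further down in this commit.

import struct
import zlib

import msgpack  # assumption: the standalone msgpack package (borg wraps it in borg.helpers.msgpack)

META_LEN_HDR = struct.Struct("<I")  # the enc_meta_len32 field from the layout above


def fake_encrypt(id, data):
    # stand-in for key.encrypt(id, data); the real keys use AEAD ciphers (id goes into the AAD)
    return bytes(data)


def fake_decrypt(id, data):
    # stand-in for key.decrypt(id, data)
    return bytes(data)


def format_obj(id, meta, data):
    # borg 2: obj = enc_meta_len32 + encrypted(msgpacked(meta)) + encrypted(compressed(data))
    data_compressed = zlib.compress(data)  # borg uses its own Compressor (lz4 by default), not zlib
    meta = dict(meta, size=len(data), csize=len(data_compressed))
    meta_encrypted = fake_encrypt(id, msgpack.packb(meta))
    data_encrypted = fake_encrypt(id, data_compressed)
    return META_LEN_HDR.pack(len(meta_encrypted)) + meta_encrypted + data_encrypted


def parse_obj(id, cdata):
    offs = META_LEN_HDR.size
    meta_len = META_LEN_HDR.unpack(cdata[:offs])[0]
    meta = msgpack.unpackb(fake_decrypt(id, cdata[offs:offs + meta_len]))
    data = zlib.decompress(fake_decrypt(id, cdata[offs + meta_len:]))
    return meta, data


obj = format_obj(b"\0" * 32, {}, b"some chunk data")
meta, data = parse_obj(b"\0" * 32, obj)
assert data == b"some chunk data" and meta["size"] == len(data)
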
Thomas Waldmann 2022-08-23 03:25:06 +02:00
parent 6c6f10df1e
commit fa986a9f19
35 changed files with 481 additions and 366 deletions

View File

@ -48,6 +48,7 @@ from .item import Item, ArchiveItem, ItemDiff
from .platform import acl_get, acl_set, set_flags, get_flags, swidth, hostname
from .remote import cache_if_remote
from .repository import Repository, LIST_SCAN_LIMIT
from .repoobj import RepoObj
has_link = hasattr(os, "link")
@ -262,9 +263,9 @@ def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op="op
class DownloadPipeline:
def __init__(self, repository, key):
def __init__(self, repository, repo_objs):
self.repository = repository
self.key = key
self.repo_objs = repo_objs
def unpack_many(self, ids, *, filter=None, preload=False):
"""
@ -308,8 +309,9 @@ class DownloadPipeline:
yield item
def fetch_many(self, ids, is_preloaded=False):
for id_, data in zip(ids, self.repository.get_many(ids, is_preloaded=is_preloaded)):
yield self.key.decrypt(id_, data)
for id_, cdata in zip(ids, self.repository.get_many(ids, is_preloaded=is_preloaded)):
_, data = self.repo_objs.parse(id_, cdata)
yield data
class ChunkBuffer:
@ -391,12 +393,12 @@ def get_item_uid_gid(item, *, numeric, uid_forced=None, gid_forced=None, uid_def
return uid, gid
def archive_get_items(metadata, key, repository):
def archive_get_items(metadata, *, repo_objs, repository):
if "item_ptrs" in metadata: # looks like a v2+ archive
assert "items" not in metadata
items = []
for id, data in zip(metadata.item_ptrs, repository.get_many(metadata.item_ptrs)):
data = key.decrypt(id, data)
for id, cdata in zip(metadata.item_ptrs, repository.get_many(metadata.item_ptrs)):
_, data = repo_objs.parse(id, cdata)
ids = msgpack.unpackb(data)
items.extend(ids)
return items
@ -406,16 +408,16 @@ def archive_get_items(metadata, key, repository):
return metadata.items
def archive_put_items(chunk_ids, *, key, cache=None, stats=None, add_reference=None):
def archive_put_items(chunk_ids, *, repo_objs, cache=None, stats=None, add_reference=None):
"""gets a (potentially large) list of archive metadata stream chunk ids and writes them to repo objects"""
item_ptrs = []
for i in range(0, len(chunk_ids), IDS_PER_CHUNK):
data = msgpack.packb(chunk_ids[i : i + IDS_PER_CHUNK])
id = key.id_hash(data)
id = repo_objs.id_hash(data)
if cache is not None and stats is not None:
cache.add_chunk(id, data, stats)
elif add_reference is not None:
cdata = key.encrypt(id, data)
cdata = repo_objs.format(id, {}, data)
add_reference(id, len(data), cdata)
else:
raise NotImplementedError
@ -435,8 +437,6 @@ class Archive:
def __init__(
self,
repository,
key,
manifest,
name,
cache=None,
@ -458,10 +458,12 @@ class Archive:
iec=False,
):
self.cwd = os.getcwd()
self.key = key
self.repository = repository
self.cache = cache
assert isinstance(manifest, Manifest)
self.manifest = manifest
self.key = manifest.repo_objs.key
self.repo_objs = manifest.repo_objs
self.repository = manifest.repository
self.cache = cache
self.stats = Statistics(output_json=log_json, iec=iec)
self.iec = iec
self.show_progress = progress
@ -488,7 +490,7 @@ class Archive:
end = datetime.now().astimezone() # local time with local timezone
self.end = end
self.consider_part_files = consider_part_files
self.pipeline = DownloadPipeline(self.repository, self.key)
self.pipeline = DownloadPipeline(self.repository, self.repo_objs)
self.create = create
if self.create:
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
@ -507,12 +509,13 @@ class Archive:
self.load(info.id)
def _load_meta(self, id):
data = self.key.decrypt(id, self.repository.get(id))
cdata = self.repository.get(id)
_, data = self.repo_objs.parse(id, cdata)
metadata = ArchiveItem(internal_dict=msgpack.unpackb(data))
if metadata.version not in (1, 2): # legacy: still need to read v1 archives
raise Exception("Unknown archive metadata version")
# note: metadata.items must not get written to disk!
metadata.items = archive_get_items(metadata, self.key, self.repository)
metadata.items = archive_get_items(metadata, repo_objs=self.repo_objs, repository=self.repository)
return metadata
def load(self, id):
@ -626,7 +629,9 @@ Duration: {0.duration}
if name in self.manifest.archives:
raise self.AlreadyExists(name)
self.items_buffer.flush(flush=True)
item_ptrs = archive_put_items(self.items_buffer.chunks, key=self.key, cache=self.cache, stats=self.stats)
item_ptrs = archive_put_items(
self.items_buffer.chunks, repo_objs=self.repo_objs, cache=self.cache, stats=self.stats
)
duration = timedelta(seconds=time.monotonic() - self.start_monotonic)
if timestamp is None:
end = datetime.now().astimezone() # local time with local timezone
@ -660,7 +665,7 @@ Duration: {0.duration}
metadata.update(additional_metadata or {})
metadata = ArchiveItem(metadata)
data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive")
self.id = self.key.id_hash(data)
self.id = self.repo_objs.id_hash(data)
try:
self.cache.add_chunk(self.id, data, self.stats)
except IntegrityError as err:
@ -699,7 +704,7 @@ Duration: {0.duration}
for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
pi.show(increase=1)
add(id)
data = self.key.decrypt(id, chunk)
_, data = self.repo_objs.parse(id, chunk)
sync.feed(data)
unique_size = archive_index.stats_against(cache.chunks)[1]
pi.finish()
@ -1011,7 +1016,7 @@ Duration: {0.duration}
for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
if progress:
pi.show(i)
data = self.key.decrypt(items_id, data)
_, data = self.repo_objs.parse(items_id, data)
unpacker.feed(data)
chunk_decref(items_id, stats)
try:
@ -1666,6 +1671,7 @@ class ArchiveChecker:
logger.error("Repository contains no apparent data at all, cannot continue check/repair.")
return False
self.key = self.make_key(repository)
self.repo_objs = RepoObj(self.key)
if verify_data:
self.verify_data()
if Manifest.MANIFEST_ID not in self.chunks:
@ -1674,7 +1680,7 @@ class ArchiveChecker:
self.manifest = self.rebuild_manifest()
else:
try:
self.manifest, _ = Manifest.load(repository, (Manifest.Operation.CHECK,), key=self.key)
self.manifest = Manifest.load(repository, (Manifest.Operation.CHECK,), key=self.key)
except IntegrityErrorBase as exc:
logger.error("Repository manifest is corrupted: %s", exc)
self.error_found = True
@ -1765,7 +1771,7 @@ class ArchiveChecker:
chunk_data_iter = self.repository.get_many(chunk_ids)
else:
try:
self.key.decrypt(chunk_id, encrypted_data, decompress=decompress)
self.repo_objs.parse(chunk_id, encrypted_data, decompress=decompress)
except IntegrityErrorBase as integrity_error:
self.error_found = True
errors += 1
@ -1796,7 +1802,7 @@ class ArchiveChecker:
# from the underlying media.
try:
encrypted_data = self.repository.get(defect_chunk)
self.key.decrypt(defect_chunk, encrypted_data, decompress=decompress)
self.repo_objs.parse(defect_chunk, encrypted_data, decompress=decompress)
except IntegrityErrorBase:
# failed twice -> get rid of this chunk
del self.chunks[defect_chunk]
@ -1844,7 +1850,7 @@ class ArchiveChecker:
pi.show()
cdata = self.repository.get(chunk_id)
try:
data = self.key.decrypt(chunk_id, cdata)
_, data = self.repo_objs.parse(chunk_id, cdata)
except IntegrityErrorBase as exc:
logger.error("Skipping corrupted chunk: %s", exc)
self.error_found = True
@ -1890,7 +1896,7 @@ class ArchiveChecker:
def add_callback(chunk):
id_ = self.key.id_hash(chunk)
cdata = self.key.encrypt(id_, chunk)
cdata = self.repo_objs.format(id_, {}, chunk)
add_reference(id_, len(chunk), cdata)
return id_
@ -1913,7 +1919,7 @@ class ArchiveChecker:
def replacement_chunk(size):
chunk = Chunk(None, allocation=CH_ALLOC, size=size)
chunk_id, data = cached_hash(chunk, self.key.id_hash)
cdata = self.key.encrypt(chunk_id, data)
cdata = self.repo_objs.format(chunk_id, {}, data)
return chunk_id, size, cdata
offset = 0
@ -2032,7 +2038,7 @@ class ArchiveChecker:
return True, ""
i = 0
archive_items = archive_get_items(archive, self.key, repository)
archive_items = archive_get_items(archive, repo_objs=self.repo_objs, repository=repository)
for state, items in groupby(archive_items, missing_chunk_detector):
items = list(items)
if state % 2:
@ -2044,7 +2050,7 @@ class ArchiveChecker:
unpacker.resync()
for chunk_id, cdata in zip(items, repository.get_many(items)):
try:
data = self.key.decrypt(chunk_id, cdata)
_, data = self.repo_objs.parse(chunk_id, cdata)
unpacker.feed(data)
for item in unpacker:
valid, reason = valid_item(item)
@ -2057,7 +2063,7 @@ class ArchiveChecker:
i,
)
except IntegrityError as integrity_error:
# key.decrypt() detected integrity issues.
# repo_objs.parse() detected integrity issues.
# maybe the repo gave us a valid cdata, but not for the chunk_id we wanted.
# or the authentication of cdata failed, meaning the encrypted data was corrupted.
report(str(integrity_error), chunk_id, i)
@ -2098,7 +2104,7 @@ class ArchiveChecker:
mark_as_possibly_superseded(archive_id)
cdata = self.repository.get(archive_id)
try:
data = self.key.decrypt(archive_id, cdata)
_, data = self.repo_objs.parse(archive_id, cdata)
except IntegrityError as integrity_error:
logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error)
self.error_found = True
@ -2114,14 +2120,18 @@ class ArchiveChecker:
verify_file_chunks(info.name, item)
items_buffer.add(item)
items_buffer.flush(flush=True)
for previous_item_id in archive_get_items(archive, self.key, self.repository):
for previous_item_id in archive_get_items(
archive, repo_objs=self.repo_objs, repository=self.repository
):
mark_as_possibly_superseded(previous_item_id)
for previous_item_ptr in archive.item_ptrs:
mark_as_possibly_superseded(previous_item_ptr)
archive.item_ptrs = archive_put_items(items_buffer.chunks, key=self.key, add_reference=add_reference)
archive.item_ptrs = archive_put_items(
items_buffer.chunks, repo_objs=self.repo_objs, add_reference=add_reference
)
data = msgpack.packb(archive.as_dict())
new_archive_id = self.key.id_hash(data)
cdata = self.key.encrypt(new_archive_id, data)
cdata = self.repo_objs.format(new_archive_id, {}, data)
add_reference(new_archive_id, len(data), cdata)
self.manifest.archives[info.name] = (new_archive_id, info.ts)
pi.finish()
@ -2162,9 +2172,7 @@ class ArchiveRecreater:
def __init__(
self,
repository,
manifest,
key,
cache,
matcher,
exclude_caches=False,
@ -2181,9 +2189,10 @@ class ArchiveRecreater:
timestamp=None,
checkpoint_interval=1800,
):
self.repository = repository
self.key = key
self.manifest = manifest
self.repository = manifest.repository
self.key = manifest.key
self.repo_objs = manifest.repo_objs
self.cache = cache
self.matcher = matcher
@ -2260,9 +2269,12 @@ class ArchiveRecreater:
overwrite = self.recompress
if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
# Check if this chunk is already compressed the way we want it
old_chunk = self.key.decrypt(chunk_id, self.repository.get(chunk_id), decompress=False)
_, old_chunk = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False)
compressor_cls, level = Compressor.detect(old_chunk)
if compressor_cls.name == self.key.compressor.decide(data).name and level == self.key.compressor.level:
if (
compressor_cls.name == self.repo_objs.compressor.decide(data).name
and level == self.repo_objs.compressor.level
):
# Stored chunk has the same compression method and level as we wanted
overwrite = False
chunk_entry = self.cache.add_chunk(chunk_id, data, target.stats, overwrite=overwrite, wait=False)
@ -2371,8 +2383,6 @@ class ArchiveRecreater:
def create_target_archive(self, name):
target = Archive(
self.repository,
self.key,
self.manifest,
name,
create=True,
@ -2384,4 +2394,4 @@ class ArchiveRecreater:
return target
def open_archive(self, name, **kwargs):
return Archive(self.repository, self.key, self.manifest, name, cache=self.cache, **kwargs)
return Archive(self.manifest, name, cache=self.cache, **kwargs)

View File

@ -14,6 +14,7 @@ from ..manifest import Manifest, AI_HUMAN_SORT_KEYS
from ..patterns import PatternMatcher
from ..remote import RemoteRepository
from ..repository import Repository
from ..repoobj import RepoObj, RepoObj1
from ..patterns import (
ArgparsePatternAction,
ArgparseExcludeFileAction,
@ -80,7 +81,7 @@ def with_repository(
:param create: create repository
:param lock: lock repository
:param exclusive: (bool) lock repository exclusively (for writing)
:param manifest: load manifest and key, pass them as keyword arguments
:param manifest: load manifest and repo_objs (key), pass them as keyword arguments
:param cache: open cache, pass it as keyword argument (implies manifest)
:param secure: do assert_secure after loading manifest
:param compatibility: mandatory if not create and (manifest or cache), specifies mandatory feature categories to check
@ -135,16 +136,16 @@ def with_repository(
"You can use 'borg transfer' to copy archives from old to new repos."
)
if manifest or cache:
kwargs["manifest"], kwargs["key"] = Manifest.load(repository, compatibility)
manifest_ = Manifest.load(repository, compatibility)
kwargs["manifest"] = manifest_
if "compression" in args:
kwargs["key"].compressor = args.compression.compressor
manifest_.repo_objs.compressor = args.compression.compressor
if secure:
assert_secure(repository, kwargs["manifest"], self.lock_wait)
assert_secure(repository, manifest_, self.lock_wait)
if cache:
with Cache(
repository,
kwargs["key"],
kwargs["manifest"],
manifest_,
progress=getattr(args, "progress", False),
lock_wait=self.lock_wait,
cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
@ -160,7 +161,7 @@ def with_repository(
return decorator
def with_other_repository(manifest=False, key=False, cache=False, compatibility=None):
def with_other_repository(manifest=False, cache=False, compatibility=None):
"""
this is a simplified version of "with_repository", just for the "other location".
@ -170,7 +171,7 @@ def with_other_repository(manifest=False, key=False, cache=False, compatibility=
compatibility = compat_check(
create=False,
manifest=manifest,
key=key,
key=manifest,
cache=cache,
compatibility=compatibility,
decorator_name="with_other_repository",
@ -199,17 +200,16 @@ def with_other_repository(manifest=False, key=False, cache=False, compatibility=
if repository.version not in (1, 2):
raise Error("This borg version only accepts version 1 or 2 repos for --other-repo.")
kwargs["other_repository"] = repository
if manifest or key or cache:
manifest_, key_ = Manifest.load(repository, compatibility)
if manifest or cache:
manifest_ = Manifest.load(
repository, compatibility, ro_cls=RepoObj if repository.version > 1 else RepoObj1
)
assert_secure(repository, manifest_, self.lock_wait)
if manifest:
kwargs["other_manifest"] = manifest_
if key:
kwargs["other_key"] = key_
if cache:
with Cache(
repository,
key_,
manifest_,
progress=False,
lock_wait=self.lock_wait,
@ -229,12 +229,10 @@ def with_other_repository(manifest=False, key=False, cache=False, compatibility=
def with_archive(method):
@functools.wraps(method)
def wrapper(self, args, repository, key, manifest, **kwargs):
def wrapper(self, args, repository, manifest, **kwargs):
archive_name = getattr(args, "name", None)
assert archive_name is not None
archive = Archive(
repository,
key,
manifest,
archive_name,
numeric_ids=getattr(args, "numeric_ids", False),
@ -246,7 +244,7 @@ def with_archive(method):
log_json=args.log_json,
iec=args.iec,
)
return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs)
return method(self, args, repository=repository, manifest=manifest, archive=archive, **kwargs)
return wrapper

View File

@ -109,9 +109,9 @@ class ConfigMixIn:
name = args.name
if args.cache:
manifest, key = Manifest.load(repository, (Manifest.Operation.WRITE,))
manifest = Manifest.load(repository, (Manifest.Operation.WRITE,))
assert_secure(repository, manifest, self.lock_wait)
cache = Cache(repository, key, manifest, lock_wait=self.lock_wait)
cache = Cache(repository, manifest, lock_wait=self.lock_wait)
try:
if args.cache:

View File

@ -39,8 +39,9 @@ logger = create_logger()
class CreateMixIn:
@with_repository(exclusive=True, compatibility=(Manifest.Operation.WRITE,))
def do_create(self, args, repository, manifest=None, key=None):
def do_create(self, args, repository, manifest):
"""Create new archive"""
key = manifest.key
matcher = PatternMatcher(fallback=True)
matcher.add_inclexcl(args.patterns)
@ -210,7 +211,6 @@ class CreateMixIn:
if not dry_run:
with Cache(
repository,
key,
manifest,
progress=args.progress,
lock_wait=self.lock_wait,
@ -219,8 +219,6 @@ class CreateMixIn:
iec=args.iec,
) as cache:
archive = Archive(
repository,
key,
manifest,
args.name,
cache=cache,

View File

@ -16,6 +16,7 @@ from ..helpers import positive_int_validator, NameSpec
from ..manifest import Manifest
from ..platform import get_process_id
from ..repository import Repository, LIST_SCAN_LIMIT, TAG_PUT, TAG_DELETE, TAG_COMMIT
from ..repoobj import RepoObj
from ._common import with_repository
from ._common import process_epilog
@ -29,11 +30,12 @@ class DebugMixIn:
return EXIT_SUCCESS
@with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
def do_debug_dump_archive_items(self, args, repository, manifest, key):
def do_debug_dump_archive_items(self, args, repository, manifest):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
archive = Archive(repository, key, manifest, args.name, consider_part_files=args.consider_part_files)
repo_objs = manifest.repo_objs
archive = Archive(manifest, args.name, consider_part_files=args.consider_part_files)
for i, item_id in enumerate(archive.metadata.items):
data = key.decrypt(item_id, repository.get(item_id))
_, data = repo_objs.parse(item_id, repository.get(item_id))
filename = "%06d_%s.items" % (i, bin_to_hex(item_id))
print("Dumping", filename)
with open(filename, "wb") as fd:
@ -42,8 +44,9 @@ class DebugMixIn:
return EXIT_SUCCESS
@with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
def do_debug_dump_archive(self, args, repository, manifest, key):
def do_debug_dump_archive(self, args, repository, manifest):
"""dump decoded archive metadata (not: data)"""
repo_objs = manifest.repo_objs
try:
archive_meta_orig = manifest.archives.get_raw_dict()[args.name]
except KeyError:
@ -62,7 +65,7 @@ class DebugMixIn:
fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
fd.write(",\n")
data = key.decrypt(archive_meta_orig["id"], repository.get(archive_meta_orig["id"]))
_, data = repo_objs.parse(archive_meta_orig["id"], repository.get(archive_meta_orig["id"]))
archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)
fd.write(' "_meta":\n')
@ -74,10 +77,10 @@ class DebugMixIn:
first = True
items = []
for chunk_id in archive_org_dict["item_ptrs"]:
data = key.decrypt(chunk_id, repository.get(chunk_id))
_, data = repo_objs.parse(chunk_id, repository.get(chunk_id))
items.extend(msgpack.unpackb(data))
for item_id in items:
data = key.decrypt(item_id, repository.get(item_id))
_, data = repo_objs.parse(item_id, repository.get(item_id))
unpacker.feed(data)
for item in unpacker:
item = prepare_dump_dict(item)
@ -95,10 +98,10 @@ class DebugMixIn:
return EXIT_SUCCESS
@with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
def do_debug_dump_manifest(self, args, repository, manifest, key):
def do_debug_dump_manifest(self, args, repository, manifest):
"""dump decoded repository manifest"""
data = key.decrypt(manifest.MANIFEST_ID, repository.get(manifest.MANIFEST_ID))
repo_objs = manifest.repo_objs
_, data = repo_objs.parse(manifest.MANIFEST_ID, repository.get(manifest.MANIFEST_ID))
meta = prepare_dump_dict(msgpack.unpackb(data, object_hook=StableDict))
@ -113,9 +116,9 @@ class DebugMixIn:
def decrypt_dump(i, id, cdata, tag=None, segment=None, offset=None):
if cdata is not None:
data = key.decrypt(id, cdata)
_, data = repo_objs.parse(id, cdata)
else:
data = b""
_, data = {}, b""
tag_str = "" if tag is None else "_" + tag
segment_str = "_" + str(segment) if segment is not None else ""
offset_str = "_" + str(offset) if offset is not None else ""
@ -132,6 +135,7 @@ class DebugMixIn:
for id, cdata, tag, segment, offset in repository.scan_low_level():
if tag == TAG_PUT:
key = key_factory(repository, cdata)
repo_objs = RepoObj(key)
break
i = 0
for id, cdata, tag, segment, offset in repository.scan_low_level(segment=args.segment, offset=args.offset):
@ -147,6 +151,7 @@ class DebugMixIn:
ids = repository.list(limit=1, marker=None)
cdata = repository.get(ids[0])
key = key_factory(repository, cdata)
repo_objs = RepoObj(key)
marker = None
i = 0
while True:
@ -195,6 +200,7 @@ class DebugMixIn:
ids = repository.list(limit=1, marker=None)
cdata = repository.get(ids[0])
key = key_factory(repository, cdata)
repo_objs = RepoObj(key)
marker = None
last_data = b""
@ -207,7 +213,7 @@ class DebugMixIn:
marker = result[-1]
for id in result:
cdata = repository.get(id)
data = key.decrypt(id, cdata)
_, data = repo_objs.parse(id, cdata)
# try to locate wanted sequence crossing the border of last_data and data
boundary_data = last_data[-(len(wanted) - 1) :] + data[: len(wanted) - 1]
@ -284,7 +290,7 @@ class DebugMixIn:
return EXIT_SUCCESS
@with_repository(manifest=False, exclusive=True, cache=True, compatibility=Manifest.NO_OPERATION_CHECK)
def do_debug_refcount_obj(self, args, repository, manifest, key, cache):
def do_debug_refcount_obj(self, args, repository, manifest, cache):
"""display refcounts for the objects with the given IDs"""
for hex_id in args.ids:
try:

View File

@ -19,7 +19,7 @@ class DeleteMixIn:
"""Delete archives"""
self.output_list = args.output_list
dry_run = args.dry_run
manifest, key = Manifest.load(repository, (Manifest.Operation.DELETE,))
manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
if not archive_names:
return self.exit_code
@ -56,7 +56,7 @@ class DeleteMixIn:
return self.exit_code
stats = Statistics(iec=args.iec)
with Cache(repository, key, manifest, progress=args.progress, lock_wait=self.lock_wait, iec=args.iec) as cache:
with Cache(repository, manifest, progress=args.progress, lock_wait=self.lock_wait, iec=args.iec) as cache:
def checkpoint_func():
manifest.write()
@ -80,12 +80,7 @@ class DeleteMixIn:
if not dry_run:
archive = Archive(
repository,
key,
manifest,
archive_name,
cache=cache,
consider_part_files=args.consider_part_files,
manifest, archive_name, cache=cache, consider_part_files=args.consider_part_files
)
archive.delete(stats, progress=args.progress, forced=args.forced)
checkpointed = self.maybe_checkpoint(

View File

@ -15,7 +15,7 @@ logger = create_logger()
class DiffMixIn:
@with_repository(compatibility=(Manifest.Operation.READ,))
@with_archive
def do_diff(self, args, repository, manifest, key, archive):
def do_diff(self, args, repository, manifest, archive):
"""Diff contents of two archives"""
def print_json_output(diff, path):
@ -27,7 +27,7 @@ class DiffMixIn:
print_output = print_json_output if args.json_lines else print_text_output
archive1 = archive
archive2 = Archive(repository, key, manifest, args.other_name, consider_part_files=args.consider_part_files)
archive2 = Archive(manifest, args.other_name, consider_part_files=args.consider_part_files)
can_compare_chunk_ids = (
archive1.metadata.get("chunker_params", False) == archive2.metadata.get("chunker_params", True)

View File

@ -22,7 +22,7 @@ logger = create_logger()
class ExtractMixIn:
@with_repository(compatibility=(Manifest.Operation.READ,))
@with_archive
def do_extract(self, args, repository, manifest, key, archive):
def do_extract(self, args, repository, manifest, archive):
"""Extract archive contents"""
# be restrictive when restoring files, restore permissions later
if sys.getfilesystemencoding() == "ascii":

View File

@ -16,7 +16,7 @@ logger = create_logger()
class InfoMixIn:
@with_repository(cache=True, compatibility=(Manifest.Operation.READ,))
def do_info(self, args, repository, manifest, key, cache):
def do_info(self, args, repository, manifest, cache):
"""Show archive details such as disk space used"""
def format_cmdline(cmdline):
@ -29,13 +29,7 @@ class InfoMixIn:
for i, archive_name in enumerate(archive_names, 1):
archive = Archive(
repository,
key,
manifest,
archive_name,
cache=cache,
consider_part_files=args.consider_part_files,
iec=args.iec,
manifest, archive_name, cache=cache, consider_part_files=args.consider_part_files, iec=args.iec
)
info = archive.info()
if args.json:

View File

@ -17,8 +17,9 @@ logger = create_logger(__name__)
class KeysMixIn:
@with_repository(compatibility=(Manifest.Operation.CHECK,))
def do_change_passphrase(self, args, repository, manifest, key):
def do_change_passphrase(self, args, repository, manifest):
"""Change repository key file passphrase"""
key = manifest.key
if not hasattr(key, "change_passphrase"):
print("This repository is not encrypted, cannot change the passphrase.")
return EXIT_ERROR
@ -30,8 +31,9 @@ class KeysMixIn:
return EXIT_SUCCESS
@with_repository(exclusive=True, manifest=True, cache=True, compatibility=(Manifest.Operation.CHECK,))
def do_change_location(self, args, repository, manifest, key, cache):
def do_change_location(self, args, repository, manifest, cache):
"""Change repository key location"""
key = manifest.key
if not hasattr(key, "change_passphrase"):
print("This repository is not encrypted, cannot change the key location.")
return EXIT_ERROR
@ -71,6 +73,7 @@ class KeysMixIn:
# rewrite the manifest with the new key, so that the key-type byte of the manifest changes
manifest.key = key_new
manifest.repo_objs.key = key_new
manifest.write()
repository.commit(compact=False)

View File

@ -16,7 +16,7 @@ logger = create_logger()
class ListMixIn:
@with_repository(compatibility=(Manifest.Operation.READ,))
def do_list(self, args, repository, manifest, key):
def do_list(self, args, repository, manifest):
"""List archive contents"""
matcher = build_matcher(args.patterns, args.paths)
if args.format is not None:
@ -27,9 +27,7 @@ class ListMixIn:
format = "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}"
def _list_inner(cache):
archive = Archive(
repository, key, manifest, args.name, cache=cache, consider_part_files=args.consider_part_files
)
archive = Archive(manifest, args.name, cache=cache, consider_part_files=args.consider_part_files)
formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
for item in archive.iter_items(lambda item: matcher.match(item.path)):
@ -37,7 +35,7 @@ class ListMixIn:
# Only load the cache if it will be used
if ItemFormatter.format_needs_cache(format):
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
with Cache(repository, manifest, lock_wait=self.lock_wait) as cache:
_list_inner(cache)
else:
_list_inner(cache=None)

View File

@ -31,11 +31,11 @@ class MountMixIn:
return self._do_mount(args)
@with_repository(compatibility=(Manifest.Operation.READ,))
def _do_mount(self, args, repository, manifest, key):
def _do_mount(self, args, repository, manifest):
from ..fuse import FuseOperations
with cache_if_remote(repository, decrypted_cache=key) as cached_repo:
operations = FuseOperations(key, repository, manifest, args, cached_repo)
with cache_if_remote(repository, decrypted_cache=manifest.repo_objs) as cached_repo:
operations = FuseOperations(manifest, args, cached_repo)
logger.info("Mounting filesystem")
try:
operations.mount(args.mountpoint, args.options, args.foreground)

View File

@ -71,7 +71,7 @@ def prune_split(archives, rule, n, kept_because=None):
class PruneMixIn:
@with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,))
def do_prune(self, args, repository, manifest, key):
def do_prune(self, args, repository, manifest):
"""Prune repository archives according to specified rules"""
if not any(
(args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within)
@ -119,7 +119,7 @@ class PruneMixIn:
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
stats = Statistics(iec=args.iec)
with Cache(repository, key, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache:
with Cache(repository, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache:
def checkpoint_func():
manifest.write()
@ -142,9 +142,7 @@ class PruneMixIn:
else:
archives_deleted += 1
log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
archive = Archive(
repository, key, manifest, archive.name, cache, consider_part_files=args.consider_part_files
)
archive = Archive(manifest, archive.name, cache, consider_part_files=args.consider_part_files)
archive.delete(stats, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval

View File

@ -16,9 +16,10 @@ logger = create_logger()
class RCreateMixIn:
@with_repository(create=True, exclusive=True, manifest=False)
@with_other_repository(key=True, compatibility=(Manifest.Operation.READ,))
def do_rcreate(self, args, repository, *, other_repository=None, other_key=None):
@with_other_repository(manifest=True, compatibility=(Manifest.Operation.READ,))
def do_rcreate(self, args, repository, *, other_repository=None, other_manifest=None):
"""Create a new, empty repository"""
other_key = other_manifest.key if other_manifest is not None else None
path = args.location.canonical_path()
logger.info('Initializing repository at "%s"' % path)
if other_key is not None:
@ -32,7 +33,7 @@ class RCreateMixIn:
manifest.key = key
manifest.write()
repository.commit(compact=False)
with Cache(repository, key, manifest, warn_if_unencrypted=False):
with Cache(repository, manifest, warn_if_unencrypted=False):
pass
if key.tam_required:
tam_file = tam_required_file(repository)

View File

@ -28,7 +28,7 @@ class RDeleteMixIn:
location = repository._location.canonical_path()
msg = []
try:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
n_archives = len(manifest.archives)
msg.append(
f"You requested to completely DELETE the following repository "

View File

@ -17,7 +17,7 @@ logger = create_logger()
class RecreateMixIn:
@with_repository(cache=True, exclusive=True, compatibility=(Manifest.Operation.CHECK,))
def do_recreate(self, args, repository, manifest, key, cache):
def do_recreate(self, args, repository, manifest, cache):
"""Re-create archives"""
matcher = build_matcher(args.patterns, args.paths)
self.output_list = args.output_list
@ -26,9 +26,7 @@ class RecreateMixIn:
always_recompress = args.recompress == "always"
recreater = ArchiveRecreater(
repository,
manifest,
key,
cache,
matcher,
exclude_caches=args.exclude_caches,

View File

@ -13,7 +13,7 @@ logger = create_logger()
class RenameMixIn:
@with_repository(exclusive=True, cache=True, compatibility=(Manifest.Operation.CHECK,))
@with_archive
def do_rename(self, args, repository, manifest, key, cache, archive):
def do_rename(self, args, repository, manifest, cache, archive):
"""Rename an existing archive"""
archive.rename(args.newname)
manifest.write()

View File

@ -13,8 +13,9 @@ logger = create_logger()
class RInfoMixIn:
@with_repository(cache=True, compatibility=(Manifest.Operation.READ,))
def do_rinfo(self, args, repository, manifest, key, cache):
def do_rinfo(self, args, repository, manifest, cache):
"""Show repository infos"""
key = manifest.key
info = basic_json_data(manifest, cache=cache, extra={"security_dir": cache.security_manager.dir})
if args.json:

View File

@ -14,7 +14,7 @@ logger = create_logger()
class RListMixIn:
@with_repository(compatibility=(Manifest.Operation.READ,))
def do_rlist(self, args, repository, manifest, key):
def do_rlist(self, args, repository, manifest):
"""List the archives contained in a repository"""
if args.format is not None:
format = args.format
@ -22,7 +22,7 @@ class RListMixIn:
format = "{archive}{NL}"
else:
format = "{archive:<36} {time} [{id}]{NL}"
formatter = ArchiveFormatter(format, repository, manifest, key, json=args.json, iec=args.iec)
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=args.json, iec=args.iec)
output_data = []

View File

@ -53,7 +53,7 @@ def get_tar_filter(fname, decompress):
class TarMixIn:
@with_repository(compatibility=(Manifest.Operation.READ,))
@with_archive
def do_export_tar(self, args, repository, manifest, key, archive):
def do_export_tar(self, args, repository, manifest, archive):
"""Export archive contents as a tarball"""
self.output_list = args.output_list
@ -239,7 +239,7 @@ class TarMixIn:
return self.exit_code
@with_repository(cache=True, exclusive=True, compatibility=(Manifest.Operation.WRITE,))
def do_import_tar(self, args, repository, manifest, key, cache):
def do_import_tar(self, args, repository, manifest, cache):
"""Create a backup archive from a tarball"""
self.output_filter = args.output_filter
self.output_list = args.output_list
@ -250,7 +250,7 @@ class TarMixIn:
tarstream_close = args.tarfile != "-"
with create_filter_process(filter, stream=tarstream, stream_close=tarstream_close, inbound=True) as _stream:
self._import_tar(args, repository, manifest, key, cache, _stream)
self._import_tar(args, repository, manifest, manifest.key, cache, _stream)
return self.exit_code
@ -259,8 +259,6 @@ class TarMixIn:
t0_monotonic = time.monotonic()
archive = Archive(
repository,
key,
manifest,
args.name,
cache=cache,

View File

@ -15,12 +15,12 @@ logger = create_logger()
class TransferMixIn:
@with_other_repository(manifest=True, key=True, compatibility=(Manifest.Operation.READ,))
@with_other_repository(manifest=True, compatibility=(Manifest.Operation.READ,))
@with_repository(exclusive=True, manifest=True, cache=True, compatibility=(Manifest.Operation.WRITE,))
def do_transfer(
self, args, *, repository, manifest, key, cache, other_repository=None, other_manifest=None, other_key=None
):
def do_transfer(self, args, *, repository, manifest, cache, other_repository=None, other_manifest=None):
"""archives transfer from other repository, optionally upgrade data format"""
key = manifest.key
other_key = other_manifest.key
if not uses_same_id_hash(other_key, key):
self.print_error(
"You must keep the same ID hash ([HMAC-]SHA256 or BLAKE2b) or deduplication will break. "
@ -57,8 +57,8 @@ class TransferMixIn:
else:
if not dry_run:
print(f"{name}: copying archive to destination repo...")
other_archive = Archive(other_repository, other_key, other_manifest, name)
archive = Archive(repository, key, manifest, name, cache=cache, create=True) if not dry_run else None
other_archive = Archive(other_manifest, name)
archive = Archive(manifest, name, cache=cache, create=True) if not dry_run else None
upgrader.new_archive(archive=archive)
for item in other_archive.iter_items():
if "chunks" in item:
@ -69,7 +69,7 @@ class TransferMixIn:
if not dry_run:
cdata = other_repository.get(chunk_id)
# keep compressed payload same, avoid decompression / recompression
data = other_key.decrypt(chunk_id, cdata, decompress=False)
meta, data = other_manifest.repo_objs.parse(chunk_id, cdata, decompress=False)
data = upgrader.upgrade_compressed_chunk(chunk=data)
chunk_entry = cache.add_chunk(
chunk_id, data, archive.stats, wait=False, compress=False, size=size

View File

@ -396,7 +396,6 @@ class Cache:
def __new__(
cls,
repository,
key,
manifest,
path=None,
sync=True,
@ -410,8 +409,6 @@ class Cache:
):
def local():
return LocalCache(
repository=repository,
key=key,
manifest=manifest,
path=path,
sync=sync,
@ -424,14 +421,7 @@ class Cache:
)
def adhoc():
return AdHocCache(
repository=repository,
key=key,
manifest=manifest,
lock_wait=lock_wait,
iec=iec,
consider_part_files=consider_part_files,
)
return AdHocCache(manifest=manifest, lock_wait=lock_wait, iec=iec, consider_part_files=consider_part_files)
if not permit_adhoc_cache:
return local()
@ -481,9 +471,7 @@ Total chunks: {0.total_chunks}
# so we can just sum up all archives to get the "all archives" stats:
total_size = 0
for archive_name in self.manifest.archives:
archive = Archive(
self.repository, self.key, self.manifest, archive_name, consider_part_files=self.consider_part_files
)
archive = Archive(self.manifest, archive_name, consider_part_files=self.consider_part_files)
stats = archive.calc_stats(self, want_unique=False)
total_size += stats.osize
stats = self.Summary(total_size, unique_size, total_unique_chunks, total_chunks)._asdict()
@ -503,8 +491,6 @@ class LocalCache(CacheStatsMixin):
def __init__(
self,
repository,
key,
manifest,
path=None,
sync=True,
@ -522,27 +508,29 @@ class LocalCache(CacheStatsMixin):
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
"""
CacheStatsMixin.__init__(self, iec=iec)
self.repository = repository
self.key = key
assert isinstance(manifest, Manifest)
self.manifest = manifest
self.repository = manifest.repository
self.key = manifest.key
self.repo_objs = manifest.repo_objs
self.progress = progress
self.cache_mode = cache_mode
self.consider_part_files = consider_part_files
self.timestamp = None
self.txn_active = False
self.path = cache_dir(repository, path)
self.security_manager = SecurityManager(repository)
self.path = cache_dir(self.repository, path)
self.security_manager = SecurityManager(self.repository)
self.cache_config = CacheConfig(self.repository, self.path, lock_wait)
# Warn user before sending data to a never seen before unencrypted repository
if not os.path.exists(self.path):
self.security_manager.assert_access_unknown(warn_if_unencrypted, manifest, key)
self.security_manager.assert_access_unknown(warn_if_unencrypted, manifest, self.key)
self.create()
self.open()
try:
self.security_manager.assert_secure(manifest, key, cache_config=self.cache_config)
self.security_manager.assert_secure(manifest, self.key, cache_config=self.cache_config)
if not self.check_cache_compatibility():
self.wipe_cache()
@ -912,7 +900,7 @@ class LocalCache(CacheStatsMixin):
self.manifest.check_repository_compatibility((Manifest.Operation.READ,))
self.begin_txn()
with cache_if_remote(self.repository, decrypted_cache=self.key) as decrypted_repository:
with cache_if_remote(self.repository, decrypted_cache=self.repo_objs) as decrypted_repository:
# TEMPORARY HACK: to avoid archive index caching, create a FILE named ~/.cache/borg/REPOID/chunks.archive.d -
# this is only recommended if you have a fast, low latency connection to your repo (e.g. if repo is local disk)
self.do_cache = os.path.isdir(archive_path)
@ -965,7 +953,7 @@ class LocalCache(CacheStatsMixin):
return self.chunk_incref(id, stats)
if size is None:
raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
data = self.key.encrypt(id, chunk, compress=compress)
data = self.repo_objs.format(id, {}, chunk, compress=compress, size=size)
self.repository.put(id, data, wait=wait)
self.chunks.add(id, 1, size)
stats.update(size, not refcount)
@ -1094,18 +1082,18 @@ All archives: unknown unknown unknown
Unique chunks Total chunks
Chunk index: {0.total_unique_chunks:20d} unknown"""
def __init__(
self, repository, key, manifest, warn_if_unencrypted=True, lock_wait=None, consider_part_files=False, iec=False
):
def __init__(self, manifest, warn_if_unencrypted=True, lock_wait=None, consider_part_files=False, iec=False):
CacheStatsMixin.__init__(self, iec=iec)
self.repository = repository
self.key = key
assert isinstance(manifest, Manifest)
self.manifest = manifest
self.repository = manifest.repository
self.key = manifest.key
self.repo_objs = manifest.repo_objs
self.consider_part_files = consider_part_files
self._txn_active = False
self.security_manager = SecurityManager(repository)
self.security_manager.assert_secure(manifest, key, lock_wait=lock_wait)
self.security_manager = SecurityManager(self.repository)
self.security_manager.assert_secure(manifest, self.key, lock_wait=lock_wait)
logger.warning("Note: --no-cache-sync is an experimental feature.")
@ -1138,7 +1126,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
refcount = self.seen_chunk(id, size)
if refcount:
return self.chunk_incref(id, stats, size=size)
data = self.key.encrypt(id, chunk, compress=compress)
data = self.repo_objs.format(id, {}, chunk, compress=compress)
self.repository.put(id, data, wait=wait)
self.chunks.add(id, 1, size)
stats.update(size, not refcount)

View File

@ -12,7 +12,6 @@ logger = create_logger()
import argon2.low_level
from ..constants import * # NOQA
from ..compress import Compressor
from ..helpers import StableDict
from ..helpers import Error, IntegrityError
from ..helpers import get_keys_dir, get_security_dir
@ -23,6 +22,8 @@ from ..helpers import msgpack
from ..item import Key, EncryptedKey, want_bytes
from ..manifest import Manifest
from ..platform import SaveFile
from ..repoobj import RepoObj
from .nonces import NonceManager
from .low_level import AES, bytes_to_int, num_cipher_blocks, hmac_sha256, blake2b_256, hkdf_hmac_sha512
@ -107,7 +108,8 @@ def identify_key(manifest_data):
raise UnsupportedPayloadError(key_type)
def key_factory(repository, manifest_data):
def key_factory(repository, manifest_chunk, *, ro_cls=RepoObj):
manifest_data = ro_cls.extract_crypted_data(manifest_chunk)
return identify_key(manifest_data).detect(repository, manifest_data)
@ -186,10 +188,6 @@ class KeyBase:
self.TYPE_STR = bytes([self.TYPE])
self.repository = repository
self.target = None # key location file path / repo obj
# Some commands write new chunks (e.g. rename) but don't take a --compression argument. This duplicates
# the default used by those commands who do take a --compression argument.
self.compressor = Compressor("lz4")
self.decompress = self.compressor.decompress
self.tam_required = True
self.copy_crypt_key = False
@ -197,10 +195,10 @@ class KeyBase:
"""Return HMAC hash using the "id" HMAC key"""
raise NotImplementedError
def encrypt(self, id, data, compress=True):
def encrypt(self, id, data):
pass
def decrypt(self, id, data, decompress=True):
def decrypt(self, id, data):
pass
def assert_id(self, id, data):
@ -301,19 +299,12 @@ class PlaintextKey(KeyBase):
def id_hash(self, data):
return sha256(data).digest()
def encrypt(self, id, data, compress=True):
if compress:
data = self.compressor.compress(data)
def encrypt(self, id, data):
return b"".join([self.TYPE_STR, data])
def decrypt(self, id, data, decompress=True):
def decrypt(self, id, data):
self.assert_type(data[0], id)
payload = memoryview(data)[1:]
if not decompress:
return payload
data = self.decompress(payload)
self.assert_id(id, data)
return data
return memoryview(data)[1:]
def _tam_key(self, salt, context):
return salt + context
@ -380,23 +371,16 @@ class AESKeyBase(KeyBase):
logically_encrypted = True
def encrypt(self, id, data, compress=True):
if compress:
data = self.compressor.compress(data)
def encrypt(self, id, data):
next_iv = self.nonce_manager.ensure_reservation(self.cipher.next_iv(), self.cipher.block_count(len(data)))
return self.cipher.encrypt(data, header=self.TYPE_STR, iv=next_iv)
def decrypt(self, id, data, decompress=True):
def decrypt(self, id, data):
self.assert_type(data[0], id)
try:
payload = self.cipher.decrypt(data)
return self.cipher.decrypt(data)
except IntegrityError as e:
raise IntegrityError(f"Chunk {bin_to_hex(id)}: Could not decrypt [{str(e)}]")
if not decompress:
return payload
data = self.decompress(memoryview(payload))
self.assert_id(id, data)
return data
def init_from_given_data(self, *, crypt_key, id_key, chunk_seed):
assert len(crypt_key) in (32 + 32, 32 + 128)
@ -804,19 +788,12 @@ class AuthenticatedKeyBase(AESKeyBase, FlexiKey):
if manifest_data is not None:
self.assert_type(manifest_data[0])
def encrypt(self, id, data, compress=True):
if compress:
data = self.compressor.compress(data)
def encrypt(self, id, data):
return b"".join([self.TYPE_STR, data])
def decrypt(self, id, data, decompress=True):
def decrypt(self, id, data):
self.assert_type(data[0], id)
payload = memoryview(data)[1:]
if not decompress:
return payload
data = self.decompress(payload)
self.assert_id(id, data)
return data
return memoryview(data)[1:]
class AuthenticatedKey(ID_HMAC_SHA_256, AuthenticatedKeyBase):
@ -861,10 +838,15 @@ class AEADKeyBase(KeyBase):
MAX_IV = 2**48 - 1
def encrypt(self, id, data, compress=True):
def assert_id(self, id, data):
# note: assert_id(id, data) is not needed any more for the new AEAD crypto.
# we put the id into AAD when storing the chunk, so it gets into the authentication tag computation.
# when decrypting, we provide the id we **want** as AAD for the auth tag verification, so
# decrypting only succeeds if we got the ciphertext we wrote **for that chunk id**.
pass
def encrypt(self, id, data):
# to encrypt new data in this session we use always self.cipher and self.sessionid
if compress:
data = self.compressor.compress(data)
reserved = b"\0"
iv = self.cipher.next_iv()
if iv > self.MAX_IV: # see the data-structures docs about why the IV range is enough
@ -873,7 +855,7 @@ class AEADKeyBase(KeyBase):
header = self.TYPE_STR + reserved + iv_48bit + self.sessionid
return self.cipher.encrypt(data, header=header, iv=iv, aad=id)
def decrypt(self, id, data, decompress=True):
def decrypt(self, id, data):
# to decrypt existing data, we need to get a cipher configured for the sessionid and iv from header
self.assert_type(data[0], id)
iv_48bit = data[2:8]
@ -881,17 +863,9 @@ class AEADKeyBase(KeyBase):
iv = int.from_bytes(iv_48bit, "big")
cipher = self._get_cipher(sessionid, iv)
try:
payload = cipher.decrypt(data, aad=id)
return cipher.decrypt(data, aad=id)
except IntegrityError as e:
raise IntegrityError(f"Chunk {bin_to_hex(id)}: Could not decrypt [{str(e)}]")
if not decompress:
return payload
data = self.decompress(memoryview(payload))
# note: calling self.assert_id(id, data) is not needed any more for the new AEAD crypto.
# we put the id into AAD when storing the chunk, so it gets into the authentication tag computation.
# when decrypting, we provide the id we **want** as AAD for the auth tag verification, so
# decrypting only succeeds if we got the ciphertext we wrote **for that chunk id**.
return data
def init_from_given_data(self, *, crypt_key, id_key, chunk_seed):
assert len(crypt_key) in (32 + 32, 32 + 128)

View File

@ -7,6 +7,8 @@ from hashlib import sha256
from ..helpers import Error, yes, bin_to_hex, dash_open
from ..manifest import Manifest, NoManifestError
from ..repository import Repository
from ..repoobj import RepoObj
from .key import CHPOKeyfileKey, RepoKeyNotFoundError, KeyBlobStorage, identify_key
@ -40,10 +42,11 @@ class KeyManager:
self.keyblob_storage = None
try:
manifest_data = self.repository.get(Manifest.MANIFEST_ID)
manifest_chunk = self.repository.get(Manifest.MANIFEST_ID)
except Repository.ObjectNotFound:
raise NoManifestError
manifest_data = RepoObj.extract_crypted_data(manifest_chunk)
key = identify_key(manifest_data)
self.keyblob_storage = key.STORAGE
if self.keyblob_storage == KeyBlobStorage.NO_STORAGE:

View File

@ -241,12 +241,12 @@ class ItemCache:
class FuseBackend:
"""Virtual filesystem based on archive(s) to provide information to fuse"""
def __init__(self, key, manifest, repository, args, decrypted_repository):
self.repository_uncached = repository
def __init__(self, manifest, args, decrypted_repository):
self._args = args
self.numeric_ids = args.numeric_ids
self._manifest = manifest
self.key = key
self.repo_objs = manifest.repo_objs
self.repository_uncached = manifest.repository
# Maps inode numbers to Item instances. This is used for synthetic inodes, i.e. file-system objects that are
# made up and are not contained in the archives. For example archive directories or intermediate directories
# not contained in archives.
@ -330,13 +330,7 @@ class FuseBackend:
"""Build FUSE inode hierarchy from archive metadata"""
self.file_versions = {} # for versions mode: original path -> version
t0 = time.perf_counter()
archive = Archive(
self.repository_uncached,
self.key,
self._manifest,
archive_name,
consider_part_files=self._args.consider_part_files,
)
archive = Archive(self._manifest, archive_name, consider_part_files=self._args.consider_part_files)
strip_components = self._args.strip_components
matcher = build_matcher(self._args.patterns, self._args.paths)
hlm = HardLinkManager(id_type=bytes, info_type=str) # hlid -> path
@ -447,9 +441,9 @@ class FuseBackend:
class FuseOperations(llfuse.Operations, FuseBackend):
"""Export archive as a FUSE filesystem"""
def __init__(self, key, repository, manifest, args, decrypted_repository):
def __init__(self, manifest, args, decrypted_repository):
llfuse.Operations.__init__(self)
FuseBackend.__init__(self, key, manifest, repository, args, decrypted_repository)
FuseBackend.__init__(self, manifest, args, decrypted_repository)
self.decrypted_repository = decrypted_repository
data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
@ -688,7 +682,7 @@ class FuseOperations(llfuse.Operations, FuseBackend):
# evict fully read chunk from cache
del self.data_cache[id]
else:
data = self.key.decrypt(id, self.repository_uncached.get(id))
_, data = self.repo_objs.parse(id, self.repository_uncached.get(id))
if offset + n < len(data):
# chunk was only partially read, cache it
self.data_cache[id] = data

View File

@ -673,7 +673,7 @@ class ArchiveFormatter(BaseFormatter):
if self._archive is None or self._archive.id != self.id:
from ..archive import Archive
self._archive = Archive(self.repository, self.key, self.manifest, self.name, iec=self.iec)
self._archive = Archive(self.manifest, self.name, iec=self.iec)
return self._archive
def get_meta(self, key, rs):

View File

@ -17,6 +17,7 @@ from .helpers.datastruct import StableDict
from .helpers.parseformat import bin_to_hex
from .helpers.time import parse_timestamp
from .helpers.errors import Error
from .repoobj import RepoObj
class NoManifestError(Error):
@ -164,10 +165,11 @@ class Manifest:
MANIFEST_ID = b"\0" * 32
def __init__(self, key, repository, item_keys=None):
def __init__(self, key, repository, item_keys=None, ro_cls=RepoObj):
self.archives = Archives()
self.config = {}
self.key = key
self.repo_objs = ro_cls(key)
self.repository = repository
self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
self.tam_verified = False
@ -182,7 +184,7 @@ class Manifest:
return parse_timestamp(self.timestamp)
@classmethod
def load(cls, repository, operations, key=None, force_tam_not_required=False):
def load(cls, repository, operations, key=None, force_tam_not_required=False, *, ro_cls=RepoObj):
from .item import ManifestItem
from .crypto.key import key_factory, tam_required_file, tam_required
from .repository import Repository
@ -192,14 +194,14 @@ class Manifest:
except Repository.ObjectNotFound:
raise NoManifestError
if not key:
key = key_factory(repository, cdata)
manifest = cls(key, repository)
data = key.decrypt(cls.MANIFEST_ID, cdata)
key = key_factory(repository, cdata, ro_cls=ro_cls)
manifest = cls(key, repository, ro_cls=ro_cls)
_, data = manifest.repo_objs.parse(cls.MANIFEST_ID, cdata)
manifest_dict, manifest.tam_verified = key.unpack_and_verify_manifest(
data, force_tam_not_required=force_tam_not_required
)
m = ManifestItem(internal_dict=manifest_dict)
manifest.id = key.id_hash(data)
manifest.id = manifest.repo_objs.id_hash(data)
if m.get("version") not in (1, 2):
raise ValueError("Invalid manifest version")
manifest.archives.set_raw_dict(m.archives)
@ -219,7 +221,7 @@ class Manifest:
logger.debug("Manifest is TAM verified and says TAM is *not* required, updating security database...")
os.unlink(tam_required_file(repository))
manifest.check_repository_compatibility(operations)
return manifest, key
return manifest
def check_repository_compatibility(self, operations):
for operation in operations:
@ -272,5 +274,5 @@ class Manifest:
)
self.tam_verified = True
data = self.key.pack_and_authenticate_metadata(manifest.as_dict())
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(self.MANIFEST_ID, data))
self.id = self.repo_objs.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.repo_objs.format(self.MANIFEST_ID, {}, data))

View File

@ -1283,7 +1283,7 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None
"""
Return a Repository(No)Cache for *repository*.
If *decrypted_cache* is a key object, then get and get_many will return a tuple
If *decrypted_cache* is a repo_objs object, then get and get_many will return a tuple
(csize, plaintext) instead of the actual data in the repository. The cache will
store decrypted data, which increases CPU efficiency (by avoiding repeatedly decrypting
and more importantly MAC and ID checking cached objects).
@ -1292,7 +1292,7 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None
if decrypted_cache and (pack or unpack or transform):
raise ValueError("decrypted_cache and pack/unpack/transform are incompatible")
elif decrypted_cache:
key = decrypted_cache
repo_objs = decrypted_cache
# 32 bit csize, 64 bit (8 byte) xxh64
cache_struct = struct.Struct("=I8s")
compressor = Compressor("lz4")
@ -1311,8 +1311,8 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None
return csize, compressor.decompress(compressed)
def transform(id_, data):
csize = len(data)
decrypted = key.decrypt(id_, data)
meta, decrypted = repo_objs.parse(id_, data)
csize = meta.get("csize", len(data))
return csize, decrypted
if isinstance(repository, RemoteRepository) or force_cache:

src/borg/repoobj.py (new file, 129 lines added)
View File

@ -0,0 +1,129 @@
from struct import Struct
from borg.helpers import msgpack
from borg.compress import Compressor, LZ4_COMPRESSOR
class RepoObj:
meta_len_hdr = Struct("<I")
@classmethod
def extract_crypted_data(cls, data: bytes) -> bytes:
# used for crypto type detection
offs = cls.meta_len_hdr.size
meta_len = cls.meta_len_hdr.unpack(data[:offs])[0]
return data[offs + meta_len :]
def __init__(self, key):
self.key = key
# Some commands write new chunks (e.g. rename) but don't take a --compression argument. This duplicates
# the default used by those commands who do take a --compression argument.
self.compressor = LZ4_COMPRESSOR
self.decompress = Compressor("lz4").decompress
def id_hash(self, data: bytes) -> bytes:
return self.key.id_hash(data)
def format(self, id: bytes, meta: dict, data: bytes, compress: bool = True, size: int = None) -> bytes:
assert isinstance(id, bytes)
assert isinstance(meta, dict)
assert isinstance(data, (bytes, memoryview))
assert compress or size is not None
if compress:
assert size is None or size == len(data)
size = len(data) if size is None else size
data_compressed = self.compressor.compress(data) # TODO: compressor also adds compressor type/level bytes
else:
assert isinstance(size, int)
data_compressed = data # is already compressed
meta = dict(meta) # make a copy, so call arg is not modified
meta["size"] = size
meta["csize"] = len(data_compressed)
# meta["ctype"] = ...
# meta["clevel"] = ...
data_encrypted = self.key.encrypt(id, data_compressed)
meta_packed = msgpack.packb(meta)
meta_encrypted = self.key.encrypt(id, meta_packed)
hdr = self.meta_len_hdr.pack(len(meta_encrypted))
return hdr + meta_encrypted + data_encrypted
def parse_meta(self, id: bytes, cdata: bytes) -> dict:
# when calling parse_meta, enough cdata needs to be supplied to completely contain the
# meta_len_hdr and the encrypted, packed metadata. it is allowed to provide more cdata.
assert isinstance(id, bytes)
assert isinstance(cdata, bytes)
obj = memoryview(cdata)
offs = self.meta_len_hdr.size
hdr = obj[:offs]
len_meta_encrypted = self.meta_len_hdr.unpack(hdr)[0]
assert offs + len_meta_encrypted <= len(obj)
meta_encrypted = obj[offs : offs + len_meta_encrypted]
meta_packed = self.key.decrypt(id, meta_encrypted)
meta = msgpack.unpackb(meta_packed)
return meta
def parse(self, id: bytes, cdata: bytes, decompress: bool = True) -> tuple[dict, bytes]:
assert isinstance(id, bytes)
assert isinstance(cdata, bytes)
obj = memoryview(cdata)
offs = self.meta_len_hdr.size
hdr = obj[:offs]
len_meta_encrypted = self.meta_len_hdr.unpack(hdr)[0]
assert offs + len_meta_encrypted <= len(obj)
meta_encrypted = obj[offs : offs + len_meta_encrypted]
offs += len_meta_encrypted
meta_packed = self.key.decrypt(id, meta_encrypted)
meta = msgpack.unpackb(meta_packed)
data_encrypted = obj[offs:]
data_compressed = self.key.decrypt(id, data_encrypted)
if decompress:
data = self.decompress(data_compressed) # TODO: decompressor still needs type/level bytes
self.key.assert_id(id, data)
else:
data = data_compressed
return meta, data
class RepoObj1: # legacy
@classmethod
def extract_crypted_data(cls, data: bytes) -> bytes:
# used for crypto type detection
return data
def __init__(self, key):
self.key = key
self.compressor = LZ4_COMPRESSOR
self.decompress = Compressor("lz4").decompress
def id_hash(self, data: bytes) -> bytes:
return self.key.id_hash(data)
def format(self, id: bytes, meta: dict, data: bytes, compress: bool = True, size: int = None) -> bytes:
assert isinstance(id, bytes)
assert meta == {}
assert isinstance(data, (bytes, memoryview))
assert compress or size is not None
if compress:
assert size is None
size = len(data)
data_compressed = self.compressor.compress(data) # TODO: compressor also adds compressor type/level bytes
else:
assert isinstance(size, int)
data_compressed = data # is already compressed
data_encrypted = self.key.encrypt(id, data_compressed)
return data_encrypted
def parse(self, id: bytes, cdata: bytes, decompress: bool = True) -> tuple[dict, bytes]:
assert isinstance(id, bytes)
assert isinstance(cdata, bytes)
meta = {}
data_compressed = self.key.decrypt(id, cdata)
meta["csize"] = len(data_compressed)
if decompress:
data = self.decompress(data_compressed) # TODO: decompressor still needs type/level bytes
self.key.assert_id(id, data)
meta["size"] = len(data)
else:
data = data_compressed
return meta, data
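
To make the new layout concrete, a brief sketch (not part of the commit) of a format / parse_meta / parse roundtrip; the Mock-backed PlaintextKey is only a stand-in, borrowed from the test fixtures further below:

from unittest.mock import Mock

from borg.crypto.key import PlaintextKey
from borg.repoobj import RepoObj

repo_objs = RepoObj(PlaintextKey(Mock()))  # PlaintextKey needs no passphrase; Mock stands in for a repository
data = b"foobar" * 10
id = repo_objs.id_hash(data)
cdata = repo_objs.format(id, {"custom": "x"}, data)

# cdata layout: meta_len_hdr (4 bytes) + encrypted(msgpacked(meta)) + encrypted(compressed(data))
hdr_size = RepoObj.meta_len_hdr.size
meta_len = RepoObj.meta_len_hdr.unpack(cdata[:hdr_size])[0]
meta_only = repo_objs.parse_meta(id, cdata[: hdr_size + meta_len])  # header + encrypted meta suffice
assert meta_only["size"] == len(data) and meta_only["custom"] == "x"

meta, plaintext = repo_objs.parse(id, cdata)  # decrypts, decompresses, checks the id against the data
assert plaintext == data and meta["csize"] < len(data)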

View File

@ -110,8 +110,8 @@ class ArchiveTimestampTestCase(BaseTestCase):
def _test_timestamp_parsing(self, isoformat, expected):
repository = Mock()
key = PlaintextKey(repository)
manifest = Manifest(repository, key)
a = Archive(repository, key, manifest, "test", create=True)
manifest = Manifest(key, repository)
a = Archive(manifest, "test", create=True)
a.metadata = ArchiveItem(time=isoformat)
self.assert_equal(a.ts, expected)

View File

@ -314,8 +314,8 @@ class ArchiverTestCaseBase(BaseTestCase):
def open_archive(self, name):
repository = Repository(self.repository_path, exclusive=True)
with repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive = Archive(repository, key, manifest, name)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive = Archive(manifest, name)
return archive, repository
def open_repository(self):
@ -1660,7 +1660,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "extract", "test.4", "--dry-run")
# Make sure both archives have been renamed
with Repository(self.repository_path) as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
self.assert_equal(len(manifest.archives), 2)
self.assert_in("test.3", manifest.archives)
self.assert_in("test.4", manifest.archives)
@ -1784,8 +1784,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")
self.create_src_archive("test")
with Repository(self.repository_path, exclusive=True) as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive = Archive(repository, key, manifest, "test")
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive = Archive(manifest, "test")
for item in archive.iter_items():
if item.path.endswith("testsuite/archiver.py"):
repository.delete(item.chunks[-1].id)
@ -1803,8 +1803,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")
self.create_src_archive("test")
with Repository(self.repository_path, exclusive=True) as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive = Archive(repository, key, manifest, "test")
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
archive = Archive(manifest, "test")
id = archive.metadata.items[0]
repository.put(id, b"corrupted items metadata stream chunk")
repository.commit(compact=False)
@ -1952,12 +1952,12 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "create", "--dry-run", "test", "input")
# Make sure no archive has been created
with Repository(self.repository_path) as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
self.assert_equal(len(manifest.archives), 0)
def add_unknown_feature(self, operation):
with Repository(self.repository_path, exclusive=True) as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
manifest.config["feature_flags"] = {operation.value: {"mandatory": ["unknown-feature"]}}
manifest.write()
repository.commit(compact=False)
@ -2034,8 +2034,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
with Repository(self.repository_path, exclusive=True) as repository:
if path_prefix:
repository._location = Location(self.repository_location)
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, key, manifest) as cache:
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest) as cache:
cache.begin_txn()
cache.cache_config.mandatory_features = {"unknown-feature"}
cache.commit()
@ -2059,8 +2059,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
with Repository(self.repository_path, exclusive=True) as repository:
if path_prefix:
repository._location = Location(self.repository_location)
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, key, manifest) as cache:
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest) as cache:
assert cache.cache_config.mandatory_features == set()
def test_progress_on(self):
@ -3060,11 +3060,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "check")
# Then check that the cache on disk matches exactly what's in the repo.
with self.open_repository() as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, key, manifest, sync=False) as cache:
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest, sync=False) as cache:
original_chunks = cache.chunks
Cache.destroy(repository)
with Cache(repository, key, manifest) as cache:
with Cache(repository, manifest) as cache:
correct_chunks = cache.chunks
assert original_chunks is not correct_chunks
seen = set()
@ -3080,8 +3080,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
self.cmd(f"--repo={self.repository_location}", "create", "test", "input")
with self.open_repository() as repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, key, manifest, sync=False) as cache:
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
with Cache(repository, manifest, sync=False) as cache:
cache.begin_txn()
cache.chunks.incref(list(cache.chunks.iteritems())[0][0])
cache.commit()
@ -3966,7 +3966,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
def test_manifest_rebuild_duplicate_archive(self):
archive, repository = self.open_archive("archive1")
key = archive.key
repo_objs = archive.repo_objs
with repository:
manifest = repository.get(Manifest.MANIFEST_ID)
corrupted_manifest = manifest + b"corrupted!"
@ -3983,8 +3984,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
"version": 2,
}
)
archive_id = key.id_hash(archive)
repository.put(archive_id, key.encrypt(archive_id, archive))
archive_id = repo_objs.id_hash(archive)
repository.put(archive_id, repo_objs.format(archive_id, {}, archive))
repository.commit(compact=False)
self.cmd(f"--repo={self.repository_location}", "check", exit_code=1)
self.cmd(f"--repo={self.repository_location}", "check", "--repair", exit_code=0)
@ -4042,45 +4043,43 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
class ManifestAuthenticationTest(ArchiverTestCaseBase):
def spoof_manifest(self, repository):
with repository:
_, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
repository.put(
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
cdata = manifest.repo_objs.format(
Manifest.MANIFEST_ID,
key.encrypt(
Manifest.MANIFEST_ID,
msgpack.packb(
{
"version": 1,
"archives": {},
"config": {},
"timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(
timespec="microseconds"
),
}
),
{},
msgpack.packb(
{
"version": 1,
"archives": {},
"config": {},
"timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(
timespec="microseconds"
),
}
),
)
repository.put(Manifest.MANIFEST_ID, cdata)
repository.commit(compact=False)
def test_fresh_init_tam_required(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
repository = Repository(self.repository_path, exclusive=True)
with repository:
manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
repository.put(
manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
cdata = manifest.repo_objs.format(
Manifest.MANIFEST_ID,
key.encrypt(
Manifest.MANIFEST_ID,
msgpack.packb(
{
"version": 1,
"archives": {},
"timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(
timespec="microseconds"
),
}
),
{},
msgpack.packb(
{
"version": 1,
"archives": {},
"timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(
timespec="microseconds"
),
}
),
)
repository.put(Manifest.MANIFEST_ID, cdata)
repository.commit(compact=False)
with pytest.raises(TAMRequiredError):

View File

@ -9,7 +9,6 @@ from .hashindex import H
from .key import TestKey
from ..archive import Statistics
from ..cache import AdHocCache
from ..compress import CompressionSpec
from ..crypto.key import AESOCBRepoKey
from ..hashindex import ChunkIndex, CacheSynchronizer
from ..manifest import Manifest
@ -167,17 +166,16 @@ class TestAdHocCache:
def key(self, repository, monkeypatch):
monkeypatch.setenv("BORG_PASSPHRASE", "test")
key = AESOCBRepoKey.create(repository, TestKey.MockArgs())
key.compressor = CompressionSpec("none").compressor
return key
@pytest.fixture
def manifest(self, repository, key):
Manifest(key, repository).write()
return Manifest.load(repository, key=key, operations=Manifest.NO_OPERATION_CHECK)[0]
return Manifest.load(repository, key=key, operations=Manifest.NO_OPERATION_CHECK)
@pytest.fixture
def cache(self, repository, key, manifest):
return AdHocCache(repository, key, manifest)
return AdHocCache(manifest)
def test_does_not_contain_manifest(self, cache):
assert not cache.seen_chunk(Manifest.MANIFEST_ID)

View File

@ -8,6 +8,7 @@ import pytest
from ..crypto.key import bin_to_hex
from ..crypto.key import PlaintextKey, AuthenticatedKey, Blake2AuthenticatedKey
from ..crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey
from ..crypto.key import AEADKeyBase
from ..crypto.key import AESOCBRepoKey, AESOCBKeyfileKey, CHPORepoKey, CHPOKeyfileKey
from ..crypto.key import Blake2AESOCBRepoKey, Blake2AESOCBKeyfileKey, Blake2CHPORepoKey, Blake2CHPOKeyfileKey
from ..crypto.key import ID_HMAC_SHA_256, ID_BLAKE2b_256
@ -42,15 +43,8 @@ class TestKey:
F84MsMMiqpbz4KVICeBZhfAaTPs4W7BC63qml0ZXJhdGlvbnPOAAGGoKRzYWx02gAgLENQ
2uVCoR7EnAoiRzn8J+orbojKtJlNCnQ31SSC8rendmVyc2lvbgE=""".strip()
keyfile2_cdata = unhexlify(
re.sub(
r"\W",
"",
"""
0055f161493fcfc16276e8c31493c4641e1eb19a79d0326fad0291e5a9c98e5933
00000000000003e8d21eaf9b86c297a8cd56432e1915bb
""",
)
keyfile2_cdata = bytes.fromhex(
"003be7d57280d1a42add9f3f36ea363bbc5e9349ad01ddec0634a54dd02959e70500000000000003ec063d2cbcacba6b"
)
keyfile2_id = unhexlify("c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314")
@ -69,7 +63,7 @@ class TestKey:
qkPqtDDxs2j/T7+ndmVyc2lvbgE=""".strip()
keyfile_blake2_cdata = bytes.fromhex(
"04fdf9475cf2323c0ba7a99ddc011064f2e7d039f539f2e448" "0e6f5fc6ff9993d604040404040404098c8cee1c6db8c28947"
"04d6040f5ef80e0a8ac92badcbe3dee83b7a6b53d5c9a58c4eed14964cb10ef591040404040404040d1e65cc1f435027"
)
# Verified against b2sum. Entire string passed to BLAKE2, including the padded 64 byte key contained in
# keyfile_blake2_key_file above is
@ -224,7 +218,8 @@ class TestKey:
data = bytearray(self.keyfile2_cdata)
id = bytearray(key.id_hash(data)) # corrupt chunk id
id[12] = 0
key.decrypt(id, data)
plaintext = key.decrypt(id, data)
key.assert_id(id, plaintext)
def test_roundtrip(self, key):
repository = key.repository
@ -237,45 +232,18 @@ class TestKey:
decrypted = loaded_key.decrypt(id, encrypted)
assert decrypted == plaintext
def test_decrypt_decompress(self, key):
plaintext = b"123456789"
id = key.id_hash(plaintext)
encrypted = key.encrypt(id, plaintext)
assert key.decrypt(id, encrypted, decompress=False) != plaintext
assert key.decrypt(id, encrypted) == plaintext
def test_assert_id(self, key):
plaintext = b"123456789"
id = key.id_hash(plaintext)
key.assert_id(id, plaintext)
id_changed = bytearray(id)
id_changed[0] ^= 1
with pytest.raises(IntegrityError):
key.assert_id(id_changed, plaintext)
plaintext_changed = plaintext + b"1"
with pytest.raises(IntegrityError):
key.assert_id(id, plaintext_changed)
def test_getting_wrong_chunk_fails(self, key):
# for the new AEAD crypto, we provide the chunk id as AAD when encrypting/authenticating,
# we provide the id **we want** as AAD when authenticating/decrypting the data we got from the repo.
# only if the id used for encrypting matches the id we want, the AEAD crypto authentication will succeed.
# thus, there is no need any more for calling self._assert_id() for the new crypto.
# the old crypto as well as plaintext and authenticated modes still need to call self._assert_id().
plaintext_wanted = b"123456789"
id_wanted = key.id_hash(plaintext_wanted)
ciphertext_wanted = key.encrypt(id_wanted, plaintext_wanted)
plaintext_other = b"xxxxxxxxx"
id_other = key.id_hash(plaintext_other)
ciphertext_other = key.encrypt(id_other, plaintext_other)
# both ciphertexts are authentic and decrypting them should succeed:
key.decrypt(id_wanted, ciphertext_wanted)
key.decrypt(id_other, ciphertext_other)
# but if we wanted the one and got the other, it must fail.
# the new crypto will fail due to AEAD auth failure,
# the old crypto and plaintext, authenticated modes will fail due to ._assert_id() check failing:
with pytest.raises(IntegrityErrorBase):
key.decrypt(id_wanted, ciphertext_other)
if not isinstance(key, AEADKeyBase):
with pytest.raises(IntegrityError):
key.assert_id(id_changed, plaintext)
plaintext_changed = plaintext + b"1"
with pytest.raises(IntegrityError):
key.assert_id(id, plaintext_changed)
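
The comment block in the removed test above carries the reasoning; as a hedged illustration (not part of the commit), the wrong-chunk case is now caught by AEAD authentication itself, assuming key is one of the AEAD keys (e.g. AESOCBRepoKey) from the key fixture and IntegrityErrorBase is imported as elsewhere in this module:

def wrong_chunk_fails_via_aead(key):
    plaintext_wanted = b"123456789"
    id_wanted = key.id_hash(plaintext_wanted)
    plaintext_other = b"xxxxxxxxx"
    id_other = key.id_hash(plaintext_other)
    ciphertext_other = key.encrypt(id_other, plaintext_other)
    # asking for id_wanted but feeding the ciphertext of the other chunk fails AEAD authentication,
    # so no separate assert_id() call is needed for the new crypto
    with pytest.raises(IntegrityErrorBase):
        key.decrypt(id_wanted, ciphertext_other)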
def test_authenticated_encrypt(self, monkeypatch):
monkeypatch.setenv("BORG_PASSPHRASE", "test")
@ -285,8 +253,8 @@ class TestKey:
plaintext = b"123456789"
id = key.id_hash(plaintext)
authenticated = key.encrypt(id, plaintext)
# 0x07 is the key TYPE, \x00ff identifies no compression / unknown level.
assert authenticated == b"\x07\x00\xff" + plaintext
# 0x07 is the key TYPE.
assert authenticated == b"\x07" + plaintext
def test_blake2_authenticated_encrypt(self, monkeypatch):
monkeypatch.setenv("BORG_PASSPHRASE", "test")
@ -296,8 +264,8 @@ class TestKey:
plaintext = b"123456789"
id = key.id_hash(plaintext)
authenticated = key.encrypt(id, plaintext)
# 0x06 is the key TYPE, 0x00ff identifies no compression / unknown level.
assert authenticated == b"\x06\x00\xff" + plaintext
# 0x06 is the key TYPE.
assert authenticated == b"\x06" + plaintext
class TestTAM:

View File

@ -9,8 +9,8 @@ import pytest
from ..remote import SleepingBandwidthLimiter, RepositoryCache, cache_if_remote
from ..repository import Repository
from ..crypto.key import PlaintextKey
from ..compress import CompressionSpec
from ..helpers import IntegrityError
from ..repoobj import RepoObj
from .hashindex import H
from .key import TestKey
@ -160,35 +160,38 @@ class TestRepositoryCache:
def key(self, repository, monkeypatch):
monkeypatch.setenv("BORG_PASSPHRASE", "test")
key = PlaintextKey.create(repository, TestKey.MockArgs())
key.compressor = CompressionSpec("none").compressor
return key
def _put_encrypted_object(self, key, repository, data):
id_ = key.id_hash(data)
repository.put(id_, key.encrypt(id_, data))
@pytest.fixture
def repo_objs(self, key):
return RepoObj(key)
def _put_encrypted_object(self, repo_objs, repository, data):
id_ = repo_objs.id_hash(data)
repository.put(id_, repo_objs.format(id_, {}, data))
return id_
@pytest.fixture
def H1(self, key, repository):
return self._put_encrypted_object(key, repository, b"1234")
def H1(self, repo_objs, repository):
return self._put_encrypted_object(repo_objs, repository, b"1234")
@pytest.fixture
def H2(self, key, repository):
return self._put_encrypted_object(key, repository, b"5678")
def H2(self, repo_objs, repository):
return self._put_encrypted_object(repo_objs, repository, b"5678")
@pytest.fixture
def H3(self, key, repository):
return self._put_encrypted_object(key, repository, bytes(100))
def H3(self, repo_objs, repository):
return self._put_encrypted_object(repo_objs, repository, bytes(100))
@pytest.fixture
def decrypted_cache(self, key, repository):
return cache_if_remote(repository, decrypted_cache=key, force_cache=True)
def decrypted_cache(self, repo_objs, repository):
return cache_if_remote(repository, decrypted_cache=repo_objs, force_cache=True)
def test_cache_corruption(self, decrypted_cache: RepositoryCache, H1, H2, H3):
list(decrypted_cache.get_many([H1, H2, H3]))
iterator = decrypted_cache.get_many([H1, H2, H3])
assert next(iterator) == (7, b"1234")
assert next(iterator) == (6, b"1234")
with open(decrypted_cache.key_filename(H2), "a+b") as fd:
fd.seek(-1, io.SEEK_END)
@ -198,4 +201,4 @@ class TestRepositoryCache:
fd.truncate()
with pytest.raises(IntegrityError):
assert next(iterator) == (7, b"5678")
assert next(iterator) == (26, b"5678")

View File

@ -0,0 +1,59 @@
import pytest
from ..crypto.key import PlaintextKey
from ..repository import Repository
from ..repoobj import RepoObj, RepoObj1
@pytest.fixture
def repository(tmpdir):
return Repository(tmpdir, create=True)
@pytest.fixture
def key(repository):
return PlaintextKey(repository)
def test_format_parse_roundtrip(key):
repo_objs = RepoObj(key)
data = b"foobar" * 10
id = repo_objs.id_hash(data)
meta = {"custom": "something"} # size and csize are computed automatically
cdata = repo_objs.format(id, meta, data)
got_meta = repo_objs.parse_meta(id, cdata)
assert got_meta["size"] == len(data)
assert got_meta["csize"] < len(data)
assert got_meta["custom"] == "something"
got_meta, got_data = repo_objs.parse(id, cdata)
assert got_meta["size"] == len(data)
assert got_meta["csize"] < len(data)
assert got_meta["custom"] == "something"
assert data == got_data
edata = repo_objs.extract_crypted_data(cdata)
compressor = repo_objs.compressor
key = repo_objs.key
assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level)))
def test_format_parse_roundtrip_borg1(key): # legacy
repo_objs = RepoObj1(key)
data = b"foobar" * 10
id = repo_objs.id_hash(data)
meta = {} # borg1 does not support this kind of metadata
cdata = repo_objs.format(id, meta, data)
# borg1 does not support separate metadata and borg2 does not invoke parse_meta for borg1 repos
got_meta, got_data = repo_objs.parse(id, cdata)
assert got_meta["size"] == len(data)
assert got_meta["csize"] < len(data)
assert data == got_data
edata = repo_objs.extract_crypted_data(cdata)
compressor = repo_objs.compressor
key = repo_objs.key
assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level)))