From 1b6f9289174d42ddf6fe340d7ff5a1f02557aef3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 15 Sep 2023 22:19:29 +0200 Subject: [PATCH 1/8] ro_type: typed repo objects, see #7670 writing: put type into repoobj metadata reading: check wanted type against type we got repoobj metadata is encrypted and authenticated. repoobj data is encrypted and authenticated, also (separately). encryption and decryption of both metadata and data get the same "chunk ID" as AAD, so both are "bound" to that (same) ID. a repo-side attacker can neither see cleartext metadata/data, nor successfully tamper with it (AEAD decryption would fail). also, a repo-side attacker could not replace a repoobj A with a differently typed repoobj B without borg noticing: - the metadata/data is cryptographically bound to its ID. authentication/decryption would fail on mismatch. - the type check would fail. thus, the problem (see CVEs in changelog) solved in borg 1 by the manifest and archive TAMs is now already solved by the type check. --- src/borg/archive.py | 59 +++++++++++--------- src/borg/archiver/debug_cmd.py | 20 ++++--- src/borg/archiver/rcompress_cmd.py | 7 ++- src/borg/archiver/tar_cmds.py | 4 +- src/borg/archiver/transfer_cmd.py | 18 +++++- src/borg/cache.py | 27 +++++++-- src/borg/constants.py | 8 +++ src/borg/fuse.py | 3 +- src/borg/helpers/misc.py | 3 +- src/borg/helpers/parseformat.py | 2 +- src/borg/manifest.py | 4 +- src/borg/remote.py | 2 +- src/borg/repoobj.py | 18 +++++- src/borg/testsuite/archive.py | 3 +- src/borg/testsuite/archiver/check_cmd.py | 2 +- src/borg/testsuite/archiver/checks.py | 4 +- src/borg/testsuite/archiver/rcompress_cmd.py | 4 +- src/borg/testsuite/remote.py | 3 +- src/borg/testsuite/repoobj.py | 52 +++++++++++++---- 19 files changed, 171 insertions(+), 72 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 8d85724c..8db9e28c 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -296,7 +296,7 @@ class DownloadPipeline: """ hlids_preloaded = set() unpacker = msgpack.Unpacker(use_list=False) - for data in self.fetch_many(ids): + for data in self.fetch_many(ids, ro_type=ROBJ_ARCHIVE_STREAM): unpacker.feed(data) for _item in unpacker: item = Item(internal_dict=_item) @@ -318,9 +318,10 @@ class DownloadPipeline: self.repository.preload([c.id for c in item.chunks]) yield item - def fetch_many(self, ids, is_preloaded=False): + def fetch_many(self, ids, is_preloaded=False, ro_type=None): + assert ro_type is not None for id_, cdata in zip(ids, self.repository.get_many(ids, is_preloaded=is_preloaded)): - _, data = self.repo_objs.parse(id_, cdata) + _, data = self.repo_objs.parse(id_, cdata, ro_type=ro_type) yield data @@ -393,7 +394,9 @@ class CacheChunkBuffer(ChunkBuffer): self.stats = stats def write_chunk(self, chunk): - id_, _ = self.cache.add_chunk(self.key.id_hash(chunk), {}, chunk, stats=self.stats, wait=False) + id_, _ = self.cache.add_chunk( + self.key.id_hash(chunk), {}, chunk, stats=self.stats, wait=False, ro_type=ROBJ_ARCHIVE_STREAM + ) logger.debug(f"writing item metadata stream chunk {bin_to_hex(id_)}") self.cache.repository.async_response(wait=False) return id_ @@ -422,7 +425,7 @@ def archive_get_items(metadata, *, repo_objs, repository): assert "items" not in metadata items = [] for id, cdata in zip(metadata.item_ptrs, repository.get_many(metadata.item_ptrs)): - _, data = repo_objs.parse(id, cdata) + _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_CHUNKIDS) ids = msgpack.unpackb(data) items.extend(ids) return items @@ -440,9 +443,9 
@@ def archive_put_items(chunk_ids, *, repo_objs, cache=None, stats=None, add_refer id = repo_objs.id_hash(data) logger.debug(f"writing item_ptrs chunk {bin_to_hex(id)}") if cache is not None and stats is not None: - cache.add_chunk(id, {}, data, stats=stats) + cache.add_chunk(id, {}, data, stats=stats, ro_type=ROBJ_ARCHIVE_CHUNKIDS) elif add_reference is not None: - cdata = repo_objs.format(id, {}, data) + cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_ARCHIVE_CHUNKIDS) add_reference(id, len(data), cdata) else: raise NotImplementedError @@ -531,7 +534,7 @@ class Archive: def _load_meta(self, id): cdata = self.repository.get(id) - _, data = self.repo_objs.parse(id, cdata) + _, data = self.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META) archive, _ = self.key.unpack_and_verify_archive(data) metadata = ArchiveItem(internal_dict=archive) if metadata.version not in (1, 2): # legacy: still need to read v1 archives @@ -702,7 +705,7 @@ Duration: {0.duration} data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive") self.id = self.repo_objs.id_hash(data) try: - self.cache.add_chunk(self.id, {}, data, stats=self.stats) + self.cache.add_chunk(self.id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META) except IntegrityError as err: err_msg = str(err) # hack to avoid changing the RPC protocol by introducing new (more specific) exception class @@ -740,7 +743,7 @@ Duration: {0.duration} for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)): pi.show(increase=1) add(id) - _, data = self.repo_objs.parse(id, chunk) + _, data = self.repo_objs.parse(id, chunk, ro_type=ROBJ_ARCHIVE_STREAM) sync.feed(data) unique_size = archive_index.stats_against(cache.chunks)[1] pi.finish() @@ -826,7 +829,9 @@ Duration: {0.duration} # it would get stuck. 
if "chunks" in item: item_chunks_size = 0 - for data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True): + for data in self.pipeline.fetch_many( + [c.id for c in item.chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM + ): if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) if stdout: @@ -878,7 +883,7 @@ Duration: {0.duration} fd = open(path, "wb") with fd: ids = [c.id for c in item.chunks] - for data in self.pipeline.fetch_many(ids, is_preloaded=True): + for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM): if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) with backup_io("write"): @@ -1027,7 +1032,7 @@ Duration: {0.duration} del metadata.items data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive") new_id = self.key.id_hash(data) - self.cache.add_chunk(new_id, {}, data, stats=self.stats) + self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META) self.manifest.archives[self.name] = (new_id, metadata.time) self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -1076,7 +1081,7 @@ Duration: {0.duration} for i, (items_id, data) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): if progress: pi.show(i) - _, data = self.repo_objs.parse(items_id, data) + _, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM) unpacker.feed(data) chunk_decref(items_id, stats) try: @@ -1132,8 +1137,8 @@ Duration: {0.duration} path, item1, item2, - archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]), - archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]), + archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])], ro_type=ROBJ_FILE_STREAM), + archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])], ro_type=ROBJ_FILE_STREAM), can_compare_chunk_ids=can_compare_chunk_ids, ) @@ -1319,7 +1324,7 @@ class ChunksProcessor: started_hashing = time.monotonic() chunk_id, data = cached_hash(chunk, self.key.id_hash) stats.hashing_time += time.monotonic() - started_hashing - chunk_entry = cache.add_chunk(chunk_id, {}, data, stats=stats, wait=False) + chunk_entry = cache.add_chunk(chunk_id, {}, data, stats=stats, wait=False, ro_type=ROBJ_FILE_STREAM) self.cache.repository.async_response(wait=False) return chunk_entry @@ -1898,7 +1903,7 @@ class ArchiveChecker: else: try: # we must decompress, so it'll call assert_id() in there: - self.repo_objs.parse(chunk_id, encrypted_data, decompress=True) + self.repo_objs.parse(chunk_id, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase as integrity_error: self.error_found = True errors += 1 @@ -1930,7 +1935,7 @@ class ArchiveChecker: try: encrypted_data = self.repository.get(defect_chunk) # we must decompress, so it'll call assert_id() in there: - self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True) + self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase: # failed twice -> get rid of this chunk del self.chunks[defect_chunk] @@ -1978,7 +1983,7 @@ class ArchiveChecker: pi.show() cdata = self.repository.get(chunk_id) try: - _, data = self.repo_objs.parse(chunk_id, cdata) + _, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase as exc: logger.error("Skipping corrupted chunk: %s", exc) self.error_found = True @@ -2043,7 +2048,7 @@ class ArchiveChecker: def add_callback(chunk): id_ = 
self.key.id_hash(chunk) - cdata = self.repo_objs.format(id_, {}, chunk) + cdata = self.repo_objs.format(id_, {}, chunk, ro_type=ROBJ_ARCHIVE_STREAM) add_reference(id_, len(chunk), cdata) return id_ @@ -2066,7 +2071,7 @@ class ArchiveChecker: def replacement_chunk(size): chunk = Chunk(None, allocation=CH_ALLOC, size=size) chunk_id, data = cached_hash(chunk, self.key.id_hash) - cdata = self.repo_objs.format(chunk_id, {}, data) + cdata = self.repo_objs.format(chunk_id, {}, data, ro_type=ROBJ_FILE_STREAM) return chunk_id, size, cdata offset = 0 @@ -2197,7 +2202,7 @@ class ArchiveChecker: unpacker.resync() for chunk_id, cdata in zip(items, repository.get_many(items)): try: - _, data = self.repo_objs.parse(chunk_id, cdata) + _, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_ARCHIVE_STREAM) unpacker.feed(data) for item in unpacker: valid, reason = valid_item(item) @@ -2260,7 +2265,7 @@ class ArchiveChecker: mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) try: - _, data = self.repo_objs.parse(archive_id, cdata) + _, data = self.repo_objs.parse(archive_id, cdata, ro_type=ROBJ_ARCHIVE_META) except IntegrityError as integrity_error: logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error) self.error_found = True @@ -2298,7 +2303,7 @@ class ArchiveChecker: ) data = self.key.pack_and_authenticate_metadata(archive.as_dict(), context=b"archive", salt=salt) new_archive_id = self.key.id_hash(data) - cdata = self.repo_objs.format(new_archive_id, {}, data) + cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META) add_reference(new_archive_id, len(data), cdata) self.manifest.archives[info.name] = (new_archive_id, info.ts) pi.finish() @@ -2434,13 +2439,13 @@ class ArchiveRecreater: chunk_id, data = cached_hash(chunk, self.key.id_hash) if chunk_id in self.seen_chunks: return self.cache.chunk_incref(chunk_id, target.stats) - chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False) + chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM) self.cache.repository.async_response(wait=False) self.seen_chunks.add(chunk_entry.id) return chunk_entry def iter_chunks(self, archive, target, chunks): - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in chunks]) + chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in chunks], ro_type=ROBJ_FILE_STREAM) if target.recreate_rechunkify: # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk # (does not load the entire file into memory) diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py index 6c0d1892..cab8075c 100644 --- a/src/borg/archiver/debug_cmd.py +++ b/src/borg/archiver/debug_cmd.py @@ -35,7 +35,7 @@ class DebugMixIn: repo_objs = manifest.repo_objs archive = Archive(manifest, args.name) for i, item_id in enumerate(archive.metadata.items): - _, data = repo_objs.parse(item_id, repository.get(item_id)) + _, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM) filename = "%06d_%s.items" % (i, bin_to_hex(item_id)) print("Dumping", filename) with open(filename, "wb") as fd: @@ -65,7 +65,8 @@ class DebugMixIn: fd.write(do_indent(prepare_dump_dict(archive_meta_orig))) fd.write(",\n") - _, data = repo_objs.parse(archive_meta_orig["id"], repository.get(archive_meta_orig["id"])) + archive_id = archive_meta_orig["id"] + _, data = 
repo_objs.parse(archive_id, repository.get(archive_id), ro_type=ROBJ_ARCHIVE_META) archive_org_dict = msgpack.unpackb(data, object_hook=StableDict) fd.write(' "_meta":\n') @@ -77,10 +78,10 @@ class DebugMixIn: first = True items = [] for chunk_id in archive_org_dict["item_ptrs"]: - _, data = repo_objs.parse(chunk_id, repository.get(chunk_id)) + _, data = repo_objs.parse(chunk_id, repository.get(chunk_id), ro_type=ROBJ_ARCHIVE_CHUNKIDS) items.extend(msgpack.unpackb(data)) for item_id in items: - _, data = repo_objs.parse(item_id, repository.get(item_id)) + _, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM) unpacker.feed(data) for item in unpacker: item = prepare_dump_dict(item) @@ -101,7 +102,7 @@ class DebugMixIn: def do_debug_dump_manifest(self, args, repository, manifest): """dump decoded repository manifest""" repo_objs = manifest.repo_objs - _, data = repo_objs.parse(manifest.MANIFEST_ID, repository.get(manifest.MANIFEST_ID)) + _, data = repo_objs.parse(manifest.MANIFEST_ID, repository.get(manifest.MANIFEST_ID), ro_type=ROBJ_MANIFEST) meta = prepare_dump_dict(msgpack.unpackb(data, object_hook=StableDict)) @@ -116,7 +117,7 @@ class DebugMixIn: def decrypt_dump(i, id, cdata, tag=None, segment=None, offset=None): if cdata is not None: - _, data = repo_objs.parse(id, cdata) + _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE) else: _, data = {}, b"" tag_str = "" if tag is None else "_" + tag @@ -211,7 +212,7 @@ class DebugMixIn: break for id in ids: cdata = repository.get(id) - _, data = repo_objs.parse(id, cdata) + _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE) # try to locate wanted sequence crossing the border of last_data and data boundary_data = last_data[-(len(wanted) - 1) :] + data[: len(wanted) - 1] @@ -284,7 +285,7 @@ class DebugMixIn: cdata = f.read() repo_objs = manifest.repo_objs - meta, data = repo_objs.parse(id=id, cdata=cdata) + meta, data = repo_objs.parse(id=id, cdata=cdata, ro_type=ROBJ_DONTCARE) with open(args.json_path, "w") as f: json.dump(meta, f) @@ -315,7 +316,8 @@ class DebugMixIn: meta = json.load(f) repo_objs = manifest.repo_objs - data_encrypted = repo_objs.format(id=id, meta=meta, data=data) + # TODO: support misc repo object types other than ROBJ_FILE_STREAM + data_encrypted = repo_objs.format(id=id, meta=meta, data=data, ro_type=ROBJ_FILE_STREAM) with open(args.object_path, "wb") as f: f.write(data_encrypted) diff --git a/src/borg/archiver/rcompress_cmd.py b/src/borg/archiver/rcompress_cmd.py index 038fd70b..40d7571d 100644 --- a/src/borg/archiver/rcompress_cmd.py +++ b/src/borg/archiver/rcompress_cmd.py @@ -37,7 +37,7 @@ def find_chunks(repository, repo_objs, stats, ctype, clevel, olevel): if not chunk_ids: break for id, chunk_no_data in zip(chunk_ids, repository.get_many(chunk_ids, read_data=False)): - meta = repo_objs.parse_meta(id, chunk_no_data) + meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE) compr_found = meta["ctype"], meta["clevel"], meta.get("olevel", -1) if compr_found != compr_wanted: recompress_ids.append(id) @@ -57,13 +57,14 @@ def process_chunks(repository, repo_objs, stats, recompress_ids, olevel): for id, chunk in zip(recompress_ids, repository.get_many(recompress_ids, read_data=True)): old_size = len(chunk) stats["old_size"] += old_size - meta, data = repo_objs.parse(id, chunk) + meta, data = repo_objs.parse(id, chunk, ro_type=ROBJ_DONTCARE) + ro_type = meta.pop("type", None) compr_old = meta["ctype"], meta["clevel"], meta.get("olevel", -1) if olevel == -1: # 
if the chunk was obfuscated, but should not be in future, remove related metadata meta.pop("olevel", None) meta.pop("psize", None) - chunk = repo_objs.format(id, meta, data) + chunk = repo_objs.format(id, meta, data, ro_type=ro_type) compr_done = meta["ctype"], meta["clevel"], meta.get("olevel", -1) if compr_done != compr_old: # we actually changed something diff --git a/src/borg/archiver/tar_cmds.py b/src/borg/archiver/tar_cmds.py index b18cb24d..0504f97e 100644 --- a/src/borg/archiver/tar_cmds.py +++ b/src/borg/archiver/tar_cmds.py @@ -115,7 +115,9 @@ class TarMixIn: """ Return a file-like object that reads from the chunks of *item*. """ - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in item.chunks], is_preloaded=True) + chunk_iterator = archive.pipeline.fetch_many( + [chunk_id for chunk_id, _ in item.chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM + ) if pi: info = [remove_surrogates(item.path)] return ChunkIteratorFileWrapper( diff --git a/src/borg/archiver/transfer_cmd.py b/src/borg/archiver/transfer_cmd.py index 49529a6c..f820ef63 100644 --- a/src/borg/archiver/transfer_cmd.py +++ b/src/borg/archiver/transfer_cmd.py @@ -111,7 +111,11 @@ class TransferMixIn: # keep compressed payload same, verify via assert_id (that will # decompress, but avoid needing to compress it again): meta, data = other_manifest.repo_objs.parse( - chunk_id, cdata, decompress=True, want_compressed=True + chunk_id, + cdata, + decompress=True, + want_compressed=True, + ro_type=ROBJ_FILE_STREAM, ) meta, data = upgrader.upgrade_compressed_chunk(meta, data) chunk_entry = cache.add_chunk( @@ -124,12 +128,20 @@ class TransferMixIn: size=size, ctype=meta["ctype"], clevel=meta["clevel"], + ro_type=ROBJ_FILE_STREAM, ) elif args.recompress == "always": # always decompress and re-compress file data chunks - meta, data = other_manifest.repo_objs.parse(chunk_id, cdata) + meta, data = other_manifest.repo_objs.parse( + chunk_id, cdata, ro_type=ROBJ_FILE_STREAM + ) chunk_entry = cache.add_chunk( - chunk_id, meta, data, stats=archive.stats, wait=False + chunk_id, + meta, + data, + stats=archive.stats, + wait=False, + ro_type=ROBJ_FILE_STREAM, ) else: raise ValueError(f"unsupported recompress mode: {args.recompress}") diff --git a/src/borg/cache.py b/src/borg/cache.py index 4f0032e1..fb99309a 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -12,7 +12,7 @@ logger = create_logger() files_cache_logger = create_logger("borg.debug.files_cache") -from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED +from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer from .helpers import Location from .helpers import Error @@ -939,7 +939,21 @@ class LocalCache(CacheStatsMixin): self.cache_config.ignored_features.update(repo_features - my_features) self.cache_config.mandatory_features.update(repo_features & my_features) - def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None): + def add_chunk( + self, + id, + meta, + data, + *, + stats, + wait=True, + compress=True, + size=None, + ctype=None, + clevel=None, + ro_type=ROBJ_FILE_STREAM, + ): + assert ro_type is not None if not self.txn_active: self.begin_txn() if size is None and compress: @@ -949,7 +963,9 @@ class LocalCache(CacheStatsMixin): return self.chunk_incref(id, stats) if size is None: raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also") - cdata 
= self.repo_objs.format(id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel) + cdata = self.repo_objs.format( + id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type + ) self.repository.put(id, cdata, wait=wait) self.chunks.add(id, 1, size) stats.update(size, not refcount) @@ -1113,7 +1129,8 @@ Chunk index: {0.total_unique_chunks:20d} unknown""" def memorize_file(self, hashed_path, path_hash, st, ids): pass - def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None): + def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ro_type=ROBJ_FILE_STREAM): + assert ro_type is not None if not self._txn_active: self.begin_txn() if size is None and compress: @@ -1123,7 +1140,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown""" refcount = self.seen_chunk(id, size) if refcount: return self.chunk_incref(id, stats, size=size) - cdata = self.repo_objs.format(id, meta, data, compress=compress) + cdata = self.repo_objs.format(id, meta, data, compress=compress, ro_type=ro_type) self.repository.put(id, cdata, wait=wait) self.chunks.add(id, 1, size) stats.update(size, not refcount) diff --git a/src/borg/constants.py b/src/borg/constants.py index 54528b61..7f4cbc31 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -33,6 +33,14 @@ UMASK_DEFAULT = 0o077 # forcing to 0o100XXX later STDIN_MODE_DEFAULT = 0o660 +# RepoObj types +ROBJ_MANIFEST = "M" # Manifest (directory of archives, other metadata) object +ROBJ_ARCHIVE_META = "A" # main archive metadata object +ROBJ_ARCHIVE_CHUNKIDS = "C" # objects with a list of archive metadata stream chunkids +ROBJ_ARCHIVE_STREAM = "S" # archive metadata stream chunk (containing items) +ROBJ_FILE_STREAM = "F" # file content stream chunk (containing user data) +ROBJ_DONTCARE = "*" # used to parse without type assertion (= accept any type) + # in borg < 1.3, this has been defined like this: # 20 MiB minus 41 bytes for a PUT header (because the "size" field in the Repository includes # the header, and the total size was set to precisely 20 MiB for borg < 1.3). diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 376020d9..92f14587 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -10,6 +10,7 @@ import time from collections import defaultdict from signal import SIGINT +from .constants import ROBJ_FILE_STREAM from .fuse_impl import llfuse, has_pyfuse3 @@ -688,7 +689,7 @@ class FuseOperations(llfuse.Operations, FuseBackend): # evict fully read chunk from cache del self.data_cache[id] else: - _, data = self.repo_objs.parse(id, self.repository_uncached.get(id)) + _, data = self.repo_objs.parse(id, self.repository_uncached.get(id), ro_type=ROBJ_FILE_STREAM) if offset + n < len(data): # chunk was only partially read, cache it self.data_cache[id] = data diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py index d844b763..1f687a0b 100644 --- a/src/borg/helpers/misc.py +++ b/src/borg/helpers/misc.py @@ -13,6 +13,7 @@ logger = create_logger() from . import msgpack from .. 
import __version__ as borg_version +from ..constants import ROBJ_FILE_STREAM def sysinfo(): @@ -123,7 +124,7 @@ class ChunkIteratorFileWrapper: def open_item(archive, item): """Return file-like object for archived item (with chunks).""" - chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks]) + chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM) return ChunkIteratorFileWrapper(chunk_iterator) diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index b4a455a0..7f845604 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -933,7 +933,7 @@ class ItemFormatter(BaseFormatter): hash = self.xxh64() elif hash_function in self.hash_algorithms: hash = hashlib.new(hash_function) - for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]): + for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM): hash.update(data) return hash.hexdigest() diff --git a/src/borg/manifest.py b/src/borg/manifest.py index a9609884..047a2a4f 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -250,7 +250,7 @@ class Manifest: if not key: key = key_factory(repository, cdata, ro_cls=ro_cls) manifest = cls(key, repository, ro_cls=ro_cls) - _, data = manifest.repo_objs.parse(cls.MANIFEST_ID, cdata) + _, data = manifest.repo_objs.parse(cls.MANIFEST_ID, cdata, ro_type=ROBJ_MANIFEST) manifest_dict = key.unpack_and_verify_manifest(data) m = ManifestItem(internal_dict=manifest_dict) manifest.id = manifest.repo_objs.id_hash(data) @@ -315,4 +315,4 @@ class Manifest: ) data = self.key.pack_and_authenticate_metadata(manifest.as_dict()) self.id = self.repo_objs.id_hash(data) - self.repository.put(self.MANIFEST_ID, self.repo_objs.format(self.MANIFEST_ID, {}, data)) + self.repository.put(self.MANIFEST_ID, self.repo_objs.format(self.MANIFEST_ID, {}, data, ro_type=ROBJ_MANIFEST)) diff --git a/src/borg/remote.py b/src/borg/remote.py index 40b8b62f..4a1551f0 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -1204,7 +1204,7 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None return csize, decrypted def transform(id_, data): - meta, decrypted = repo_objs.parse(id_, data) + meta, decrypted = repo_objs.parse(id_, data, ro_type=ROBJ_DONTCARE) csize = meta.get("csize", len(data)) return csize, decrypted diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index 557d4a60..8514db4d 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -1,5 +1,6 @@ from struct import Struct +from .constants import * # NOQA from .helpers import msgpack, workarounds from .compress import Compressor, LZ4_COMPRESSOR, get_compressor @@ -35,7 +36,11 @@ class RepoObj: size: int = None, ctype: int = None, clevel: int = None, + ro_type: str = None, ) -> bytes: + assert isinstance(ro_type, str) + assert ro_type != ROBJ_DONTCARE + meta["type"] = ro_type assert isinstance(id, bytes) assert isinstance(meta, dict) assert isinstance(data, (bytes, memoryview)) @@ -58,11 +63,12 @@ class RepoObj: hdr = self.meta_len_hdr.pack(len(meta_encrypted)) return hdr + meta_encrypted + data_encrypted - def parse_meta(self, id: bytes, cdata: bytes) -> dict: + def parse_meta(self, id: bytes, cdata: bytes, ro_type: str) -> dict: # when calling parse_meta, enough cdata needs to be supplied to contain completely the # meta_len_hdr and the encrypted, packed metadata. it is allowed to provide more cdata. 
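        # cdata layout, as produced by format() above: meta_len_hdr + meta_encrypted + data_encrypted.
        # parse_meta only reads the first two parts, so data_encrypted may be truncated or
        # entirely absent (e.g. for chunks fetched with read_data=False, as in rcompress).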
assert isinstance(id, bytes) assert isinstance(cdata, bytes) + assert isinstance(ro_type, str) obj = memoryview(cdata) offs = self.meta_len_hdr.size hdr = obj[:offs] @@ -71,10 +77,11 @@ class RepoObj: meta_encrypted = obj[offs : offs + len_meta_encrypted] meta_packed = self.key.decrypt(id, meta_encrypted) meta = msgpack.unpackb(meta_packed) + assert ro_type == ROBJ_DONTCARE or meta["type"] == ro_type return meta def parse( - self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False + self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False, ro_type: str = None ) -> tuple[dict, bytes]: """ Parse a repo object into metadata and data (decrypt it, maybe decompress, maybe verify if the chunk plaintext @@ -86,6 +93,7 @@ class RepoObj: - decompress=False, want_compressed=True: quick, not verifying. returns compressed data (caller wants to reuse). - decompress=False, want_compressed=False: invalid """ + assert isinstance(ro_type, str) assert not (not decompress and not want_compressed), "invalid parameter combination!" assert isinstance(id, bytes) assert isinstance(cdata, bytes) @@ -98,6 +106,7 @@ class RepoObj: offs += len_meta_encrypted meta_packed = self.key.decrypt(id, meta_encrypted) meta_compressed = msgpack.unpackb(meta_packed) # means: before adding more metadata in decompress block + assert ro_type == ROBJ_DONTCARE or meta_compressed["type"] == ro_type data_encrypted = obj[offs:] data_compressed = self.key.decrypt(id, data_encrypted) # does not include the type/level bytes if decompress: @@ -142,10 +151,12 @@ class RepoObj1: # legacy size: int = None, ctype: int = None, clevel: int = None, + ro_type: str = None, ) -> bytes: assert isinstance(id, bytes) assert meta == {} assert isinstance(data, (bytes, memoryview)) + assert ro_type is not None assert compress or size is not None and ctype is not None and clevel is not None if compress: assert size is None or size == len(data) @@ -160,11 +171,12 @@ class RepoObj1: # legacy raise NotImplementedError("parse_meta is not available for RepoObj1") def parse( - self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False + self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False, ro_type: str = None ) -> tuple[dict, bytes]: assert not (not decompress and not want_compressed), "invalid parameter combination!" 
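        # same (decompress, want_compressed) semantics as documented for RepoObj.parse above:
        #   True, False:  slow, verifying (assert_id), returns decompressed plaintext (default)
        #   True, True:   slow, verifying (assert_id), returns the compressed payload
        #   False, True:  quick, not verifying, returns the compressed payload
        #   False, False: invalid, rejected by the assert above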
assert isinstance(id, bytes) assert isinstance(cdata, bytes) + assert ro_type is not None data_compressed = self.key.decrypt(id, cdata) compressor_cls, compression_level = Compressor.detect(data_compressed[:2]) compressor = compressor_cls(level=compression_level, legacy_mode=True) diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index cefd6293..361a8289 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -127,7 +127,8 @@ class MockCache: self.objects = {} self.repository = self.MockRepo() - def add_chunk(self, id, meta, data, stats=None, wait=True): + def add_chunk(self, id, meta, data, stats=None, wait=True, ro_type=None): + assert ro_type is not None self.objects[id] = data return id, len(data) diff --git a/src/borg/testsuite/archiver/check_cmd.py b/src/borg/testsuite/archiver/check_cmd.py index 5ddce343..01c313d5 100644 --- a/src/borg/testsuite/archiver/check_cmd.py +++ b/src/borg/testsuite/archiver/check_cmd.py @@ -243,7 +243,7 @@ def test_manifest_rebuild_duplicate_archive(archivers, request): } archive = repo_objs.key.pack_and_authenticate_metadata(archive_dict, context=b"archive") archive_id = repo_objs.id_hash(archive) - repository.put(archive_id, repo_objs.format(archive_id, {}, archive)) + repository.put(archive_id, repo_objs.format(archive_id, {}, archive, ro_type=ROBJ_ARCHIVE_META)) repository.commit(compact=False) cmd(archiver, "check", exit_code=1) cmd(archiver, "check", "--repair", exit_code=0) diff --git a/src/borg/testsuite/archiver/checks.py b/src/borg/testsuite/archiver/checks.py index f6311c8e..c3ed2720 100644 --- a/src/borg/testsuite/archiver/checks.py +++ b/src/borg/testsuite/archiver/checks.py @@ -337,6 +337,7 @@ def spoof_manifest(repository): "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), } ), + ro_type=ROBJ_MANIFEST, ) repository.put(Manifest.MANIFEST_ID, cdata) repository.commit(compact=False) @@ -357,6 +358,7 @@ def test_fresh_init_tam_required(archiver): "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), } ), + ro_type=ROBJ_MANIFEST, ) repository.put(Manifest.MANIFEST_ID, cdata) repository.commit(compact=False) @@ -397,7 +399,7 @@ def write_archive_without_tam(repository, archive_name): } ) archive_id = manifest.repo_objs.id_hash(archive_data) - cdata = manifest.repo_objs.format(archive_id, {}, archive_data) + cdata = manifest.repo_objs.format(archive_id, {}, archive_data, ro_type=ROBJ_ARCHIVE_META) repository.put(archive_id, cdata) manifest.archives[archive_name] = (archive_id, datetime.now()) manifest.write() diff --git a/src/borg/testsuite/archiver/rcompress_cmd.py b/src/borg/testsuite/archiver/rcompress_cmd.py index 3fa8a34b..dbae4483 100644 --- a/src/borg/testsuite/archiver/rcompress_cmd.py +++ b/src/borg/testsuite/archiver/rcompress_cmd.py @@ -22,7 +22,9 @@ def test_rcompress(archiver): break for id in ids: chunk = repository.get(id, read_data=True) - meta, data = manifest.repo_objs.parse(id, chunk) # will also decompress according to metadata + meta, data = manifest.repo_objs.parse( + id, chunk, ro_type=ROBJ_DONTCARE + ) # will also decompress according to metadata m_olevel = meta.get("olevel", -1) m_psize = meta.get("psize", -1) print( diff --git a/src/borg/testsuite/remote.py b/src/borg/testsuite/remote.py index b5cb2e64..a2d84bce 100644 --- a/src/borg/testsuite/remote.py +++ b/src/borg/testsuite/remote.py @@ -6,6 +6,7 @@ from unittest.mock import patch import pytest +from ..constants import 
ROBJ_FILE_STREAM from ..remote import SleepingBandwidthLimiter, RepositoryCache, cache_if_remote from ..repository import Repository from ..crypto.key import PlaintextKey @@ -205,7 +206,7 @@ class TestRepositoryCache: def _put_encrypted_object(self, repo_objs, repository, data): id_ = repo_objs.id_hash(data) - repository.put(id_, repo_objs.format(id_, {}, data)) + repository.put(id_, repo_objs.format(id_, {}, data, ro_type=ROBJ_FILE_STREAM)) return id_ @pytest.fixture diff --git a/src/borg/testsuite/repoobj.py b/src/borg/testsuite/repoobj.py index 5d11ad93..7f923f57 100644 --- a/src/borg/testsuite/repoobj.py +++ b/src/borg/testsuite/repoobj.py @@ -1,5 +1,6 @@ import pytest +from ..constants import ROBJ_FILE_STREAM, ROBJ_MANIFEST, ROBJ_ARCHIVE_META from ..crypto.key import PlaintextKey from ..repository import Repository from ..repoobj import RepoObj, RepoObj1 @@ -21,14 +22,14 @@ def test_format_parse_roundtrip(key): data = b"foobar" * 10 id = repo_objs.id_hash(data) meta = {"custom": "something"} # size and csize are computed automatically - cdata = repo_objs.format(id, meta, data) + cdata = repo_objs.format(id, meta, data, ro_type=ROBJ_FILE_STREAM) - got_meta = repo_objs.parse_meta(id, cdata) + got_meta = repo_objs.parse_meta(id, cdata, ro_type=ROBJ_FILE_STREAM) assert got_meta["size"] == len(data) assert got_meta["csize"] < len(data) assert got_meta["custom"] == "something" - got_meta, got_data = repo_objs.parse(id, cdata) + got_meta, got_data = repo_objs.parse(id, cdata, ro_type=ROBJ_FILE_STREAM) assert got_meta["size"] == len(data) assert got_meta["csize"] < len(data) assert got_meta["custom"] == "something" @@ -44,11 +45,11 @@ def test_format_parse_roundtrip_borg1(key): # legacy data = b"foobar" * 10 id = repo_objs.id_hash(data) meta = {} # borg1 does not support this kind of metadata - cdata = repo_objs.format(id, meta, data) + cdata = repo_objs.format(id, meta, data, ro_type=ROBJ_FILE_STREAM) # borg1 does not support separate metadata and borg2 does not invoke parse_meta for borg1 repos - got_meta, got_data = repo_objs.parse(id, cdata) + got_meta, got_data = repo_objs.parse(id, cdata, ro_type=ROBJ_FILE_STREAM) assert got_meta["size"] == len(data) assert got_meta["csize"] < len(data) assert data == got_data @@ -67,9 +68,9 @@ def test_borg1_borg2_transition(key): len_data = len(data) repo_objs1 = RepoObj1(key) id = repo_objs1.id_hash(data) - borg1_cdata = repo_objs1.format(id, meta, data) + borg1_cdata = repo_objs1.format(id, meta, data, ro_type=ROBJ_FILE_STREAM) meta1, compr_data1 = repo_objs1.parse( - id, borg1_cdata, decompress=True, want_compressed=True + id, borg1_cdata, decompress=True, want_compressed=True, ro_type=ROBJ_FILE_STREAM ) # avoid re-compression # in borg 1, we can only get this metadata after decrypting the whole chunk (and we do not have "size" here): assert meta1["ctype"] == LZ4.ID # default compression @@ -80,18 +81,49 @@ def test_borg1_borg2_transition(key): # note: as we did not decompress, we do not have "size" and we need to get it from somewhere else. # here, we just use len_data. for borg transfer, we also know the size from another metadata source. 
     borg2_cdata = repo_objs2.format(
-        id, dict(meta1), compr_data1[2:], compress=False, size=len_data, ctype=meta1["ctype"], clevel=meta1["clevel"]
+        id,
+        dict(meta1),
+        compr_data1[2:],
+        compress=False,
+        size=len_data,
+        ctype=meta1["ctype"],
+        clevel=meta1["clevel"],
+        ro_type=ROBJ_FILE_STREAM,
     )
-    meta2, data2 = repo_objs2.parse(id, borg2_cdata)
+    meta2, data2 = repo_objs2.parse(id, borg2_cdata, ro_type=ROBJ_FILE_STREAM)
     assert data2 == data
     assert meta2["ctype"] == LZ4.ID
     assert meta2["clevel"] == 0xFF
     assert meta2["csize"] == meta1["csize"] - 2  # borg2 does not store the type/level bytes there
     assert meta2["size"] == len_data
-    meta2 = repo_objs2.parse_meta(id, borg2_cdata)
+    meta2 = repo_objs2.parse_meta(id, borg2_cdata, ro_type=ROBJ_FILE_STREAM)
     # now, in borg 2, we have nice and separately decrypted metadata (no need to decrypt the whole chunk):
     assert meta2["ctype"] == LZ4.ID
     assert meta2["clevel"] == 0xFF
     assert meta2["csize"] == meta1["csize"] - 2  # borg2 does not store the type/level bytes there
     assert meta2["size"] == len_data
+
+
+def test_spoof_manifest(key):
+    repo_objs = RepoObj(key)
+    data = b"fake or malicious manifest data"  # file content could be provided by an attacker.
+    id = repo_objs.id_hash(data)
+    # create a repo object containing user data (file content data).
+    cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_FILE_STREAM)
+    # let's assume an attacker somehow managed to replace the manifest with that repo object.
+    # as borg always gives the ro_type it wants to read, this should fail:
+    with pytest.raises(AssertionError):
+        repo_objs.parse(id, cdata, ro_type=ROBJ_MANIFEST)
+
+
+def test_spoof_archive(key):
+    repo_objs = RepoObj(key)
+    data = b"fake or malicious archive data"  # file content could be provided by an attacker.
+    id = repo_objs.id_hash(data)
+    # create a repo object containing user data (file content data).
+    cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_FILE_STREAM)
+    # let's assume an attacker somehow managed to replace an archive with that repo object.
+    # as borg always gives the ro_type it wants to read, this should fail:
+    with pytest.raises(AssertionError):
+        repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)

From 6a68ad5cd6c05ef3493d3f3fd9575aa1047f77eb Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sat, 16 Sep 2023 01:26:10 +0200
Subject: [PATCH 2/8] remove archive TAMs

---
 src/borg/archive.py                      | 38 +++-------------
 src/borg/cache.py                        |  2 +-
 src/borg/crypto/key.py                   | 51 ++++------------------
 src/borg/helpers/parseformat.py          |  3 +-
 src/borg/testsuite/archiver/check_cmd.py |  2 +-
 src/borg/testsuite/archiver/checks.py    | 55 +-----------------------
 src/borg/testsuite/key.py                | 48 ++-------------------
 7 files changed, 22 insertions(+), 177 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index 8db9e28c..02618678 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -535,7 +535,7 @@ class Archive:
     def _load_meta(self, id):
         cdata = self.repository.get(id)
         _, data = self.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)
-        archive, _ = self.key.unpack_and_verify_archive(data)
+        archive = self.key.unpack_archive(data)
         metadata = ArchiveItem(internal_dict=archive)
         if metadata.version not in (1, 2):  # legacy: still need to read v1 archives
             raise Exception("Unknown archive metadata version")
@@ -702,7 +702,7 @@ Duration: {0.duration}
         metadata.update({"size": stats.osize, "nfiles": stats.nfiles})
         metadata.update(additional_metadata or {})
         metadata = ArchiveItem(metadata)
-        data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive")
+        data = self.key.pack_metadata(metadata.as_dict())
         self.id = self.repo_objs.id_hash(data)
         try:
             self.cache.add_chunk(self.id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
@@ -1030,7 +1030,7 @@ Duration: {0.duration}
             setattr(metadata, key, value)
         if "items" in metadata:
             del metadata.items
-        data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive")
+        data = self.key.pack_metadata(metadata.as_dict())
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
         self.manifest.archives[self.name] = (new_id, metadata.time)
@@ -1998,23 +1998,7 @@ class ArchiveChecker:
                 except msgpack.UnpackException:
                     continue
                 if valid_archive(archive):
-                    # **after** doing the low-level checks and having a strong indication that we
-                    # are likely looking at an archive item here, also check the TAM authentication:
-                    try:
-                        archive, _ = self.key.unpack_and_verify_archive(data)
-                    except IntegrityError as integrity_error:
-                        # TAM issues - do not accept this archive!
-                        # either somebody is trying to attack us with a fake archive data or
-                        # we have an ancient archive made before TAM was a thing (borg < 1.0.9) **and** this repo
-                        # was not correctly upgraded to borg 1.2.5 (see advisory at top of the changelog).
-                        # borg can't tell the difference, so it has to assume this archive might be an attack
-                        # and drops this archive.
-                        name = archive.get(b"name", b"").decode("ascii", "replace")
-                        logger.error("Archive TAM authentication issue for archive %s: %s", name, integrity_error)
-                        logger.error("This archive will *not* be added to the rebuilt manifest! It will be deleted.")
-                        self.error_found = True
-                        continue
-                    # note: if we get here and verified is False, a TAM is not required.
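# --- illustrative sketch (not part of the patch): why the type check can replace the
# archive TAM, per the reasoning in patch 1's commit message. this is not borg's real
# wire format or key handling (borg uses msgpack metadata, derived session keys and its
# MAC-based id_hash; the "cryptography" package, JSON and the random id are stand-ins):
import json
import os

from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305

key = ChaCha20Poly1305.generate_key()  # stand-in for borg's derived keys


def format_obj(id: bytes, meta: dict, data: bytes, ro_type: str) -> bytes:
    """encrypt meta and data separately, both bound to the object id via AAD"""
    aead = ChaCha20Poly1305(key)
    meta = dict(meta, type=ro_type)  # the type travels inside the encrypted+authenticated metadata
    nm, nd = os.urandom(12), os.urandom(12)
    meta_enc = nm + aead.encrypt(nm, json.dumps(meta).encode(), id)
    data_enc = nd + aead.encrypt(nd, data, id)
    return len(meta_enc).to_bytes(2, "big") + meta_enc + data_enc


def parse_obj(id: bytes, cdata: bytes, ro_type: str) -> tuple:
    aead = ChaCha20Poly1305(key)
    len_meta = int.from_bytes(cdata[:2], "big")
    meta_enc, data_enc = cdata[2 : 2 + len_meta], cdata[2 + len_meta :]
    # raises InvalidTag if the object was tampered with or moved to a different id:
    meta = json.loads(aead.decrypt(meta_enc[:12], meta_enc[12:], id))
    assert ro_type == "*" or meta["type"] == ro_type, "wrong object type"
    return meta, aead.decrypt(data_enc[:12], data_enc[12:], id)


# an attacker-controlled file content object cannot be passed off as archive metadata:
data = b"fake or malicious archive data"
id = os.urandom(32)  # stand-in for id_hash(data)
cdata = format_obj(id, {}, data, "F")  # stored as a file content ("F" = ROBJ_FILE_STREAM) object
parse_obj(id, cdata, "F")  # ok, the expected type matches
try:
    parse_obj(id, cdata, "A")  # reader expects archive metadata ("A" = ROBJ_ARCHIVE_META)
except AssertionError:
    print("type check rejected the spoofed object")
# moving such an object to a different id fails even earlier, at AEAD decryption,
# because the id is the associated data.
# --- end of illustrative sketch ---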
+ archive = self.key.unpack_archive(data) archive = ArchiveItem(internal_dict=archive) name = archive.name logger.info("Found archive %s", name) @@ -2271,17 +2255,7 @@ class ArchiveChecker: self.error_found = True del self.manifest.archives[info.name] continue - try: - archive, salt = self.key.unpack_and_verify_archive(data) - except IntegrityError as integrity_error: - # looks like there is a TAM issue with this archive, this might be an attack! - # when upgrading to borg 1.2.5, users are expected to TAM-authenticate all archives they - # trust, so there shouldn't be any without TAM. - logger.error("Archive TAM authentication issue for archive %s: %s", info.name, integrity_error) - logger.error("This archive will be *removed* from the manifest! It will be deleted.") - self.error_found = True - del self.manifest.archives[info.name] - continue + archive = self.key.unpack_archive(data) archive = ArchiveItem(internal_dict=archive) if archive.version != 2: raise Exception("Unknown archive metadata version") @@ -2301,7 +2275,7 @@ class ArchiveChecker: archive.item_ptrs = archive_put_items( items_buffer.chunks, repo_objs=self.repo_objs, add_reference=add_reference ) - data = self.key.pack_and_authenticate_metadata(archive.as_dict(), context=b"archive", salt=salt) + data = self.key.pack_metadata(archive.as_dict()) new_archive_id = self.key.id_hash(data) cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META) add_reference(new_archive_id, len(data), cdata) diff --git a/src/borg/cache.py b/src/borg/cache.py index fb99309a..c18f315a 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -755,7 +755,7 @@ class LocalCache(CacheStatsMixin): nonlocal processed_item_metadata_chunks csize, data = decrypted_repository.get(archive_id) chunk_idx.add(archive_id, 1, len(data)) - archive, _ = self.key.unpack_and_verify_archive(data) + archive = self.key.unpack_archive(data) archive = ArchiveItem(internal_dict=archive) if archive.version not in (1, 2): # legacy raise Exception("Unknown archive metadata version") diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index ef9f86ef..42bd3c58 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -72,15 +72,6 @@ class TAMRequiredError(IntegrityError): traceback = False -class ArchiveTAMRequiredError(TAMRequiredError): - __doc__ = textwrap.dedent( - """ - Archive '{}' is unauthenticated, but it is required for this repository. 
- """ - ).strip() - traceback = False - - class TAMInvalid(IntegrityError): __doc__ = IntegrityError.__doc__ traceback = False @@ -90,15 +81,6 @@ class TAMInvalid(IntegrityError): super().__init__("Manifest authentication did not verify") -class ArchiveTAMInvalid(IntegrityError): - __doc__ = IntegrityError.__doc__ - traceback = False - - def __init__(self): - # Error message becomes: "Data integrity error: Archive authentication did not verify" - super().__init__("Archive authentication did not verify") - - class TAMUnsupportedSuiteError(IntegrityError): """Could not verify manifest: Unsupported suite {!r}; a newer version is needed.""" @@ -242,6 +224,10 @@ class KeyBase: tam["hmac"] = hmac.digest(tam_key, packed, "sha512") return msgpack.packb(metadata_dict) + def pack_metadata(self, metadata_dict): + metadata_dict = StableDict(metadata_dict) + return msgpack.packb(metadata_dict) + def unpack_and_verify_manifest(self, data): """Unpack msgpacked *data* and return manifest.""" if data.startswith(b"\xc1" * 4): @@ -276,35 +262,14 @@ class KeyBase: logger.debug("TAM-verified manifest") return unpacked - def unpack_and_verify_archive(self, data): - """Unpack msgpacked *data* and return (object, salt).""" + def unpack_archive(self, data): + """Unpack msgpacked *data* and return archive metadata dict.""" data = bytearray(data) unpacker = get_limited_unpacker("archive") unpacker.feed(data) unpacked = unpacker.unpack() - if "tam" not in unpacked: - archive_name = unpacked.get("name", "") - raise ArchiveTAMRequiredError(archive_name) - tam = unpacked.pop("tam", None) - if not isinstance(tam, dict): - raise ArchiveTAMInvalid() - tam_type = tam.get("type", "") - if tam_type != "HKDF_HMAC_SHA512": - raise TAMUnsupportedSuiteError(repr(tam_type)) - tam_hmac = tam.get("hmac") - tam_salt = tam.get("salt") - if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)): - raise ArchiveTAMInvalid() - tam_hmac = want_bytes(tam_hmac) # legacy - tam_salt = want_bytes(tam_salt) # legacy - offset = data.index(tam_hmac) - data[offset : offset + 64] = bytes(64) - tam_key = self._tam_key(tam_salt, context=b"archive") - calculated_hmac = hmac.digest(tam_key, data, "sha512") - if not hmac.compare_digest(calculated_hmac, tam_hmac): - raise ArchiveTAMInvalid() - logger.debug("TAM-verified archive") - return unpacked, tam_salt + unpacked.pop("tam", None) # legacy + return unpacked class PlaintextKey(KeyBase): diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 7f845604..4b0caf6c 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -723,12 +723,11 @@ class ArchiveFormatter(BaseFormatter): "id": "internal ID of the archive", "hostname": "hostname of host on which this archive was created", "username": "username of user who created this archive", - "tam": "TAM authentication state of this archive", "size": "size of this archive (data plus metadata, not considering compression and deduplication)", "nfiles": "count of files in this archive", } KEY_GROUPS = ( - ("archive", "name", "comment", "id", "tam"), + ("archive", "name", "comment", "id"), ("start", "time", "end", "command_line"), ("hostname", "username"), ("size", "nfiles"), diff --git a/src/borg/testsuite/archiver/check_cmd.py b/src/borg/testsuite/archiver/check_cmd.py index 01c313d5..a932cbae 100644 --- a/src/borg/testsuite/archiver/check_cmd.py +++ b/src/borg/testsuite/archiver/check_cmd.py @@ -241,7 +241,7 @@ def test_manifest_rebuild_duplicate_archive(archivers, request): 
"time": "2016-12-15T18:49:51.849711", "version": 2, } - archive = repo_objs.key.pack_and_authenticate_metadata(archive_dict, context=b"archive") + archive = repo_objs.key.pack_metadata(archive_dict) archive_id = repo_objs.id_hash(archive) repository.put(archive_id, repo_objs.format(archive_id, {}, archive, ro_type=ROBJ_ARCHIVE_META)) repository.commit(compact=False) diff --git a/src/borg/testsuite/archiver/checks.py b/src/borg/testsuite/archiver/checks.py index c3ed2720..719ff2ac 100644 --- a/src/borg/testsuite/archiver/checks.py +++ b/src/borg/testsuite/archiver/checks.py @@ -8,7 +8,7 @@ import pytest from ...cache import Cache, LocalCache from ...constants import * # NOQA from ...crypto.key import TAMRequiredError -from ...helpers import Location, get_security_dir, bin_to_hex, archive_ts_now +from ...helpers import Location, get_security_dir, bin_to_hex from ...helpers import EXIT_ERROR from ...helpers import msgpack from ...manifest import Manifest, MandatoryFeatureUnsupported @@ -382,59 +382,6 @@ def test_not_required(archiver): cmd(archiver, "rlist") -# Begin archive TAM tests -def write_archive_without_tam(repository, archive_name): - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - archive_data = msgpack.packb( - { - "version": 2, - "name": archive_name, - "item_ptrs": [], - "command_line": "", - "hostname": "", - "username": "", - "time": archive_ts_now().isoformat(timespec="microseconds"), - "size": 0, - "nfiles": 0, - } - ) - archive_id = manifest.repo_objs.id_hash(archive_data) - cdata = manifest.repo_objs.format(archive_id, {}, archive_data, ro_type=ROBJ_ARCHIVE_META) - repository.put(archive_id, cdata) - manifest.archives[archive_name] = (archive_id, datetime.now()) - manifest.write() - repository.commit(compact=False) - - -def test_check_rebuild_manifest(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - create_src_archive(archiver, "archive_tam") - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - write_archive_without_tam(repository, "archive_no_tam") - repository.delete(Manifest.MANIFEST_ID) # kill manifest, so check has to rebuild it - repository.commit(compact=False) - cmd(archiver, "check", "--repair") - output = cmd(archiver, "rlist", "--format='{name}{NL}'") - assert "archive_tam" in output # TAM-verified archive is in rebuilt manifest - assert "archive_no_tam" not in output # check got rid of untrusted not TAM-verified archive - - -def test_check_rebuild_refcounts(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - create_src_archive(archiver, "archive_tam") - archive_id_pre_check = cmd(archiver, "rlist", "--format='{name} {id}{NL}'") - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - write_archive_without_tam(repository, "archive_no_tam") - cmd(archiver, "check", "--repair") - output = cmd(archiver, "rlist", "--format='{name}{NL}'") - assert "archive_tam" in output # TAM-verified archive still there - assert "archive_no_tam" not in output # check got rid of untrusted not TAM-verified archive - archive_id_post_check = cmd(archiver, "rlist", "--format='{name} {id}{NL}'") - assert archive_id_post_check == archive_id_pre_check # rebuild_refcounts didn't change archive_tam archive id - - # Begin Remote Tests def test_remote_repo_restrict_to_path(remote_archiver): original_location, repo_path = remote_archiver.repository_location, remote_archiver.repository_path diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index 749c792e..c862484e 100644 --- 
a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -11,7 +11,7 @@ from ..crypto.key import AEADKeyBase from ..crypto.key import AESOCBRepoKey, AESOCBKeyfileKey, CHPORepoKey, CHPOKeyfileKey from ..crypto.key import Blake2AESOCBRepoKey, Blake2AESOCBKeyfileKey, Blake2CHPORepoKey, Blake2CHPOKeyfileKey from ..crypto.key import ID_HMAC_SHA_256, ID_BLAKE2b_256 -from ..crypto.key import TAMRequiredError, TAMInvalid, TAMUnsupportedSuiteError, ArchiveTAMInvalid +from ..crypto.key import TAMRequiredError, TAMInvalid, TAMUnsupportedSuiteError from ..crypto.key import UnsupportedManifestError, UnsupportedKeyFormatError from ..crypto.key import identify_key from ..crypto.low_level import IntegrityError as IntegrityErrorBase @@ -276,15 +276,11 @@ class TestTAM: blob = msgpack.packb({}) with pytest.raises(TAMRequiredError): key.unpack_and_verify_manifest(blob) - with pytest.raises(TAMRequiredError): - key.unpack_and_verify_archive(blob) def test_unknown_type(self, key): blob = msgpack.packb({"tam": {"type": "HMAC_VOLLBIT"}}) with pytest.raises(TAMUnsupportedSuiteError): key.unpack_and_verify_manifest(blob) - with pytest.raises(TAMUnsupportedSuiteError): - key.unpack_and_verify_archive(blob) @pytest.mark.parametrize( "tam, exc", @@ -300,20 +296,6 @@ class TestTAM: with pytest.raises(exc): key.unpack_and_verify_manifest(blob) - @pytest.mark.parametrize( - "tam, exc", - ( - ({}, TAMUnsupportedSuiteError), - ({"type": b"\xff"}, TAMUnsupportedSuiteError), - (None, ArchiveTAMInvalid), - (1234, ArchiveTAMInvalid), - ), - ) - def test_invalid_archive(self, key, tam, exc): - blob = msgpack.packb({"tam": tam}) - with pytest.raises(exc): - key.unpack_and_verify_archive(blob) - @pytest.mark.parametrize( "hmac, salt", (({}, bytes(64)), (bytes(64), {}), (None, bytes(64)), (bytes(64), None)), @@ -329,8 +311,6 @@ class TestTAM: blob = msgpack.packb(data) with pytest.raises(TAMInvalid): key.unpack_and_verify_manifest(blob) - with pytest.raises(ArchiveTAMInvalid): - key.unpack_and_verify_archive(blob) def test_round_trip_manifest(self, key): data = {"foo": "bar"} @@ -346,15 +326,10 @@ class TestTAM: def test_round_trip_archive(self, key): data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"archive") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob) - assert unpacked["tam"]["type"] == "HKDF_HMAC_SHA512" - - unpacked, _ = key.unpack_and_verify_archive(blob) + blob = key.pack_metadata(data) + unpacked = key.unpack_archive(blob) assert unpacked["foo"] == "bar" - assert "tam" not in unpacked + assert "tam" not in unpacked # legacy @pytest.mark.parametrize("which", ("hmac", "salt")) def test_tampered_manifest(self, key, which): @@ -371,21 +346,6 @@ class TestTAM: with pytest.raises(TAMInvalid): key.unpack_and_verify_manifest(blob) - @pytest.mark.parametrize("which", ("hmac", "salt")) - def test_tampered_archive(self, key, which): - data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"archive") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob, object_hook=StableDict) - assert len(unpacked["tam"][which]) == 64 - unpacked["tam"][which] = unpacked["tam"][which][0:32] + bytes(32) - assert len(unpacked["tam"][which]) == 64 - blob = msgpack.packb(unpacked) - - with pytest.raises(ArchiveTAMInvalid): - key.unpack_and_verify_archive(blob) - def test_decrypt_key_file_unsupported_algorithm(): """We will add more algorithms in the future. 
We should raise a helpful error.""" From 1cf62d8fc7162ac313c2e1f4ffcc8341b392c113 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 16 Sep 2023 02:02:44 +0200 Subject: [PATCH 3/8] remove manifest TAMs --- src/borg/crypto/key.py | 66 +++--------------------- src/borg/manifest.py | 4 +- src/borg/testsuite/archiver/checks.py | 65 +----------------------- src/borg/testsuite/key.py | 72 ++------------------------- 4 files changed, 15 insertions(+), 192 deletions(-) diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index 42bd3c58..8884faec 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -21,7 +21,7 @@ from ..helpers import bin_to_hex from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded, PassphraseWrong from ..helpers import msgpack from ..helpers import workarounds -from ..item import Key, EncryptedKey, want_bytes +from ..item import Key, EncryptedKey from ..manifest import Manifest from ..platform import SaveFile from ..repoobj import RepoObj @@ -63,30 +63,6 @@ class UnsupportedKeyFormatError(Error): """Your borg key is stored in an unsupported format. Try using a newer version of borg.""" -class TAMRequiredError(IntegrityError): - __doc__ = textwrap.dedent( - """ - Manifest is unauthenticated, but it is required for this repository. Is somebody attacking you? - """ - ).strip() - traceback = False - - -class TAMInvalid(IntegrityError): - __doc__ = IntegrityError.__doc__ - traceback = False - - def __init__(self): - # Error message becomes: "Data integrity error: Manifest authentication did not verify" - super().__init__("Manifest authentication did not verify") - - -class TAMUnsupportedSuiteError(IntegrityError): - """Could not verify manifest: Unsupported suite {!r}; a newer version is needed.""" - - traceback = False - - def key_creator(repository, args, *, other_key=None): for key in AVAILABLE_KEY_TYPES: if key.ARG_NAME == args.encryption: @@ -214,21 +190,15 @@ class KeyBase: output_length=64, ) - def pack_and_authenticate_metadata(self, metadata_dict, context=b"manifest", salt=None): - if salt is None: - salt = os.urandom(64) - metadata_dict = StableDict(metadata_dict) - tam = metadata_dict["tam"] = StableDict({"type": "HKDF_HMAC_SHA512", "hmac": bytes(64), "salt": salt}) - packed = msgpack.packb(metadata_dict) - tam_key = self._tam_key(salt, context) - tam["hmac"] = hmac.digest(tam_key, packed, "sha512") - return msgpack.packb(metadata_dict) - def pack_metadata(self, metadata_dict): metadata_dict = StableDict(metadata_dict) return msgpack.packb(metadata_dict) - def unpack_and_verify_manifest(self, data): + def pack_and_authenticate_metadata(self, metadata_dict, context): # TODO: remove + metadata_dict = StableDict(metadata_dict) + return msgpack.packb(metadata_dict) + + def unpack_manifest(self, data): """Unpack msgpacked *data* and return manifest.""" if data.startswith(b"\xc1" * 4): # This is a manifest from the future, we can't read it. 
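# for orientation, the manifest round-trip after this patch (an outline using the names
# from this series; setup and error handling omitted, so not directly runnable):
#
# write path (Manifest.write, see the manifest.py hunk below):
#   data = key.pack_metadata(manifest.as_dict())      # plain msgpack, no "tam" dict anymore
#   cdata = repo_objs.format(MANIFEST_ID, {}, data, ro_type=ROBJ_MANIFEST)
#   repository.put(MANIFEST_ID, cdata)
#
# read path (Manifest.load):
#   _, data = repo_objs.parse(MANIFEST_ID, cdata, ro_type=ROBJ_MANIFEST)  # AEAD auth + type check
#   manifest_dict = key.unpack_manifest(data)         # just unpacks, drops a legacy "tam" key
#
# i.e. manifest authenticity now comes from the AEAD-authenticated, id-bound, typed
# repo object rather than from a TAM hmac inside the msgpack payload.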
@@ -237,29 +207,7 @@ class KeyBase: unpacker = get_limited_unpacker("manifest") unpacker.feed(data) unpacked = unpacker.unpack() - if AUTHENTICATED_NO_KEY: - return unpacked - if "tam" not in unpacked: - raise TAMRequiredError(self.repository._location.canonical_path()) - tam = unpacked.pop("tam", None) - if not isinstance(tam, dict): - raise TAMInvalid() - tam_type = tam.get("type", "") - if tam_type != "HKDF_HMAC_SHA512": - raise TAMUnsupportedSuiteError(repr(tam_type)) - tam_hmac = tam.get("hmac") - tam_salt = tam.get("salt") - if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)): - raise TAMInvalid() - tam_hmac = want_bytes(tam_hmac) # legacy - tam_salt = want_bytes(tam_salt) # legacy - offset = data.index(tam_hmac) - data[offset : offset + 64] = bytes(64) - tam_key = self._tam_key(tam_salt, context=b"manifest") - calculated_hmac = hmac.digest(tam_key, data, "sha512") - if not hmac.compare_digest(calculated_hmac, tam_hmac): - raise TAMInvalid() - logger.debug("TAM-verified manifest") + unpacked.pop("tam", None) # legacy return unpacked def unpack_archive(self, data): diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 047a2a4f..aa7e6143 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -251,7 +251,7 @@ class Manifest: key = key_factory(repository, cdata, ro_cls=ro_cls) manifest = cls(key, repository, ro_cls=ro_cls) _, data = manifest.repo_objs.parse(cls.MANIFEST_ID, cdata, ro_type=ROBJ_MANIFEST) - manifest_dict = key.unpack_and_verify_manifest(data) + manifest_dict = key.unpack_manifest(data) m = ManifestItem(internal_dict=manifest_dict) manifest.id = manifest.repo_objs.id_hash(data) if m.get("version") not in (1, 2): @@ -313,6 +313,6 @@ class Manifest: timestamp=self.timestamp, config=StableDict(self.config), ) - data = self.key.pack_and_authenticate_metadata(manifest.as_dict()) + data = self.key.pack_metadata(manifest.as_dict()) self.id = self.repo_objs.id_hash(data) self.repository.put(self.MANIFEST_ID, self.repo_objs.format(self.MANIFEST_ID, {}, data, ro_type=ROBJ_MANIFEST)) diff --git a/src/borg/testsuite/archiver/checks.py b/src/borg/testsuite/archiver/checks.py index 719ff2ac..0fbf7343 100644 --- a/src/borg/testsuite/archiver/checks.py +++ b/src/borg/testsuite/archiver/checks.py @@ -1,22 +1,19 @@ import os import shutil -from datetime import datetime, timezone, timedelta from unittest.mock import patch import pytest from ...cache import Cache, LocalCache from ...constants import * # NOQA -from ...crypto.key import TAMRequiredError from ...helpers import Location, get_security_dir, bin_to_hex from ...helpers import EXIT_ERROR -from ...helpers import msgpack from ...manifest import Manifest, MandatoryFeatureUnsupported from ...remote import RemoteRepository, PathNotAllowed from ...repository import Repository from .. import llfuse from .. import changedir -from . import cmd, _extract_repository_id, open_repository, check_cache, create_test_files, create_src_archive +from . import cmd, _extract_repository_id, open_repository, check_cache, create_test_files from . 
import _set_repository_id, create_regular_file, assert_creates_file, generate_archiver_tests, RK_ENCRYPTION pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote") # NOQA @@ -322,66 +319,6 @@ def test_check_cache(archivers, request): check_cache(archiver) -# Begin manifest TAM tests -def spoof_manifest(repository): - with repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - cdata = manifest.repo_objs.format( - Manifest.MANIFEST_ID, - {}, - msgpack.packb( - { - "version": 1, - "archives": {}, - "config": {}, - "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), - } - ), - ro_type=ROBJ_MANIFEST, - ) - repository.put(Manifest.MANIFEST_ID, cdata) - repository.commit(compact=False) - - -def test_fresh_init_tam_required(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - cdata = manifest.repo_objs.format( - Manifest.MANIFEST_ID, - {}, - msgpack.packb( - { - "version": 1, - "archives": {}, - "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), - } - ), - ro_type=ROBJ_MANIFEST, - ) - repository.put(Manifest.MANIFEST_ID, cdata) - repository.commit(compact=False) - - with pytest.raises(TAMRequiredError): - cmd(archiver, "rlist") - - -def test_not_required(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - create_src_archive(archiver, "archive1234") - repository = Repository(archiver.repository_path, exclusive=True) - # Manifest must be authenticated now - output = cmd(archiver, "rlist", "--debug") - assert "archive1234" in output - assert "TAM-verified manifest" in output - # Try to spoof / modify pre-1.0.9 - spoof_manifest(repository) - # Fails - with pytest.raises(TAMRequiredError): - cmd(archiver, "rlist") - - # Begin Remote Tests def test_remote_repo_restrict_to_path(remote_archiver): original_location, repo_path = remote_archiver.repository_location, remote_archiver.repository_path diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index c862484e..7f4f8dce 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -11,13 +11,11 @@ from ..crypto.key import AEADKeyBase from ..crypto.key import AESOCBRepoKey, AESOCBKeyfileKey, CHPORepoKey, CHPOKeyfileKey from ..crypto.key import Blake2AESOCBRepoKey, Blake2AESOCBKeyfileKey, Blake2CHPORepoKey, Blake2CHPOKeyfileKey from ..crypto.key import ID_HMAC_SHA_256, ID_BLAKE2b_256 -from ..crypto.key import TAMRequiredError, TAMInvalid, TAMUnsupportedSuiteError from ..crypto.key import UnsupportedManifestError, UnsupportedKeyFormatError from ..crypto.key import identify_key from ..crypto.low_level import IntegrityError as IntegrityErrorBase from ..helpers import IntegrityError from ..helpers import Location -from ..helpers import StableDict from ..helpers import msgpack from ..constants import KEY_ALGORITHMS @@ -266,63 +264,18 @@ class TestTAM: def test_unpack_future(self, key): blob = b"\xc1\xc1\xc1\xc1foobar" with pytest.raises(UnsupportedManifestError): - key.unpack_and_verify_manifest(blob) + key.unpack_manifest(blob) blob = b"\xc1\xc1\xc1" with pytest.raises(msgpack.UnpackException): - key.unpack_and_verify_manifest(blob) - - def test_missing(self, key): - blob = msgpack.packb({}) - with pytest.raises(TAMRequiredError): - key.unpack_and_verify_manifest(blob) - - def test_unknown_type(self, key): - blob 
= msgpack.packb({"tam": {"type": "HMAC_VOLLBIT"}}) - with pytest.raises(TAMUnsupportedSuiteError): - key.unpack_and_verify_manifest(blob) - - @pytest.mark.parametrize( - "tam, exc", - ( - ({}, TAMUnsupportedSuiteError), - ({"type": b"\xff"}, TAMUnsupportedSuiteError), - (None, TAMInvalid), - (1234, TAMInvalid), - ), - ) - def test_invalid_manifest(self, key, tam, exc): - blob = msgpack.packb({"tam": tam}) - with pytest.raises(exc): - key.unpack_and_verify_manifest(blob) - - @pytest.mark.parametrize( - "hmac, salt", - (({}, bytes(64)), (bytes(64), {}), (None, bytes(64)), (bytes(64), None)), - ids=["ed-b64", "b64-ed", "n-b64", "b64-n"], - ) - def test_wrong_types(self, key, hmac, salt): - data = {"tam": {"type": "HKDF_HMAC_SHA512", "hmac": hmac, "salt": salt}} - tam = data["tam"] - if hmac is None: - del tam["hmac"] - if salt is None: - del tam["salt"] - blob = msgpack.packb(data) - with pytest.raises(TAMInvalid): - key.unpack_and_verify_manifest(blob) + key.unpack_manifest(blob) def test_round_trip_manifest(self, key): data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"manifest") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob) - assert unpacked["tam"]["type"] == "HKDF_HMAC_SHA512" - - unpacked = key.unpack_and_verify_manifest(blob) + blob = key.pack_metadata(data) + unpacked = key.unpack_manifest(blob) assert unpacked["foo"] == "bar" - assert "tam" not in unpacked + assert "tam" not in unpacked # legacy def test_round_trip_archive(self, key): data = {"foo": "bar"} @@ -331,21 +284,6 @@ class TestTAM: assert unpacked["foo"] == "bar" assert "tam" not in unpacked # legacy - @pytest.mark.parametrize("which", ("hmac", "salt")) - def test_tampered_manifest(self, key, which): - data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"manifest") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob, object_hook=StableDict) - assert len(unpacked["tam"][which]) == 64 - unpacked["tam"][which] = unpacked["tam"][which][0:32] + bytes(32) - assert len(unpacked["tam"][which]) == 64 - blob = msgpack.packb(unpacked) - - with pytest.raises(TAMInvalid): - key.unpack_and_verify_manifest(blob) - def test_decrypt_key_file_unsupported_algorithm(): """We will add more algorithms in the future. 
We should raise a helpful error.""" From cb4676048afb73d91d42821dbf4017b78c5b72c7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 16 Sep 2023 02:21:54 +0200 Subject: [PATCH 4/8] remove remainders of TAM support --- src/borg/crypto/key.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index 8884faec..978007d1 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -182,22 +182,10 @@ class KeyBase: id_str = bin_to_hex(id) if id is not None else "(unknown)" raise IntegrityError(f"Chunk {id_str}: Invalid encryption envelope") - def _tam_key(self, salt, context): - return hkdf_hmac_sha512( - ikm=self.id_key + self.crypt_key, - salt=salt, - info=b"borg-metadata-authentication-" + context, - output_length=64, - ) - def pack_metadata(self, metadata_dict): metadata_dict = StableDict(metadata_dict) return msgpack.packb(metadata_dict) - def pack_and_authenticate_metadata(self, metadata_dict, context): # TODO: remove - metadata_dict = StableDict(metadata_dict) - return msgpack.packb(metadata_dict) - def unpack_manifest(self, data): """Unpack msgpacked *data* and return manifest.""" if data.startswith(b"\xc1" * 4): @@ -248,9 +236,6 @@ class PlaintextKey(KeyBase): self.assert_type(data[0], id) return memoryview(data)[1:] - def _tam_key(self, salt, context): - return salt + context - def random_blake2b_256_key(): # This might look a bit curious, but is the same construction used in the keyed mode of BLAKE2b. @@ -749,7 +734,6 @@ class AuthenticatedKeyBase(AESKeyBase, FlexiKey): self.enc_hmac_key = NOPE self.id_key = NOPE self.chunk_seed = 0 - self.tam_required = False return True return super()._load(key_data, passphrase) From 170380c657a55995d3ec494d8e3e415ff4a4f89f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 16 Sep 2023 18:31:46 +0200 Subject: [PATCH 5/8] raise IntegrityError if ro_type is not as expected --- src/borg/repoobj.py | 7 +++++-- src/borg/testsuite/repoobj.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index 8514db4d..3fb2534a 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -2,6 +2,7 @@ from struct import Struct from .constants import * # NOQA from .helpers import msgpack, workarounds +from .helpers.errors import IntegrityError from .compress import Compressor, LZ4_COMPRESSOR, get_compressor # workaround for lost passphrase or key in "authenticated" or "authenticated-blake2" mode @@ -77,7 +78,8 @@ class RepoObj: meta_encrypted = obj[offs : offs + len_meta_encrypted] meta_packed = self.key.decrypt(id, meta_encrypted) meta = msgpack.unpackb(meta_packed) - assert ro_type == ROBJ_DONTCARE or meta["type"] == ro_type + if ro_type != ROBJ_DONTCARE and meta["type"] != ro_type: + raise IntegrityError(f"ro_type expected: {ro_type} got: {meta['type']}") return meta def parse( @@ -106,7 +108,8 @@ class RepoObj: offs += len_meta_encrypted meta_packed = self.key.decrypt(id, meta_encrypted) meta_compressed = msgpack.unpackb(meta_packed) # means: before adding more metadata in decompress block - assert ro_type == ROBJ_DONTCARE or meta_compressed["type"] == ro_type + if ro_type != ROBJ_DONTCARE and meta_compressed["type"] != ro_type: + raise IntegrityError(f"ro_type expected: {ro_type} got: {meta_compressed['type']}") data_encrypted = obj[offs:] data_compressed = self.key.decrypt(id, data_encrypted) # does not include the type/level bytes if decompress: diff --git a/src/borg/testsuite/repoobj.py 
b/src/borg/testsuite/repoobj.py index 7f923f57..44c364d8 100644 --- a/src/borg/testsuite/repoobj.py +++ b/src/borg/testsuite/repoobj.py @@ -2,6 +2,7 @@ import pytest from ..constants import ROBJ_FILE_STREAM, ROBJ_MANIFEST, ROBJ_ARCHIVE_META from ..crypto.key import PlaintextKey +from ..helpers.errors import IntegrityError from ..repository import Repository from ..repoobj import RepoObj, RepoObj1 from ..compress import LZ4 @@ -113,7 +114,7 @@ def test_spoof_manifest(key): cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_FILE_STREAM) # let's assume an attacker somehow managed to replace the manifest with that repo object. # as borg always gives the ro_type it wants to read, this should fail: - with pytest.raises(AssertionError): + with pytest.raises(IntegrityError): repo_objs.parse(id, cdata, ro_type=ROBJ_MANIFEST) @@ -125,5 +126,5 @@ def test_spoof_archive(key): cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_FILE_STREAM) # let's assume an attacker somehow managed to replace an archive with that repo object. # as borg always gives the ro_type it wants to read, this should fail: - with pytest.raises(AssertionError): + with pytest.raises(IntegrityError): repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META) From 6b4697b4798724a0b6bcbea9f80c24d83d679831 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 16 Sep 2023 18:36:26 +0200 Subject: [PATCH 6/8] tests: borg check notices/repairs a spoofed manifest --- src/borg/testsuite/archiver/check_cmd.py | 37 +++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/borg/testsuite/archiver/check_cmd.py b/src/borg/testsuite/archiver/check_cmd.py index a932cbae..40c22926 100644 --- a/src/borg/testsuite/archiver/check_cmd.py +++ b/src/borg/testsuite/archiver/check_cmd.py @@ -1,3 +1,4 @@ +from datetime import datetime, timezone, timedelta import shutil from unittest.mock import patch @@ -5,7 +6,7 @@ import pytest from ...archive import ChunkBuffer from ...constants import * # NOQA -from ...helpers import bin_to_hex +from ...helpers import bin_to_hex, msgpack from ...manifest import Manifest from ...repository import Repository from . import cmd, src_file, create_src_archive, open_archive, generate_archiver_tests, RK_ENCRYPTION @@ -205,6 +206,40 @@ def test_corrupted_manifest(archivers, request): cmd(archiver, "check", exit_code=0) +def test_spoofed_manifest(archivers, request): + archiver = request.getfixturevalue(archivers) + check_cmd_setup(archiver) + archive, repository = open_archive(archiver.repository_path, "archive1") + with repository: + manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + cdata = manifest.repo_objs.format( + Manifest.MANIFEST_ID, + {}, + msgpack.packb( + { + "version": 1, + "archives": {}, + "config": {}, + "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), + } + ), + # we assume that an attacker can put a file into backup src files that contains a fake manifest. + # but the attacker cannot influence the ro_type borg will use to store user file data: + ro_type=ROBJ_FILE_STREAM, # a real manifest is stored with ROBJ_MANIFEST + ) + # maybe a repo-side attacker could manage to move the fake manifest file chunk over to the manifest ID. + # we simulate this here by directly writing the fake manifest data to the manifest ID. + repository.put(Manifest.MANIFEST_ID, cdata) + repository.commit(compact=False) + # borg should notice that the manifest has the wrong ro_type.
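+ # (a wrong ro_type makes repo_objs.parse raise IntegrityError, so loading the manifest fails)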
+ cmd(archiver, "check", exit_code=1) + # borg check --repair should remove the spoofed manifest and rebuild a new one. + output = cmd(archiver, "check", "-v", "--repair", exit_code=0) + assert "archive1" in output + assert "archive2" in output + cmd(archiver, "check", exit_code=0) + + def test_manifest_rebuild_corrupted_chunk(archivers, request): archiver = request.getfixturevalue(archivers) check_cmd_setup(archiver) From d1fde1164552e2f0fc6aa09c9e3c0db202c5d760 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 16 Sep 2023 20:25:07 +0200 Subject: [PATCH 7/8] tests: borg check must not add a spoofed archive to manifest also: do a small optimisation in borg check: if the type of the repo object is not ROBJ_ARCHIVE_META, we can skip the object, as it cannot contain valid archive metadata. if the type is correct, this is already a sufficient check, so we can be quite sure that the object contains valid archive metadata. --- src/borg/archive.py | 7 ++-- src/borg/testsuite/archiver/check_cmd.py | 41 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 02618678..b048d5c1 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1975,7 +1975,6 @@ class ArchiveChecker: # lost manifest on an older borg version than the most recent one that was ever used # within this repository (assuming that newer borg versions support more item keys). manifest = Manifest(self.key, self.repository) - archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] pi = ProgressIndicatorPercent( total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01, msgid="check.rebuild_manifest" ) @@ -1983,14 +1982,12 @@ class ArchiveChecker: pi.show() cdata = self.repository.get(chunk_id) try: - _, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE) + meta, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase as exc: logger.error("Skipping corrupted chunk: %s", exc) self.error_found = True continue - if not valid_msgpacked_dict(data, archive_keys_serialized): - continue - if b"command_line" not in data or b"\xa7version\x02" not in data: + if meta["type"] != ROBJ_ARCHIVE_META: continue try: archive = msgpack.unpackb(data) diff --git a/src/borg/testsuite/archiver/check_cmd.py b/src/borg/testsuite/archiver/check_cmd.py index 40c22926..cf4c6baf 100644 --- a/src/borg/testsuite/archiver/check_cmd.py +++ b/src/borg/testsuite/archiver/check_cmd.py @@ -288,6 +288,47 @@ def test_manifest_rebuild_duplicate_archive(archivers, request): assert "archive2" in output +def test_spoofed_archive(archivers, request): + archiver = request.getfixturevalue(archivers) + check_cmd_setup(archiver) + archive, repository = open_archive(archiver.repository_path, "archive1") + repo_objs = archive.repo_objs + with repository: + # attacker would corrupt or delete the manifest to trigger a rebuild of it: + manifest = repository.get(Manifest.MANIFEST_ID) + corrupted_manifest = manifest + b"corrupted!"
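+ # appending garbage makes the stored manifest object fail to decrypt/authenticate, so check must rebuild the manifest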
+ repository.put(Manifest.MANIFEST_ID, corrupted_manifest) + archive_dict = { + "command_line": "", + "item_ptrs": [], + "hostname": "foo", + "username": "bar", + "name": "archive_spoofed", + "time": "2016-12-15T18:49:51.849711", + "version": 2, + } + archive = repo_objs.key.pack_metadata(archive_dict) + archive_id = repo_objs.id_hash(archive) + repository.put( + archive_id, + repo_objs.format( + archive_id, + {}, + archive, + # we assume that an attacker can put a file into backup src files that contains a fake archive. + # but the attacker cannot influence the ro_type borg will use to store user file data: + ro_type=ROBJ_FILE_STREAM, # a real archive is stored with ROBJ_ARCHIVE_META + ), + ) + repository.commit(compact=False) + cmd(archiver, "check", exit_code=1) + cmd(archiver, "check", "--repair", "--debug", exit_code=0) + output = cmd(archiver, "rlist") + assert "archive1" in output + assert "archive2" in output + assert "archive_spoofed" not in output + + def test_extra_chunks(archivers, request): archiver = request.getfixturevalue(archivers) if archiver.get_kind() == "remote": From bd1d7345919a5777ae1df0b963e38919ea65e47a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 16 Sep 2023 03:00:04 +0200 Subject: [PATCH 8/8] docs: remove TAMs, introduce typed repo objects --- docs/internals/data-structures.rst | 5 --- docs/internals/security.rst | 54 +++++++++--------------------- 2 files changed, 16 insertions(+), 43 deletions(-) diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst index 76147876..8d1562ff 100644 --- a/docs/internals/data-structures.rst +++ b/docs/internals/data-structures.rst @@ -365,7 +365,6 @@ or modified. It looks like this: 'time': '2017-05-05T12:42:22.942864', }, }, - 'tam': ..., } The *version* field can be either 1 or 2. The versions differ in the @@ -379,10 +378,6 @@ the repository. It is used by *borg check*, which verifies that all keys in all items are a subset of these keys. Thus, an older version of *borg check* supporting this mechanism can correctly detect keys introduced in later versions. -The *tam* key is part of the :ref:`tertiary authentication mechanism <tam_description>` -(formerly known as "tertiary authentication for metadata") and authenticates -the manifest, since an ID check is not possible. - *config* is a general-purpose location for additional metadata. All versions of Borg preserve its contents. diff --git a/docs/internals/security.rst b/docs/internals/security.rst index 38d693d0..fef56d61 100644 --- a/docs/internals/security.rst +++ b/docs/internals/security.rst @@ -67,43 +67,26 @@ in a particular part of its own data structure assigns this meaning. This results in a directed acyclic graph of authentication from the manifest to the data chunks of individual files. -.. _tam_description: +The above describes borg 1.x, which is why it needed the tertiary +authentication mechanism (TAM) for the manifest and archives. -.. rubric:: Authenticating the manifest +borg 2 now stores the ro_type ("meaning") of a repo object's data in that +object's metadata (e.g. manifest vs. archive vs. user file content data). +When loading data from the repo, borg verifies that the type of object it got +matches the type it wanted. borg 2 does not use TAMs any more. -Since the manifest has a fixed ID (000...000) the aforementioned authentication -does not apply to it, indeed, cannot apply to it; it is impossible to authenticate -the root node of a DAG through its edges, since the root node has no incoming edges.
+As both the object's metadata and data are AEAD encrypted and also bound to +the object ID (by giving the ID as AAD), there is no way an attacker (without +access to the borg key) could change the type of the object or move content +to a different object ID. -With the scheme as described so far an attacker could easily replace the manifest, -therefore Borg includes a tertiary authentication mechanism (TAM) that is applied -to the manifest (see :ref:`tam_vuln`). +This effectively 'anchors' the manifest (and also other metadata, like archives) +to the key, which is controlled by the client, thereby anchoring the entire DAG, +making it impossible for an attacker to add, remove or modify any part of the +DAG without Borg being able to detect the tampering. -TAM works by deriving a separate key through HKDF_ from the other encryption and -authentication keys and calculating the HMAC of the metadata to authenticate [#]_:: - - # RANDOM(n) returns n random bytes - salt = RANDOM(64) - - ikm = id_key || crypt_key - # *context* depends on the operation, for manifest authentication it is - # the ASCII string "borg-metadata-authentication-manifest". - tam_key = HKDF-SHA-512(ikm, salt, context) - - # *data* is a dict-like structure - data[hmac] = zeroes - packed = pack(data) - data[hmac] = HMAC(tam_key, packed) - packed_authenticated = pack(data) - -Since an attacker cannot gain access to this key and also cannot make the -client authenticate arbitrary data using this mechanism, the attacker is unable -to forge the authentication. - -This effectively 'anchors' the manifest to the key, which is controlled by the -client, thereby anchoring the entire DAG, making it impossible for an attacker -to add, remove or modify any part of the DAG without Borg being able to detect -the tampering. +Passphrase notes +---------------- Note that when using BORG_PASSPHRASE the attacker cannot swap the *entire* repository against a new repository with e.g. repokey mode and no passphrase, @@ -113,11 +96,6 @@ However, interactively a user might not notice this kind of attack immediately, if she assumes that the reason for the absent passphrase prompt is a set BORG_PASSPHRASE. See issue :issue:`2169` for details. -.. [#] The reason why the authentication tag is stored in the packed - data itself is that older Borg versions can still read the - manifest this way, while a changed layout would have broken - compatibility. - .. _security_encryption: Encryption
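The ID binding described above can be illustrated with a generic AEAD cipher. This is a minimal sketch (assuming the Python ``cryptography`` package and its ChaCha20-Poly1305 AEAD; borg's actual cipher suites, key/nonce management and object layout differ): an object encrypted with its ID as AAD cannot be moved to another ID without failing authentication::

    import os

    from cryptography.exceptions import InvalidTag
    from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305

    key = ChaCha20Poly1305.generate_key()
    aead = ChaCha20Poly1305(key)
    nonce = os.urandom(12)
    id_a, id_b = os.urandom(32), os.urandom(32)  # two different object IDs

    # encrypting with id_a as AAD binds the ciphertext to that object ID
    cdata = aead.encrypt(nonce, b"object metadata or data", id_a)

    assert aead.decrypt(nonce, cdata, id_a) == b"object metadata or data"  # same ID: ok
    try:
        aead.decrypt(nonce, cdata, id_b)  # "moved" to a different object ID
    except InvalidTag:
        print("tampering detected: object is not bound to this ID")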