rebuild_archives_directory: accelerate by only reading metadata

We are only interested in archive metadata objects here, so for most repo objects it is enough to read the repoobj's metadata and determine the object's type. Only if it is the right type of object do we need to read the full object (metadata and data).
2025-03-11 22:53:27 +00:00 · 2024-11-02 20:16:03 +01:00 · 2024-11-02 20:16:03 +01:00 · c35cbc9028
commit c35cbc9028
parent 299c05287f
1 changed files with 11 additions and 1 deletions
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -1833,6 +1833,16 @@ class ArchiveChecker:
        )
        for chunk_id, _ in self.chunks.iteritems():
            pi.show()
+            cdata = self.repository.get(chunk_id, read_data=False)  # only get metadata
+            try:
+                meta = self.repo_objs.parse_meta(chunk_id, cdata, ro_type=ROBJ_DONTCARE)
+            except IntegrityErrorBase as exc:
+                logger.error("Skipping corrupted chunk: %s", exc)
+                self.error_found = True
+                continue
+            if meta["type"] != ROBJ_ARCHIVE_META:
+                continue
+            # now we know it is an archive metadata chunk, load the full object from the repo:
            cdata = self.repository.get(chunk_id)
            try:
                meta, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE)
@@ -1841,7 +1851,7 @@ class ArchiveChecker:
                self.error_found = True
                continue
            if meta["type"] != ROBJ_ARCHIVE_META:
-                continue
+                continue  # should never happen
            try:
                archive = msgpack.unpackb(data)
            # Ignore exceptions that might be raised when feeding msgpack with invalid data