From ec1d89f4778d9addcfb936f10805b1d075704466 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 27 Aug 2024 02:32:29 +0200 Subject: [PATCH] compact: better stats - compression factor - dedup factor - repo size All values are approx. values without considering overheads. --- src/borg/archiver/compact_cmd.py | 24 +++++++++++++++------- src/borg/testsuite/archiver/compact_cmd.py | 6 +++--- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py index 629da8395..e0d37e3ac 100644 --- a/src/borg/archiver/compact_cmd.py +++ b/src/borg/archiver/compact_cmd.py @@ -20,13 +20,19 @@ class ArchiveGarbageCollector: self.repository = repository assert isinstance(repository, (Repository, RemoteRepository)) self.manifest = manifest - self.repository_chunks = None # what we have in the repository + self.repository_chunks = None # what we have in the repository, id -> stored_size self.used_chunks = None # what archives currently reference self.wanted_chunks = None # chunks that would be nice to have for next borg check --repair self.total_files = None # overall number of source files written to all archives in this repo self.total_size = None # overall size of source file content data written to all archives self.archives_count = None # number of archives + @property + def repository_size(self): + if self.repository_chunks is None: + return None + return sum(self.repository_chunks.values()) # sum of stored sizes + def garbage_collect(self): """Removes unused chunks from a repository.""" logger.info("Starting compaction / garbage collection...") @@ -53,7 +59,7 @@ class ArchiveGarbageCollector: break marker = result[-1][0] for id, stored_size in result: - repository_chunks[id] = 0 # plaintext size unknown + repository_chunks[id] = stored_size return repository_chunks def analyze_archives(self) -> Tuple[Dict[bytes, int], Dict[bytes, int], int, int, int]: @@ -110,6 +116,7 @@ class ArchiveGarbageCollector: logger.warning(f"{len(missing_found)} previously missing objects re-appeared!" + run_repair) set_ec(EXIT_WARNING) + repo_size_before = self.repository_size referenced_chunks = set(self.used_chunks) | set(self.wanted_chunks) unused = set(self.repository_chunks) - referenced_chunks logger.info(f"Repository has {len(unused)} objects to delete.") @@ -123,15 +130,18 @@ class ArchiveGarbageCollector: self.repository.delete(id) del self.repository_chunks[id] pi.finish() + repo_size_after = self.repository_size count = len(self.repository_chunks) - logger.info(f"Repository has {count} objects now.") - logger.info(f"Overall statistics, considering all {self.archives_count} archives in this repository:") - logger.info(f"Source files count (before deduplication): {self.total_files}") - logger.info(f"Source files size (before deduplication): {format_file_size(self.total_size, precision=0)}") + logger.info(f"Source data size was {format_file_size(self.total_size, precision=0)} in {self.total_files} files.") dsize = sum(self.used_chunks[id] for id in self.repository_chunks) - logger.info(f"Deduplicated size (before compression, encryption): {format_file_size(dsize, precision=0)}") + logger.info(f"Repository size is {format_file_size(self.repository_size, precision=0)} in {count} objects.") + if self.total_size != 0: + logger.info(f"Space reduction factor due to deduplication: {dsize / self.total_size:.3f}") + if dsize != 0: + logger.info(f"Space reduction factor due to compression: {self.repository_size / dsize:.3f}") + logger.info(f"Compaction saved {format_file_size(repo_size_before - repo_size_after, precision=0)}.") class CompactMixIn: diff --git a/src/borg/testsuite/archiver/compact_cmd.py b/src/borg/testsuite/archiver/compact_cmd.py index c1dc3fcb7..1b90ecf5b 100644 --- a/src/borg/testsuite/archiver/compact_cmd.py +++ b/src/borg/testsuite/archiver/compact_cmd.py @@ -11,7 +11,7 @@ def test_compact_empty_repository(archivers, request): output = cmd(archiver, "compact", "-v", exit_code=0) assert "Starting compaction" in output - assert "Repository has 0 objects now." in output + assert "Repository size is 0 B in 0 objects." in output assert "Finished compaction" in output @@ -25,7 +25,7 @@ def test_compact_after_deleting_all_archives(archivers, request): output = cmd(archiver, "compact", "-v", exit_code=0) assert "Starting compaction" in output assert "Deleting " in output - assert "Repository has 0 objects now." in output + assert "Repository size is 0 B in 0 objects." in output assert "Finished compaction" in output @@ -40,5 +40,5 @@ def test_compact_after_deleting_some_archives(archivers, request): output = cmd(archiver, "compact", "-v", exit_code=0) assert "Starting compaction" in output assert "Deleting " in output - assert "Repository has 0 objects now, using approx. 0 B." not in output + assert "Repository size is 0 B in 0 objects." not in output assert "Finished compaction" in output