From 9f8b967a6f45bb7dbbf1f37cf231ea82f149a0f6 Mon Sep 17 00:00:00 2001
From: Marian Beermann
Date: Fri, 26 May 2017 12:30:15 +0200
Subject: [PATCH] cache sync: initialize master index to known capacity

---
 src/borg/cache.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/borg/cache.py b/src/borg/cache.py
index c9fa70b7f..cd3a9951d 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -603,6 +603,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             # deallocates old hashindex, creates empty hashindex:
             chunk_idx.clear()
             cleanup_outdated(cached_ids - archive_ids)
+            # Explicitly set the initial hash table capacity to avoid performance issues
+            # due to hash table "resonance".
+            master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR)
             if archive_ids:
                 chunk_idx = None
                 if self.progress:
@@ -630,7 +633,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                             # Do not make this an else branch; the FileIntegrityError exception handler
                             # above can remove *archive_id* from *cached_ids*.
                             logger.info('Fetching and building archive index for %s ...', archive_name)
-                            archive_chunk_idx = ChunkIndex()
+                            archive_chunk_idx = ChunkIndex(master_index_capacity)
                             fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
                         logger.info("Merging into master chunks index ...")
                         if chunk_idx is None:
@@ -641,7 +644,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                         else:
                             chunk_idx.merge(archive_chunk_idx)
                     else:
-                        chunk_idx = chunk_idx or ChunkIndex()
+                        chunk_idx = chunk_idx or ChunkIndex(master_index_capacity)
                         logger.info('Fetching archive index for %s ...', archive_name)
                         fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
                 if self.progress: