cache sync: initialize master index to known capacity

This commit is contained in:
Marian Beermann 2017-05-26 12:30:15 +02:00
parent 740898d83b
commit 9f8b967a6f
1 changed file with 5 additions and 2 deletions

View File

@@ -603,6 +603,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
# deallocates old hashindex, creates empty hashindex:
chunk_idx.clear()
cleanup_outdated(cached_ids - archive_ids)
+# Explicitly set the initial hash table capacity to avoid performance issues
+# due to hash table "resonance".
+master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR)
if archive_ids:
chunk_idx = None
if self.progress:
@@ -630,7 +633,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
# Do not make this an else branch; the FileIntegrityError exception handler
# above can remove *archive_id* from *cached_ids*.
logger.info('Fetching and building archive index for %s ...', archive_name)
-archive_chunk_idx = ChunkIndex()
+archive_chunk_idx = ChunkIndex(master_index_capacity)
fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
logger.info("Merging into master chunks index ...")
if chunk_idx is None:
@@ -641,7 +644,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
else:
chunk_idx.merge(archive_chunk_idx)
else:
-chunk_idx = chunk_idx or ChunkIndex()
+chunk_idx = chunk_idx or ChunkIndex(master_index_capacity)
logger.info('Fetching archive index for %s ...', archive_name)
fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
if self.progress: