Merge pull request #3898 from ThomasWaldmann/move-ht-load-calculation

move capacity calculation to IndexBase, fixes #2646
commit 3bdfa869d6
TW 2018-06-14 22:23:10 +02:00 committed by GitHub
3 changed files with 12 additions and 12 deletions
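The refactor is small: each call site used to divide the number of entries it wanted to store by ChunkIndex.MAX_LOAD_FACTOR to get a raw table capacity; now callers pass usable= and IndexBase.__cinit__ performs the division once. A minimal sketch of the arithmetic, assuming HASH_MAX_LOAD is 0.75 (its value in borg's _hashindex.c; treat the exact constant as an assumption here):

    MAX_LOAD_FACTOR = 0.75  # assumed value of HASH_MAX_LOAD from _hashindex.c

    # Before: every call site derived the raw capacity itself, e.g. with 10 % headroom:
    capacity = int(1_000_000 / MAX_LOAD_FACTOR * 1.1)

    # After: callers state how many entries they need ("usable") and the
    # division happens once, inside IndexBase.__cinit__:
    def capacity_for(usable):
        return int(usable / MAX_LOAD_FACTOR)

    print(capacity, capacity_for(1_000_000 * 1.1))  # same bucket count, modulo float rounding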

src/borg/archive.py

@@ -1332,11 +1332,10 @@ class ArchiveChecker:
     def init_chunks(self):
         """Fetch a list of all object keys from repository
         """
-        # Explicitly set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial usable hash table capacity to avoid performance issues
         # due to hash table "resonance".
-        # Since reconstruction of archive items can add some new chunks, add 10 % headroom
-        capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
-        self.chunks = ChunkIndex(capacity)
+        # Since reconstruction of archive items can add some new chunks, add 10 % headroom.
+        self.chunks = ChunkIndex(usable=len(self.repository) * 1.1)
         marker = None
         while True:
             result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
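The "resonance" comment guards against pathological resize behavior (see #2646); pre-sizing from len(self.repository) means the table never has to grow while the chunk list streams in. A toy model of what repeated grow-and-rehash cycles would cost; the growth policy and numbers are illustrative only, not borg's actual resize logic from _hashindex.c:

    def rehash_cost(n, buckets=1021, max_load=0.75, grow=2):
        """Entries re-hashed while inserting n items into a table grown on demand."""
        moved = 0
        for inserted in range(1, n + 1):
            if inserted > buckets * max_load:  # over the load factor limit
                buckets *= grow                # allocate a bigger table ...
                moved += inserted              # ... and re-hash everything so far
        return moved

    # With ~1M chunks, on-demand growth re-hashes over a million entries in
    # this model; ChunkIndex(usable=len(repository) * 1.1) allocates once.
    print(rehash_cost(1_000_000))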

src/borg/cache.py

@@ -780,11 +780,11 @@ class LocalCache(CacheStatsMixin):
             # deallocates old hashindex, creates empty hashindex:
             chunk_idx.clear()
             cleanup_outdated(cached_ids - archive_ids)
-            # Explicitly set the initial hash table capacity to avoid performance issues
+            # Explicitly set the usable initial hash table capacity to avoid performance issues
             # due to hash table "resonance".
-            master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR)
+            master_index_capacity = len(self.repository)
             if archive_ids:
-                chunk_idx = None if not self.do_cache else ChunkIndex(master_index_capacity)
+                chunk_idx = None if not self.do_cache else ChunkIndex(usable=master_index_capacity)
                 pi = ProgressIndicatorPercent(total=len(archive_ids), step=0.1,
                                               msg='%3.0f%% Syncing chunks cache. Processing archive %s',
                                               msgid='cache.sync')
@@ -805,7 +805,7 @@ class LocalCache(CacheStatsMixin):
                         logger.info("Merging into master chunks index ...")
                         chunk_idx.merge(archive_chunk_idx)
                     else:
-                        chunk_idx = chunk_idx or ChunkIndex(master_index_capacity)
+                        chunk_idx = chunk_idx or ChunkIndex(usable=master_index_capacity)
                         logger.info('Fetching archive index for %s ...', archive_name)
                         fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx)
                 if not self.do_cache:
@@ -1087,12 +1087,11 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
 
     def begin_txn(self):
         self._txn_active = True
-        # Explicitly set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial usable hash table capacity to avoid performance issues
         # due to hash table "resonance".
         # Since we're creating an archive, add 10 % from the start.
         num_chunks = len(self.repository)
-        capacity = int(num_chunks / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
-        self.chunks = ChunkIndex(capacity)
+        self.chunks = ChunkIndex(usable=num_chunks * 1.1)
         pi = ProgressIndicatorPercent(total=num_chunks, msg='Downloading chunk list... %3.0f%%',
                                       msgid='cache.download_chunks')
         t0 = perf_counter()
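Note how the three call sites size their indexes: init_chunks() and begin_txn() add 10 % headroom because they will insert chunks that are not in the repository yet, while the cache-sync master index holds exactly the repository's current chunks. In sketch form (repo_len is a hypothetical stand-in for len(self.repository)):

    repo_len = 500_000              # hypothetical len(self.repository)

    usable_check = repo_len * 1.1   # check: reconstructed items may add chunks
    usable_txn = repo_len * 1.1     # begin_txn: the new archive will add chunks
    usable_sync = repo_len          # sync: merges only chunks that already exist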

src/borg/hashindex.pyx

@@ -84,7 +84,7 @@ cdef class IndexBase:
     MAX_LOAD_FACTOR = HASH_MAX_LOAD
     MAX_VALUE = _MAX_VALUE
 
-    def __cinit__(self, capacity=0, path=None, permit_compact=False):
+    def __cinit__(self, capacity=0, path=None, permit_compact=False, usable=None):
         self.key_size = self._key_size
         if path:
             if isinstance(path, (str, bytes)):
@@ -94,6 +94,8 @@ cdef class IndexBase:
                 self.index = hashindex_read(path, permit_compact)
             assert self.index, 'hashindex_read() returned NULL with no exception set'
         else:
+            if usable is not None:
+                capacity = int(usable / self.MAX_LOAD_FACTOR)
             self.index = hashindex_init(capacity, self.key_size, self.value_size)
             if not self.index:
                 raise Exception('hashindex_init failed')
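With the conversion centralized here, callers pick whichever constructor style fits; the positional capacity path is unchanged for pre-sized or loaded indexes. A usage sketch, assuming ChunkIndex inherits this __cinit__ as the call sites above imply:

    from borg.hashindex import ChunkIndex

    # New style: say how many entries you need to store; the raw bucket
    # count is derived from MAX_LOAD_FACTOR internally.
    idx = ChunkIndex(usable=200_000 * 1.1)

    # Old style, still supported: compute the raw capacity yourself.
    idx_old = ChunkIndex(int(200_000 / ChunkIndex.MAX_LOAD_FACTOR))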