mirror of https://github.com/borgbackup/borg.git
Merge pull request #3898 from ThomasWaldmann/move-ht-load-calculation
move capacity calculation to IndexBase, fixes #2646
This commit is contained in:
commit
3bdfa869d6
|
@ -1332,11 +1332,10 @@ class ArchiveChecker:
|
||||||
def init_chunks(self):
|
def init_chunks(self):
|
||||||
"""Fetch a list of all object keys from repository
|
"""Fetch a list of all object keys from repository
|
||||||
"""
|
"""
|
||||||
# Explicitly set the initial hash table capacity to avoid performance issues
|
# Explicitly set the initial usable hash table capacity to avoid performance issues
|
||||||
# due to hash table "resonance".
|
# due to hash table "resonance".
|
||||||
# Since reconstruction of archive items can add some new chunks, add 10 % headroom
|
# Since reconstruction of archive items can add some new chunks, add 10 % headroom.
|
||||||
capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
|
self.chunks = ChunkIndex(usable=len(self.repository) * 1.1)
|
||||||
self.chunks = ChunkIndex(capacity)
|
|
||||||
marker = None
|
marker = None
|
||||||
while True:
|
while True:
|
||||||
result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
|
result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
|
||||||
|
|
|
@ -780,11 +780,11 @@ class LocalCache(CacheStatsMixin):
|
||||||
# deallocates old hashindex, creates empty hashindex:
|
# deallocates old hashindex, creates empty hashindex:
|
||||||
chunk_idx.clear()
|
chunk_idx.clear()
|
||||||
cleanup_outdated(cached_ids - archive_ids)
|
cleanup_outdated(cached_ids - archive_ids)
|
||||||
# Explicitly set the initial hash table capacity to avoid performance issues
|
# Explicitly set the initial usable hash table capacity to avoid performance issues
|
||||||
# due to hash table "resonance".
|
# due to hash table "resonance".
|
||||||
master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR)
|
master_index_capacity = len(self.repository)
|
||||||
if archive_ids:
|
if archive_ids:
|
||||||
chunk_idx = None if not self.do_cache else ChunkIndex(master_index_capacity)
|
chunk_idx = None if not self.do_cache else ChunkIndex(usable=master_index_capacity)
|
||||||
pi = ProgressIndicatorPercent(total=len(archive_ids), step=0.1,
|
pi = ProgressIndicatorPercent(total=len(archive_ids), step=0.1,
|
||||||
msg='%3.0f%% Syncing chunks cache. Processing archive %s',
|
msg='%3.0f%% Syncing chunks cache. Processing archive %s',
|
||||||
msgid='cache.sync')
|
msgid='cache.sync')
|
||||||
|
@ -805,7 +805,7 @@ class LocalCache(CacheStatsMixin):
|
||||||
logger.info("Merging into master chunks index ...")
|
logger.info("Merging into master chunks index ...")
|
||||||
chunk_idx.merge(archive_chunk_idx)
|
chunk_idx.merge(archive_chunk_idx)
|
||||||
else:
|
else:
|
||||||
chunk_idx = chunk_idx or ChunkIndex(master_index_capacity)
|
chunk_idx = chunk_idx or ChunkIndex(usable=master_index_capacity)
|
||||||
logger.info('Fetching archive index for %s ...', archive_name)
|
logger.info('Fetching archive index for %s ...', archive_name)
|
||||||
fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx)
|
fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx)
|
||||||
if not self.do_cache:
|
if not self.do_cache:
|
||||||
|
@ -1087,12 +1087,11 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
|
||||||
|
|
||||||
def begin_txn(self):
|
def begin_txn(self):
|
||||||
self._txn_active = True
|
self._txn_active = True
|
||||||
# Explicitly set the initial hash table capacity to avoid performance issues
|
# Explicitly set the initial usable hash table capacity to avoid performance issues
|
||||||
# due to hash table "resonance".
|
# due to hash table "resonance".
|
||||||
# Since we're creating an archive, add 10 % from the start.
|
# Since we're creating an archive, add 10 % from the start.
|
||||||
num_chunks = len(self.repository)
|
num_chunks = len(self.repository)
|
||||||
capacity = int(num_chunks / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
|
self.chunks = ChunkIndex(usable=num_chunks * 1.1)
|
||||||
self.chunks = ChunkIndex(capacity)
|
|
||||||
pi = ProgressIndicatorPercent(total=num_chunks, msg='Downloading chunk list... %3.0f%%',
|
pi = ProgressIndicatorPercent(total=num_chunks, msg='Downloading chunk list... %3.0f%%',
|
||||||
msgid='cache.download_chunks')
|
msgid='cache.download_chunks')
|
||||||
t0 = perf_counter()
|
t0 = perf_counter()
|
||||||
|
|
|
@ -84,7 +84,7 @@ cdef class IndexBase:
|
||||||
MAX_LOAD_FACTOR = HASH_MAX_LOAD
|
MAX_LOAD_FACTOR = HASH_MAX_LOAD
|
||||||
MAX_VALUE = _MAX_VALUE
|
MAX_VALUE = _MAX_VALUE
|
||||||
|
|
||||||
def __cinit__(self, capacity=0, path=None, permit_compact=False):
|
def __cinit__(self, capacity=0, path=None, permit_compact=False, usable=None):
|
||||||
self.key_size = self._key_size
|
self.key_size = self._key_size
|
||||||
if path:
|
if path:
|
||||||
if isinstance(path, (str, bytes)):
|
if isinstance(path, (str, bytes)):
|
||||||
|
@ -94,6 +94,8 @@ cdef class IndexBase:
|
||||||
self.index = hashindex_read(path, permit_compact)
|
self.index = hashindex_read(path, permit_compact)
|
||||||
assert self.index, 'hashindex_read() returned NULL with no exception set'
|
assert self.index, 'hashindex_read() returned NULL with no exception set'
|
||||||
else:
|
else:
|
||||||
|
if usable is not None:
|
||||||
|
capacity = int(usable / self.MAX_LOAD_FACTOR)
|
||||||
self.index = hashindex_init(capacity, self.key_size, self.value_size)
|
self.index = hashindex_init(capacity, self.key_size, self.value_size)
|
||||||
if not self.index:
|
if not self.index:
|
||||||
raise Exception('hashindex_init failed')
|
raise Exception('hashindex_init failed')
|
||||||
|
|
Loading…
Reference in New Issue