mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-01 12:45:34 +00:00
replace LRUCache internals with OrderedDict
Replacing the internals should make the implementation faster and simpler, because order tracking is now done by the `OrderedDict`. Furthermore, this commit adds type hints to `LRUCache`, gives its `dispose` argument a no-op default so callers no longer need to pass `dispose=lambda _: None`, and renames the `upd` method to `replace` to make its purpose clearer.
This commit is contained in:
parent
ac4337a921
commit
e683c80c75
5 changed files with 54 additions and 40 deletions
|
@ -1236,7 +1236,7 @@ def stat_attrs(self, st, path, fd=None):
|
||||||
# (hash_func, chunk_length) -> chunk_hash
|
# (hash_func, chunk_length) -> chunk_hash
|
||||||
# we play safe and have the hash_func in the mapping key, in case we
|
# we play safe and have the hash_func in the mapping key, in case we
|
||||||
# have different hash_funcs within the same borg run.
|
# have different hash_funcs within the same borg run.
|
||||||
zero_chunk_ids = LRUCache(10, dispose=lambda _: None)
|
zero_chunk_ids = LRUCache(10) # type: ignore[var-annotated]
|
||||||
|
|
||||||
|
|
||||||
def cached_hash(chunk, id_hash):
|
def cached_hash(chunk, id_hash):
|
||||||
|
|
|
@ -115,7 +115,7 @@ def __init__(self, decrypted_repository):
|
||||||
# tend to re-read the same chunks over and over.
|
# tend to re-read the same chunks over and over.
|
||||||
# The capacity is kept low because increasing it does not provide any significant advantage,
|
# The capacity is kept low because increasing it does not provide any significant advantage,
|
||||||
# but makes LRUCache's square behaviour noticeable and consumes more memory.
|
# but makes LRUCache's square behaviour noticeable and consumes more memory.
|
||||||
self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
|
self.chunks = LRUCache(capacity=10)
|
||||||
|
|
||||||
# Instrumentation
|
# Instrumentation
|
||||||
# Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
|
# Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
|
||||||
|
@ -252,7 +252,7 @@ def __init__(self, manifest, args, decrypted_repository):
|
||||||
# not contained in archives.
|
# not contained in archives.
|
||||||
self._items = {}
|
self._items = {}
|
||||||
# cache up to <FILES> Items
|
# cache up to <FILES> Items
|
||||||
self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
|
self._inode_cache = LRUCache(capacity=FILES)
|
||||||
# _inode_count is the current count of synthetic inodes, i.e. those in self._items
|
# _inode_count is the current count of synthetic inodes, i.e. those in self._items
|
||||||
self.inode_count = 0
|
self.inode_count = 0
|
||||||
# Maps inode numbers to the inode number of the parent
|
# Maps inode numbers to the inode number of the parent
|
||||||
|
@ -445,8 +445,8 @@ def __init__(self, manifest, args, decrypted_repository):
|
||||||
self.decrypted_repository = decrypted_repository
|
self.decrypted_repository = decrypted_repository
|
||||||
data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
|
data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
|
||||||
logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
|
logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
|
||||||
self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
|
self.data_cache = LRUCache(capacity=data_cache_capacity)
|
||||||
self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
|
self._last_pos = LRUCache(capacity=FILES)
|
||||||
|
|
||||||
def sig_info_handler(self, sig_no, stack):
|
def sig_info_handler(self, sig_no, stack):
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
@ -689,7 +689,7 @@ def read(self, fh, offset, size):
|
||||||
size -= n
|
size -= n
|
||||||
if not size:
|
if not size:
|
||||||
if fh in self._last_pos:
|
if fh in self._last_pos:
|
||||||
self._last_pos.upd(fh, (chunk_no, chunk_offset))
|
self._last_pos.replace(fh, (chunk_no, chunk_offset))
|
||||||
else:
|
else:
|
||||||
self._last_pos[fh] = (chunk_no, chunk_offset)
|
self._last_pos[fh] = (chunk_no, chunk_offset)
|
||||||
break
|
break
|
||||||
|
|
|
@ -1,57 +1,71 @@
|
||||||
|
from collections import OrderedDict
|
||||||
|
from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
sentinel = object()
|
sentinel = object()
|
||||||
|
K = TypeVar("K")
|
||||||
|
V = TypeVar("V")
|
||||||
|
|
||||||
|
|
||||||
class LRUCache:
|
class LRUCache(MutableMapping[K, V]):
|
||||||
def __init__(self, capacity, dispose):
|
"""
|
||||||
self._cache = {}
|
Mapping which maintains a maximum size by dropping the least recently used value.
|
||||||
self._lru = []
|
Items are passed to dispose before being removed and replacing an item without
|
||||||
|
removing it first is forbidden.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_cache: OrderedDict[K, V]
|
||||||
|
|
||||||
|
_capacity: int
|
||||||
|
|
||||||
|
_dispose: Callable[[V], None]
|
||||||
|
|
||||||
|
def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None):
|
||||||
|
self._cache = OrderedDict()
|
||||||
self._capacity = capacity
|
self._capacity = capacity
|
||||||
self._dispose = dispose
|
self._dispose = dispose
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key: K, value: V) -> None:
|
||||||
assert key not in self._cache, (
|
assert key not in self._cache, (
|
||||||
"Unexpected attempt to replace a cached item," " without first deleting the old item."
|
"Unexpected attempt to replace a cached item," " without first deleting the old item."
|
||||||
)
|
)
|
||||||
self._lru.append(key)
|
while len(self._cache) >= self._capacity:
|
||||||
while len(self._lru) > self._capacity:
|
self._dispose(self._cache.popitem(last=False)[1])
|
||||||
del self[self._lru[0]]
|
|
||||||
self._cache[key] = value
|
self._cache[key] = value
|
||||||
|
self._cache.move_to_end(key)
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key: K) -> V:
|
||||||
value = self._cache[key] # raise KeyError if not found
|
self._cache.move_to_end(key) # raise KeyError if not found
|
||||||
self._lru.remove(key)
|
return self._cache[key]
|
||||||
self._lru.append(key)
|
|
||||||
return value
|
|
||||||
|
|
||||||
def __delitem__(self, key):
|
def __delitem__(self, key: K) -> None:
|
||||||
value = self._cache.pop(key) # raise KeyError if not found
|
self._dispose(self._cache.pop(key))
|
||||||
self._dispose(value)
|
|
||||||
self._lru.remove(key)
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key: object) -> bool:
|
||||||
return key in self._cache
|
return key in self._cache
|
||||||
|
|
||||||
def get(self, key, default=None):
|
def __len__(self) -> int:
|
||||||
value = self._cache.get(key, sentinel)
|
return len(self._cache)
|
||||||
if value is sentinel:
|
|
||||||
return default
|
|
||||||
self._lru.remove(key)
|
|
||||||
self._lru.append(key)
|
|
||||||
return value
|
|
||||||
|
|
||||||
def upd(self, key, value):
|
def replace(self, key: K, value: V) -> None:
|
||||||
# special use only: update the value for an existing key without having to dispose it first
|
"""Replace an item which is already present, not disposing it in the process"""
|
||||||
# this method complements __setitem__ which should be used for the normal use case.
|
# this method complements __setitem__ which should be used for the normal use case.
|
||||||
assert key in self._cache, "Unexpected attempt to update a non-existing item."
|
assert key in self._cache, "Unexpected attempt to update a non-existing item."
|
||||||
self._cache[key] = value
|
self._cache[key] = value
|
||||||
|
|
||||||
def clear(self):
|
def clear(self) -> None:
|
||||||
for value in self._cache.values():
|
for value in self._cache.values():
|
||||||
self._dispose(value)
|
self._dispose(value)
|
||||||
self._cache.clear()
|
self._cache.clear()
|
||||||
|
|
||||||
def items(self):
|
def __iter__(self) -> Iterator[K]:
|
||||||
return self._cache.items()
|
return iter(self._cache)
|
||||||
|
|
||||||
def __len__(self):
|
def keys(self) -> KeysView[K]:
|
||||||
return len(self._cache)
|
return self._cache.keys()
|
||||||
|
|
||||||
|
def values(self) -> ValuesView[V]:
|
||||||
|
return self._cache.values()
|
||||||
|
|
||||||
|
def items(self) -> ItemsView[K, V]:
|
||||||
|
return self._cache.items()
|
||||||
|
|
|
@ -1536,7 +1536,7 @@ def clean_old():
|
||||||
else:
|
else:
|
||||||
# we only have fresh enough stuff here.
|
# we only have fresh enough stuff here.
|
||||||
# update the timestamp of the lru cache entry.
|
# update the timestamp of the lru cache entry.
|
||||||
self.fds.upd(segment, (now, fd))
|
self.fds.replace(segment, (now, fd))
|
||||||
return fd
|
return fd
|
||||||
|
|
||||||
def close_segment(self):
|
def close_segment(self):
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
class TestLRUCache:
|
class TestLRUCache:
|
||||||
def test_lrucache(self):
|
def test_lrucache(self):
|
||||||
c = LRUCache(2, dispose=lambda _: None)
|
c = LRUCache(2)
|
||||||
assert len(c) == 0
|
assert len(c) == 0
|
||||||
assert c.items() == set()
|
assert c.items() == set()
|
||||||
for i, x in enumerate("abc"):
|
for i, x in enumerate("abc"):
|
||||||
|
|
Loading…
Reference in a new issue