replace `LRUCache` internals with `OrderedDict`

Replacing the internals should make the implementation faster and
simpler, since the recency order is now tracked by the `OrderedDict`
itself instead of by a separate list kept alongside a plain dict.

Furthermore, this commit adds type hints to `LRUCache`, makes its
`dispose` argument optional (defaulting to a no-op), and renames the
`upd` method to `replace` to make its purpose clearer.
This commit is contained in:
Eric Wolf 2023-06-10 18:16:40 +02:00
parent ac4337a921
commit e683c80c75
No known key found for this signature in database
GPG Key ID: 80D2DA428A4A537F
5 changed files with 54 additions and 40 deletions

View File

@ -1236,7 +1236,7 @@ class MetadataCollector:
# (hash_func, chunk_length) -> chunk_hash
# we play safe and have the hash_func in the mapping key, in case we
# have different hash_funcs within the same borg run.
zero_chunk_ids = LRUCache(10, dispose=lambda _: None)
zero_chunk_ids = LRUCache(10) # type: ignore[var-annotated]
def cached_hash(chunk, id_hash):

View File

@ -115,7 +115,7 @@ class ItemCache:
# tend to re-read the same chunks over and over.
# The capacity is kept low because increasing it does not provide any significant advantage,
# but makes LRUCache's quadratic behaviour noticeable and consumes more memory.
self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
self.chunks = LRUCache(capacity=10)
# Instrumentation
# Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
@ -252,7 +252,7 @@ class FuseBackend:
# not contained in archives.
self._items = {}
# cache up to <FILES> Items
self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
self._inode_cache = LRUCache(capacity=FILES)
# inode_count is the current count of synthetic inodes, i.e. those in self._items
self.inode_count = 0
# Maps inode numbers to the inode number of the parent
@ -445,8 +445,8 @@ class FuseOperations(llfuse.Operations, FuseBackend):
self.decrypted_repository = decrypted_repository
data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
self.data_cache = LRUCache(capacity=data_cache_capacity)
self._last_pos = LRUCache(capacity=FILES)
def sig_info_handler(self, sig_no, stack):
logger.debug(
@ -689,7 +689,7 @@ class FuseOperations(llfuse.Operations, FuseBackend):
size -= n
if not size:
if fh in self._last_pos:
self._last_pos.upd(fh, (chunk_no, chunk_offset))
self._last_pos.replace(fh, (chunk_no, chunk_offset))
else:
self._last_pos[fh] = (chunk_no, chunk_offset)
break

View File

@ -1,57 +1,71 @@
from collections import OrderedDict
from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView
from typing import TypeVar
sentinel = object()
K = TypeVar("K")
V = TypeVar("V")


class LRUCache(MutableMapping[K, V]):
    """
    Mapping which maintains a maximum size by dropping the least recently used value.

    Every value that leaves the cache (by eviction, ``del`` or ``clear()``) is handed
    to the ``dispose`` callback. Assigning to a key that is already present is
    forbidden; either delete the key first or use ``replace()``.

    Recency is refreshed by ``__getitem__`` (and therefore by the inherited ``get``)
    and by inserting a new key. ``__contains__``, ``replace()`` and iteration leave
    the eviction order untouched.
    """

    _cache: OrderedDict[K, V]
    _capacity: int
    _dispose: Callable[[V], None]

    def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None):
        """
        :param capacity: maximum number of items kept; values below 1 behave like 1,
            so that an item which was just inserted can always be read back.
        :param dispose: called with each value that is removed from the cache.
        """
        self._cache = OrderedDict()
        self._capacity = capacity
        self._dispose = dispose

    def __setitem__(self, key: K, value: V) -> None:
        """Insert a new item as most recently used, evicting the oldest items if full."""
        assert key not in self._cache, (
            "Unexpected attempt to replace a cached item, without first deleting the old item."
        )
        # The emptiness check keeps popitem() from raising KeyError on an empty
        # mapping when the configured capacity is zero or negative.
        while self._cache and len(self._cache) >= self._capacity:
            _, evicted = self._cache.popitem(last=False)
            self._dispose(evicted)
        # key is known to be absent, so plain assignment appends it at the end.
        self._cache[key] = value

    def __getitem__(self, key: K) -> V:
        """Return the value for key and mark it as most recently used."""
        value = self._cache[key]  # raise KeyError if not found
        self._cache.move_to_end(key)
        return value

    def __delitem__(self, key: K) -> None:
        """Remove key and dispose its value; raises KeyError if key is not present."""
        self._dispose(self._cache.pop(key))

    def __contains__(self, key: object) -> bool:
        return key in self._cache

    def __len__(self) -> int:
        return len(self._cache)

    def replace(self, key: K, value: V) -> None:
        """Replace an item which is already present, not disposing it in the process"""
        # this method complements __setitem__ which should be used for the normal use case.
        assert key in self._cache, "Unexpected attempt to update a non-existing item."
        # Assigning to an existing OrderedDict key keeps its position.
        self._cache[key] = value

    def clear(self) -> None:
        """Dispose and remove all items, oldest first."""
        # Pop each entry before disposing it, so that a failing dispose callback
        # cannot leave an already disposed value behind in the cache.
        while self._cache:
            _, value = self._cache.popitem(last=False)
            self._dispose(value)

    def __iter__(self) -> Iterator[K]:
        return iter(self._cache)

    def keys(self) -> KeysView[K]:
        return self._cache.keys()

    def values(self) -> ValuesView[V]:
        return self._cache.values()

    def items(self) -> ItemsView[K, V]:
        return self._cache.items()

View File

@ -1536,7 +1536,7 @@ class LoggedIO:
else:
# we only have fresh enough stuff here.
# update the timestamp of the lru cache entry.
self.fds.upd(segment, (now, fd))
self.fds.replace(segment, (now, fd))
return fd
def close_segment(self):

View File

@ -7,7 +7,7 @@ from ..helpers.lrucache import LRUCache
class TestLRUCache:
def test_lrucache(self):
c = LRUCache(2, dispose=lambda _: None)
c = LRUCache(2)
assert len(c) == 0
assert c.items() == set()
for i, x in enumerate("abc"):