From e683c80c75397fbd0970ebfb45c04626508c9db4 Mon Sep 17 00:00:00 2001 From: Eric Wolf Date: Sat, 10 Jun 2023 18:16:40 +0200 Subject: [PATCH] replace `LRUCache` internals with `OrderedDict` Replacing the internals should make the implementation faster and simpler since the order tracking is done by the `OrderedDict`. Furthermore, this commit adds type hints to `LRUCache` and renames the `upd` method to `replace` to make its use more clear. --- src/borg/archive.py | 2 +- src/borg/fuse.py | 10 ++--- src/borg/helpers/lrucache.py | 78 ++++++++++++++++++++-------------- src/borg/repository.py | 2 +- src/borg/testsuite/lrucache.py | 2 +- 5 files changed, 54 insertions(+), 40 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 3caf50608..a9036ba08 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1236,7 +1236,7 @@ def stat_attrs(self, st, path, fd=None): # (hash_func, chunk_length) -> chunk_hash # we play safe and have the hash_func in the mapping key, in case we # have different hash_funcs within the same borg run. -zero_chunk_ids = LRUCache(10, dispose=lambda _: None) +zero_chunk_ids = LRUCache(10) # type: ignore[var-annotated] def cached_hash(chunk, id_hash): diff --git a/src/borg/fuse.py b/src/borg/fuse.py index a748aec19..c03f31ee1 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -115,7 +115,7 @@ def __init__(self, decrypted_repository): # tend to re-read the same chunks over and over. # The capacity is kept low because increasing it does not provide any significant advantage, # but makes LRUCache's square behaviour noticeable and consumes more memory. - self.chunks = LRUCache(capacity=10, dispose=lambda _: None) + self.chunks = LRUCache(capacity=10) # Instrumentation # Count of indirect items, i.e. data is cached in the object cache, not directly in this cache @@ -252,7 +252,7 @@ def __init__(self, manifest, args, decrypted_repository): # not contained in archives. self._items = {} # cache up to Items - self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None) + self._inode_cache = LRUCache(capacity=FILES) # _inode_count is the current count of synthetic inodes, i.e. those in self._items self.inode_count = 0 # Maps inode numbers to the inode number of the parent @@ -445,8 +445,8 @@ def __init__(self, manifest, args, decrypted_repository): self.decrypted_repository = decrypted_repository data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1)) logger.debug("mount data cache capacity: %d chunks", data_cache_capacity) - self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None) - self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None) + self.data_cache = LRUCache(capacity=data_cache_capacity) + self._last_pos = LRUCache(capacity=FILES) def sig_info_handler(self, sig_no, stack): logger.debug( @@ -689,7 +689,7 @@ def read(self, fh, offset, size): size -= n if not size: if fh in self._last_pos: - self._last_pos.upd(fh, (chunk_no, chunk_offset)) + self._last_pos.replace(fh, (chunk_no, chunk_offset)) else: self._last_pos[fh] = (chunk_no, chunk_offset) break diff --git a/src/borg/helpers/lrucache.py b/src/borg/helpers/lrucache.py index 097fe4c52..a57abf7e3 100644 --- a/src/borg/helpers/lrucache.py +++ b/src/borg/helpers/lrucache.py @@ -1,57 +1,71 @@ +from collections import OrderedDict +from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView +from typing import TypeVar + sentinel = object() +K = TypeVar("K") +V = TypeVar("V") -class LRUCache: - def __init__(self, capacity, dispose): - self._cache = {} - self._lru = [] +class LRUCache(MutableMapping[K, V]): + """ + Mapping which maintains a maximum size by dropping the least recently used value. + Items are passed to dispose before being removed and replacing an item without + removing it first is forbidden. + """ + + _cache: OrderedDict[K, V] + + _capacity: int + + _dispose: Callable[[V], None] + + def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None): + self._cache = OrderedDict() self._capacity = capacity self._dispose = dispose - def __setitem__(self, key, value): + def __setitem__(self, key: K, value: V) -> None: assert key not in self._cache, ( "Unexpected attempt to replace a cached item," " without first deleting the old item." ) - self._lru.append(key) - while len(self._lru) > self._capacity: - del self[self._lru[0]] + while len(self._cache) >= self._capacity: + self._dispose(self._cache.popitem(last=False)[1]) self._cache[key] = value + self._cache.move_to_end(key) - def __getitem__(self, key): - value = self._cache[key] # raise KeyError if not found - self._lru.remove(key) - self._lru.append(key) - return value + def __getitem__(self, key: K) -> V: + self._cache.move_to_end(key) # raise KeyError if not found + return self._cache[key] - def __delitem__(self, key): - value = self._cache.pop(key) # raise KeyError if not found - self._dispose(value) - self._lru.remove(key) + def __delitem__(self, key: K) -> None: + self._dispose(self._cache.pop(key)) - def __contains__(self, key): + def __contains__(self, key: object) -> bool: return key in self._cache - def get(self, key, default=None): - value = self._cache.get(key, sentinel) - if value is sentinel: - return default - self._lru.remove(key) - self._lru.append(key) - return value + def __len__(self) -> int: + return len(self._cache) - def upd(self, key, value): - # special use only: update the value for an existing key without having to dispose it first + def replace(self, key: K, value: V) -> None: + """Replace an item which is already present, not disposing it in the process""" # this method complements __setitem__ which should be used for the normal use case. assert key in self._cache, "Unexpected attempt to update a non-existing item." self._cache[key] = value - def clear(self): + def clear(self) -> None: for value in self._cache.values(): self._dispose(value) self._cache.clear() - def items(self): - return self._cache.items() + def __iter__(self) -> Iterator[K]: + return iter(self._cache) - def __len__(self): - return len(self._cache) + def keys(self) -> KeysView[K]: + return self._cache.keys() + + def values(self) -> ValuesView[V]: + return self._cache.values() + + def items(self) -> ItemsView[K, V]: + return self._cache.items() diff --git a/src/borg/repository.py b/src/borg/repository.py index c2340d401..82b7bccdf 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -1536,7 +1536,7 @@ def clean_old(): else: # we only have fresh enough stuff here. # update the timestamp of the lru cache entry. - self.fds.upd(segment, (now, fd)) + self.fds.replace(segment, (now, fd)) return fd def close_segment(self): diff --git a/src/borg/testsuite/lrucache.py b/src/borg/testsuite/lrucache.py index 6b0d661e3..a3b13c1b7 100644 --- a/src/borg/testsuite/lrucache.py +++ b/src/borg/testsuite/lrucache.py @@ -7,7 +7,7 @@ class TestLRUCache: def test_lrucache(self): - c = LRUCache(2, dispose=lambda _: None) + c = LRUCache(2) assert len(c) == 0 assert c.items() == set() for i, x in enumerate("abc"):