replace LRUCache internals with OrderedDict

Replacing the internals should make the implementation faster and simpler, since the order tracking is now done by the `OrderedDict`. Furthermore, this commit adds type hints to `LRUCache` and renames the `upd` method to `replace` to make its purpose clearer.
2025-01-01 12:45:34 +00:00 · 2023-06-10 18:16:40 +02:00 · 2023-06-10 18:16:40 +02:00 · e683c80c75
commit e683c80c75
parent ac4337a921
5 changed files with 54 additions and 40 deletions
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@ -1236,7 +1236,7 @@ def stat_attrs(self, st, path, fd=None):
 # (hash_func, chunk_length) -> chunk_hash
 # we play safe and have the hash_func in the mapping key, in case we
 # have different hash_funcs within the same borg run.
-zero_chunk_ids = LRUCache(10, dispose=lambda _: None)
+zero_chunk_ids = LRUCache(10)  # type: ignore[var-annotated]
 def cached_hash(chunk, id_hash):
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@ -115,7 +115,7 @@ def __init__(self, decrypted_repository):
        # tend to re-read the same chunks over and over.
        # The capacity is kept low because increasing it does not provide any significant advantage,
        # but makes LRUCache's square behaviour noticeable and consumes more memory.
-        self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
+        self.chunks = LRUCache(capacity=10)
        # Instrumentation
        # Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
@ -252,7 +252,7 @@ def __init__(self, manifest, args, decrypted_repository):
        # not contained in archives.
        self._items = {}
        # cache up to <FILES> Items
-        self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
+        self._inode_cache = LRUCache(capacity=FILES)
        # _inode_count is the current count of synthetic inodes, i.e. those in self._items
        self.inode_count = 0
        # Maps inode numbers to the inode number of the parent
@ -445,8 +445,8 @@ def __init__(self, manifest, args, decrypted_repository):
        self.decrypted_repository = decrypted_repository
        data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
        logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
-        self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
+        self.data_cache = LRUCache(capacity=data_cache_capacity)
-        self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
+        self._last_pos = LRUCache(capacity=FILES)
    def sig_info_handler(self, sig_no, stack):
        logger.debug(
@ -689,7 +689,7 @@ def read(self, fh, offset, size):
            size -= n
            if not size:
                if fh in self._last_pos:
-                    self._last_pos.upd(fh, (chunk_no, chunk_offset))
+                    self._last_pos.replace(fh, (chunk_no, chunk_offset))
                else:
                    self._last_pos[fh] = (chunk_no, chunk_offset)
                break
--- a/src/borg/helpers/lrucache.py
+++ b/src/borg/helpers/lrucache.py
@ -1,57 +1,71 @@
 from collections import OrderedDict
 from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView
 from typing import TypeVar
 sentinel = object()
 K = TypeVar("K")
 V = TypeVar("V")
-class LRUCache:
+class LRUCache(MutableMapping[K, V]):
-    def __init__(self, capacity, dispose):
+    """
-        self._cache = {}
+    Mapping which maintains a maximum size by dropping the least recently used value.
-        self._lru = []
+    Items are passed to dispose before being removed and replacing an item without
    removing it first is forbidden.
    """
    _cache: OrderedDict[K, V]
    _capacity: int
    _dispose: Callable[[V], None]
    def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None):
        self._cache = OrderedDict()
        self._capacity = capacity
        self._dispose = dispose
-    def __setitem__(self, key, value):
+    def __setitem__(self, key: K, value: V) -> None:
        assert key not in self._cache, (
            "Unexpected attempt to replace a cached item," " without first deleting the old item."
        )
-        self._lru.append(key)
+        while len(self._cache) >= self._capacity:
-        while len(self._lru) > self._capacity:
+            self._dispose(self._cache.popitem(last=False)[1])
            del self[self._lru[0]]
        self._cache[key] = value
        self._cache.move_to_end(key)
-    def __getitem__(self, key):
+    def __getitem__(self, key: K) -> V:
-        value = self._cache[key]  # raise KeyError if not found
+        self._cache.move_to_end(key)  # raise KeyError if not found
-        self._lru.remove(key)
+        return self._cache[key]
        self._lru.append(key)
        return value
-    def __delitem__(self, key):
+    def __delitem__(self, key: K) -> None:
-        value = self._cache.pop(key)  # raise KeyError if not found
+        self._dispose(self._cache.pop(key))
        self._dispose(value)
        self._lru.remove(key)
-    def __contains__(self, key):
+    def __contains__(self, key: object) -> bool:
        return key in self._cache
-    def get(self, key, default=None):
+    def __len__(self) -> int:
-        value = self._cache.get(key, sentinel)
+        return len(self._cache)
        if value is sentinel:
            return default
        self._lru.remove(key)
        self._lru.append(key)
        return value
-    def upd(self, key, value):
+    def replace(self, key: K, value: V) -> None:
-        # special use only: update the value for an existing key without having to dispose it first
+        """Replace an item which is already present, not disposing it in the process"""
        # this method complements __setitem__ which should be used for the normal use case.
        assert key in self._cache, "Unexpected attempt to update a non-existing item."
        self._cache[key] = value
-    def clear(self):
+    def clear(self) -> None:
        for value in self._cache.values():
            self._dispose(value)
        self._cache.clear()
-    def items(self):
+    def __iter__(self) -> Iterator[K]:
-        return self._cache.items()
+        return iter(self._cache)
-    def __len__(self):
+    def keys(self) -> KeysView[K]:
-        return len(self._cache)
+        return self._cache.keys()
    def values(self) -> ValuesView[V]:
        return self._cache.values()
    def items(self) -> ItemsView[K, V]:
        return self._cache.items()
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@ -1536,7 +1536,7 @@ def clean_old():
        else:
            # we only have fresh enough stuff here.
            # update the timestamp of the lru cache entry.
-            self.fds.upd(segment, (now, fd))
+            self.fds.replace(segment, (now, fd))
        return fd
    def close_segment(self):
--- a/src/borg/testsuite/lrucache.py
+++ b/src/borg/testsuite/lrucache.py
@ -7,7 +7,7 @@
 class TestLRUCache:
    def test_lrucache(self):
-        c = LRUCache(2, dispose=lambda _: None)
+        c = LRUCache(2)
        assert len(c) == 0
        assert c.items() == set()
        for i, x in enumerate("abc"):