replace `LRUCache` internals with `OrderedDict`

Replacing the internals should make the implementation faster and simpler, since the order tracking is now done by the `OrderedDict`. Furthermore, this commit adds type hints to `LRUCache`, gives its `dispose` argument a no-op default so callers no longer need to pass `lambda _: None`, and renames the `upd` method to `replace` to make its purpose clearer.
2023-06-10 18:16:40 +02:00 · 2023-06-10 18:16:40 +02:00 · e683c80c75
parent ac4337a921
commit e683c80c75
5 changed files with 54 additions and 40 deletions
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@ -1236,7 +1236,7 @@ class MetadataCollector:
 # (hash_func, chunk_length) -> chunk_hash
 # we play safe and have the hash_func in the mapping key, in case we
 # have different hash_funcs within the same borg run.
-zero_chunk_ids = LRUCache(10, dispose=lambda _: None)
+zero_chunk_ids = LRUCache(10)  # type: ignore[var-annotated]


 def cached_hash(chunk, id_hash):
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@ -115,7 +115,7 @@ class ItemCache:
        # tend to re-read the same chunks over and over.
        # The capacity is kept low because increasing it does not provide any significant advantage,
        # but makes LRUCache's square behaviour noticeable and consumes more memory.
-        self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
+        self.chunks = LRUCache(capacity=10)

        # Instrumentation
        # Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
@ -252,7 +252,7 @@ class FuseBackend:
        # not contained in archives.
        self._items = {}
        # cache up to <FILES> Items
-        self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
+        self._inode_cache = LRUCache(capacity=FILES)
        # _inode_count is the current count of synthetic inodes, i.e. those in self._items
        self.inode_count = 0
        # Maps inode numbers to the inode number of the parent
@ -445,8 +445,8 @@ class FuseOperations(llfuse.Operations, FuseBackend):
        self.decrypted_repository = decrypted_repository
        data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
        logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
-        self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
-        self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
+        self.data_cache = LRUCache(capacity=data_cache_capacity)
+        self._last_pos = LRUCache(capacity=FILES)

    def sig_info_handler(self, sig_no, stack):
        logger.debug(
@ -689,7 +689,7 @@ class FuseOperations(llfuse.Operations, FuseBackend):
            size -= n
            if not size:
                if fh in self._last_pos:
-                    self._last_pos.upd(fh, (chunk_no, chunk_offset))
+                    self._last_pos.replace(fh, (chunk_no, chunk_offset))
                else:
                    self._last_pos[fh] = (chunk_no, chunk_offset)
                break
--- a/src/borg/helpers/lrucache.py
+++ b/src/borg/helpers/lrucache.py
@ -1,57 +1,71 @@
+from collections import OrderedDict
+from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView
+from typing import TypeVar
+
 sentinel = object()
+K = TypeVar("K")
+V = TypeVar("V")


-class LRUCache:
-    def __init__(self, capacity, dispose):
-        self._cache = {}
-        self._lru = []
+class LRUCache(MutableMapping[K, V]):
+    """
+    Mapping which maintains a maximum size by dropping the least recently used value.
+    Items are passed to dispose before being removed and replacing an item without
+    removing it first is forbidden.
+    """
+
+    _cache: OrderedDict[K, V]
+
+    _capacity: int
+
+    _dispose: Callable[[V], None]
+
+    def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None):
+        self._cache = OrderedDict()
        self._capacity = capacity
        self._dispose = dispose

-    def __setitem__(self, key, value):
+    def __setitem__(self, key: K, value: V) -> None:
        assert key not in self._cache, (
            "Unexpected attempt to replace a cached item," " without first deleting the old item."
        )
-        self._lru.append(key)
-        while len(self._lru) > self._capacity:
-            del self[self._lru[0]]
+        while len(self._cache) >= self._capacity:
+            self._dispose(self._cache.popitem(last=False)[1])
        self._cache[key] = value
+        self._cache.move_to_end(key)

-    def __getitem__(self, key):
-        value = self._cache[key]  # raise KeyError if not found
-        self._lru.remove(key)
-        self._lru.append(key)
-        return value
+    def __getitem__(self, key: K) -> V:
+        self._cache.move_to_end(key)  # raise KeyError if not found
+        return self._cache[key]

-    def __delitem__(self, key):
-        value = self._cache.pop(key)  # raise KeyError if not found
-        self._dispose(value)
-        self._lru.remove(key)
+    def __delitem__(self, key: K) -> None:
+        self._dispose(self._cache.pop(key))

-    def __contains__(self, key):
+    def __contains__(self, key: object) -> bool:
        return key in self._cache

-    def get(self, key, default=None):
-        value = self._cache.get(key, sentinel)
-        if value is sentinel:
-            return default
-        self._lru.remove(key)
-        self._lru.append(key)
-        return value
+    def __len__(self) -> int:
+        return len(self._cache)

-    def upd(self, key, value):
-        # special use only: update the value for an existing key without having to dispose it first
+    def replace(self, key: K, value: V) -> None:
+        """Replace an item which is already present, not disposing it in the process"""
        # this method complements __setitem__ which should be used for the normal use case.
        assert key in self._cache, "Unexpected attempt to update a non-existing item."
        self._cache[key] = value

-    def clear(self):
+    def clear(self) -> None:
        for value in self._cache.values():
            self._dispose(value)
        self._cache.clear()

-    def items(self):
-        return self._cache.items()
+    def __iter__(self) -> Iterator[K]:
+        return iter(self._cache)

-    def __len__(self):
-        return len(self._cache)
+    def keys(self) -> KeysView[K]:
+        return self._cache.keys()
+
+    def values(self) -> ValuesView[V]:
+        return self._cache.values()
+
+    def items(self) -> ItemsView[K, V]:
+        return self._cache.items()
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@ -1536,7 +1536,7 @@ class LoggedIO:
        else:
            # we only have fresh enough stuff here.
            # update the timestamp of the lru cache entry.
-            self.fds.upd(segment, (now, fd))
+            self.fds.replace(segment, (now, fd))
        return fd

    def close_segment(self):
--- a/src/borg/testsuite/lrucache.py
+++ b/src/borg/testsuite/lrucache.py
@ -7,7 +7,7 @@ from ..helpers.lrucache import LRUCache

 class TestLRUCache:
    def test_lrucache(self):
-        c = LRUCache(2, dispose=lambda _: None)
+        c = LRUCache(2)
        assert len(c) == 0
        assert c.items() == set()
        for i, x in enumerate("abc"):