mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-01 12:45:34 +00:00
replace LRUCache internals with OrderedDict
Replacing the internals should make the implementation faster and simpler, since the order tracking is done by the `OrderedDict`. Furthermore, this commit adds type hints to `LRUCache` and renames the `upd` method to `replace` to make its purpose clearer.
This commit is contained in:
parent
ac4337a921
commit
e683c80c75
5 changed files with 54 additions and 40 deletions
|
@ -1236,7 +1236,7 @@ def stat_attrs(self, st, path, fd=None):
|
|||
# (hash_func, chunk_length) -> chunk_hash
|
||||
# we play safe and have the hash_func in the mapping key, in case we
|
||||
# have different hash_funcs within the same borg run.
|
||||
zero_chunk_ids = LRUCache(10, dispose=lambda _: None)
|
||||
zero_chunk_ids = LRUCache(10) # type: ignore[var-annotated]
|
||||
|
||||
|
||||
def cached_hash(chunk, id_hash):
|
||||
|
|
|
@ -115,7 +115,7 @@ def __init__(self, decrypted_repository):
|
|||
# tend to re-read the same chunks over and over.
|
||||
# The capacity is kept low because increasing it does not provide any significant advantage,
|
||||
# but makes LRUCache's square behaviour noticeable and consumes more memory.
|
||||
self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
|
||||
self.chunks = LRUCache(capacity=10)
|
||||
|
||||
# Instrumentation
|
||||
# Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
|
||||
|
@ -252,7 +252,7 @@ def __init__(self, manifest, args, decrypted_repository):
|
|||
# not contained in archives.
|
||||
self._items = {}
|
||||
# cache up to <FILES> Items
|
||||
self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
|
||||
self._inode_cache = LRUCache(capacity=FILES)
|
||||
# _inode_count is the current count of synthetic inodes, i.e. those in self._items
|
||||
self.inode_count = 0
|
||||
# Maps inode numbers to the inode number of the parent
|
||||
|
@ -445,8 +445,8 @@ def __init__(self, manifest, args, decrypted_repository):
|
|||
self.decrypted_repository = decrypted_repository
|
||||
data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
|
||||
logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
|
||||
self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
|
||||
self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
|
||||
self.data_cache = LRUCache(capacity=data_cache_capacity)
|
||||
self._last_pos = LRUCache(capacity=FILES)
|
||||
|
||||
def sig_info_handler(self, sig_no, stack):
|
||||
logger.debug(
|
||||
|
@ -689,7 +689,7 @@ def read(self, fh, offset, size):
|
|||
size -= n
|
||||
if not size:
|
||||
if fh in self._last_pos:
|
||||
self._last_pos.upd(fh, (chunk_no, chunk_offset))
|
||||
self._last_pos.replace(fh, (chunk_no, chunk_offset))
|
||||
else:
|
||||
self._last_pos[fh] = (chunk_no, chunk_offset)
|
||||
break
|
||||
|
|
|
@ -1,57 +1,71 @@
|
|||
from collections import OrderedDict
|
||||
from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView
|
||||
from typing import TypeVar
|
||||
|
||||
sentinel = object()
|
||||
K = TypeVar("K")
|
||||
V = TypeVar("V")
|
||||
|
||||
|
||||
class LRUCache:
|
||||
def __init__(self, capacity, dispose):
|
||||
self._cache = {}
|
||||
self._lru = []
|
||||
class LRUCache(MutableMapping[K, V]):
|
||||
"""
|
||||
Mapping which maintains a maximum size by dropping the least recently used value.
|
||||
Items are passed to dispose before being removed and replacing an item without
|
||||
removing it first is forbidden.
|
||||
"""
|
||||
|
||||
_cache: OrderedDict[K, V]
|
||||
|
||||
_capacity: int
|
||||
|
||||
_dispose: Callable[[V], None]
|
||||
|
||||
def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None):
|
||||
self._cache = OrderedDict()
|
||||
self._capacity = capacity
|
||||
self._dispose = dispose
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
def __setitem__(self, key: K, value: V) -> None:
|
||||
assert key not in self._cache, (
|
||||
"Unexpected attempt to replace a cached item," " without first deleting the old item."
|
||||
)
|
||||
self._lru.append(key)
|
||||
while len(self._lru) > self._capacity:
|
||||
del self[self._lru[0]]
|
||||
while len(self._cache) >= self._capacity:
|
||||
self._dispose(self._cache.popitem(last=False)[1])
|
||||
self._cache[key] = value
|
||||
self._cache.move_to_end(key)
|
||||
|
||||
def __getitem__(self, key):
|
||||
value = self._cache[key] # raise KeyError if not found
|
||||
self._lru.remove(key)
|
||||
self._lru.append(key)
|
||||
return value
|
||||
def __getitem__(self, key: K) -> V:
|
||||
self._cache.move_to_end(key) # raise KeyError if not found
|
||||
return self._cache[key]
|
||||
|
||||
def __delitem__(self, key):
|
||||
value = self._cache.pop(key) # raise KeyError if not found
|
||||
self._dispose(value)
|
||||
self._lru.remove(key)
|
||||
def __delitem__(self, key: K) -> None:
|
||||
self._dispose(self._cache.pop(key))
|
||||
|
||||
def __contains__(self, key):
|
||||
def __contains__(self, key: object) -> bool:
|
||||
return key in self._cache
|
||||
|
||||
def get(self, key, default=None):
|
||||
value = self._cache.get(key, sentinel)
|
||||
if value is sentinel:
|
||||
return default
|
||||
self._lru.remove(key)
|
||||
self._lru.append(key)
|
||||
return value
|
||||
def __len__(self) -> int:
|
||||
return len(self._cache)
|
||||
|
||||
def upd(self, key, value):
|
||||
# special use only: update the value for an existing key without having to dispose it first
|
||||
def replace(self, key: K, value: V) -> None:
|
||||
"""Replace an item which is already present, not disposing it in the process"""
|
||||
# this method complements __setitem__ which should be used for the normal use case.
|
||||
assert key in self._cache, "Unexpected attempt to update a non-existing item."
|
||||
self._cache[key] = value
|
||||
|
||||
def clear(self):
|
||||
def clear(self) -> None:
|
||||
for value in self._cache.values():
|
||||
self._dispose(value)
|
||||
self._cache.clear()
|
||||
|
||||
def items(self):
|
||||
return self._cache.items()
|
||||
def __iter__(self) -> Iterator[K]:
|
||||
return iter(self._cache)
|
||||
|
||||
def __len__(self):
|
||||
return len(self._cache)
|
||||
def keys(self) -> KeysView[K]:
|
||||
return self._cache.keys()
|
||||
|
||||
def values(self) -> ValuesView[V]:
|
||||
return self._cache.values()
|
||||
|
||||
def items(self) -> ItemsView[K, V]:
|
||||
return self._cache.items()
|
||||
|
|
|
@ -1536,7 +1536,7 @@ def clean_old():
|
|||
else:
|
||||
# we only have fresh enough stuff here.
|
||||
# update the timestamp of the lru cache entry.
|
||||
self.fds.upd(segment, (now, fd))
|
||||
self.fds.replace(segment, (now, fd))
|
||||
return fd
|
||||
|
||||
def close_segment(self):
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
class TestLRUCache:
|
||||
def test_lrucache(self):
|
||||
c = LRUCache(2, dispose=lambda _: None)
|
||||
c = LRUCache(2)
|
||||
assert len(c) == 0
|
||||
assert c.items() == set()
|
||||
for i, x in enumerate("abc"):
|
||||
|
|
Loading…
Reference in a new issue