1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-01 12:45:34 +00:00

replace LRUCache internals with OrderedDict

Replacing the internals should make the implementation faster
and simpler, since the usage order is now tracked by the `OrderedDict`
instead of a separate list that had to be searched on every access.

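Furthermore, this commit adds type hints to `LRUCache` and
renames the `upd` method to `replace` to make its purpose clearer.
The `dispose` argument is now optional and defaults to a no-op, so
callers that do not need disposal no longer have to pass one.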
This commit is contained in:
Eric Wolf 2023-06-10 18:16:40 +02:00
parent ac4337a921
commit e683c80c75
No known key found for this signature in database
GPG key ID: 80D2DA428A4A537F
5 changed files with 54 additions and 40 deletions

View file

@ -1236,7 +1236,7 @@ def stat_attrs(self, st, path, fd=None):
# (hash_func, chunk_length) -> chunk_hash # (hash_func, chunk_length) -> chunk_hash
# we play safe and have the hash_func in the mapping key, in case we # we play safe and have the hash_func in the mapping key, in case we
# have different hash_funcs within the same borg run. # have different hash_funcs within the same borg run.
zero_chunk_ids = LRUCache(10, dispose=lambda _: None) zero_chunk_ids = LRUCache(10) # type: ignore[var-annotated]
def cached_hash(chunk, id_hash): def cached_hash(chunk, id_hash):

View file

@ -115,7 +115,7 @@ def __init__(self, decrypted_repository):
# tend to re-read the same chunks over and over. # tend to re-read the same chunks over and over.
# The capacity is kept low because increasing it does not provide any significant advantage, # The capacity is kept low because increasing it does not provide any significant advantage,
# but makes LRUCache's square behaviour noticeable and consumes more memory. # but makes LRUCache's square behaviour noticeable and consumes more memory.
self.chunks = LRUCache(capacity=10, dispose=lambda _: None) self.chunks = LRUCache(capacity=10)
# Instrumentation # Instrumentation
# Count of indirect items, i.e. data is cached in the object cache, not directly in this cache # Count of indirect items, i.e. data is cached in the object cache, not directly in this cache
@ -252,7 +252,7 @@ def __init__(self, manifest, args, decrypted_repository):
# not contained in archives. # not contained in archives.
self._items = {} self._items = {}
# cache up to <FILES> Items # cache up to <FILES> Items
self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None) self._inode_cache = LRUCache(capacity=FILES)
# _inode_count is the current count of synthetic inodes, i.e. those in self._items # _inode_count is the current count of synthetic inodes, i.e. those in self._items
self.inode_count = 0 self.inode_count = 0
# Maps inode numbers to the inode number of the parent # Maps inode numbers to the inode number of the parent
@ -445,8 +445,8 @@ def __init__(self, manifest, args, decrypted_repository):
self.decrypted_repository = decrypted_repository self.decrypted_repository = decrypted_repository
data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1)) data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1))
logger.debug("mount data cache capacity: %d chunks", data_cache_capacity) logger.debug("mount data cache capacity: %d chunks", data_cache_capacity)
self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None) self.data_cache = LRUCache(capacity=data_cache_capacity)
self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None) self._last_pos = LRUCache(capacity=FILES)
def sig_info_handler(self, sig_no, stack): def sig_info_handler(self, sig_no, stack):
logger.debug( logger.debug(
@ -689,7 +689,7 @@ def read(self, fh, offset, size):
size -= n size -= n
if not size: if not size:
if fh in self._last_pos: if fh in self._last_pos:
self._last_pos.upd(fh, (chunk_no, chunk_offset)) self._last_pos.replace(fh, (chunk_no, chunk_offset))
else: else:
self._last_pos[fh] = (chunk_no, chunk_offset) self._last_pos[fh] = (chunk_no, chunk_offset)
break break

View file

@ -1,57 +1,71 @@
from collections import OrderedDict
from collections.abc import Callable, ItemsView, Iterator, KeysView, MutableMapping, ValuesView
from typing import TypeVar
sentinel = object() sentinel = object()
K = TypeVar("K")
V = TypeVar("V")
class LRUCache: class LRUCache(MutableMapping[K, V]):
def __init__(self, capacity, dispose): """
self._cache = {} Mapping which maintains a maximum size by dropping the least recently used value.
self._lru = [] Items are passed to dispose before being removed and replacing an item without
removing it first is forbidden.
"""
_cache: OrderedDict[K, V]
_capacity: int
_dispose: Callable[[V], None]
def __init__(self, capacity: int, dispose: Callable[[V], None] = lambda _: None):
self._cache = OrderedDict()
self._capacity = capacity self._capacity = capacity
self._dispose = dispose self._dispose = dispose
def __setitem__(self, key, value): def __setitem__(self, key: K, value: V) -> None:
assert key not in self._cache, ( assert key not in self._cache, (
"Unexpected attempt to replace a cached item," " without first deleting the old item." "Unexpected attempt to replace a cached item," " without first deleting the old item."
) )
self._lru.append(key) while len(self._cache) >= self._capacity:
while len(self._lru) > self._capacity: self._dispose(self._cache.popitem(last=False)[1])
del self[self._lru[0]]
self._cache[key] = value self._cache[key] = value
self._cache.move_to_end(key)
def __getitem__(self, key): def __getitem__(self, key: K) -> V:
value = self._cache[key] # raise KeyError if not found self._cache.move_to_end(key) # raise KeyError if not found
self._lru.remove(key) return self._cache[key]
self._lru.append(key)
return value
def __delitem__(self, key): def __delitem__(self, key: K) -> None:
value = self._cache.pop(key) # raise KeyError if not found self._dispose(self._cache.pop(key))
self._dispose(value)
self._lru.remove(key)
def __contains__(self, key): def __contains__(self, key: object) -> bool:
return key in self._cache return key in self._cache
def get(self, key, default=None): def __len__(self) -> int:
value = self._cache.get(key, sentinel) return len(self._cache)
if value is sentinel:
return default
self._lru.remove(key)
self._lru.append(key)
return value
def upd(self, key, value): def replace(self, key: K, value: V) -> None:
# special use only: update the value for an existing key without having to dispose it first """Replace an item which is already present, not disposing it in the process"""
# this method complements __setitem__ which should be used for the normal use case. # this method complements __setitem__ which should be used for the normal use case.
assert key in self._cache, "Unexpected attempt to update a non-existing item." assert key in self._cache, "Unexpected attempt to update a non-existing item."
self._cache[key] = value self._cache[key] = value
def clear(self): def clear(self) -> None:
for value in self._cache.values(): for value in self._cache.values():
self._dispose(value) self._dispose(value)
self._cache.clear() self._cache.clear()
def items(self): def __iter__(self) -> Iterator[K]:
return self._cache.items() return iter(self._cache)
def __len__(self): def keys(self) -> KeysView[K]:
return len(self._cache) return self._cache.keys()
def values(self) -> ValuesView[V]:
return self._cache.values()
def items(self) -> ItemsView[K, V]:
return self._cache.items()

View file

@ -1536,7 +1536,7 @@ def clean_old():
else: else:
# we only have fresh enough stuff here. # we only have fresh enough stuff here.
# update the timestamp of the lru cache entry. # update the timestamp of the lru cache entry.
self.fds.upd(segment, (now, fd)) self.fds.replace(segment, (now, fd))
return fd return fd
def close_segment(self): def close_segment(self):

View file

@ -7,7 +7,7 @@
class TestLRUCache: class TestLRUCache:
def test_lrucache(self): def test_lrucache(self):
c = LRUCache(2, dispose=lambda _: None) c = LRUCache(2)
assert len(c) == 0 assert len(c) == 0
assert c.items() == set() assert c.items() == set()
for i, x in enumerate("abc"): for i, x in enumerate("abc"):