From d126265fe4e0b8016004e898a58faad3a9127339 Mon Sep 17 00:00:00 2001
From: Lee Bousfield
Date: Thu, 28 Jul 2016 15:57:03 -0400
Subject: [PATCH] borg mount: cache partially read data chunks

Cherry-pick of bfb00df from master to 1.0-maint
---
 borg/archiver.py |  5 +++++
 borg/fuse.py     | 17 +++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/borg/archiver.py b/borg/archiver.py
index d91bdd3e0..76aa33ea1 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -1275,6 +1275,11 @@ def build_parser(self, args=None, prog=None):
         option is given the command will run in the background until the filesystem
         is ``umounted``.

+        The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
+        to tweak the performance. It sets the number of cached data chunks; additional
+        memory usage can be up to ~8 MiB times this number. The default is the number
+        of CPU cores.
+
         For mount options, see the fuse(8) manual page. Additional mount options
         supported by borg:

diff --git a/borg/fuse.py b/borg/fuse.py
index 7b75b8b8e..09c05d6f8 100644
--- a/borg/fuse.py
+++ b/borg/fuse.py
@@ -13,6 +13,7 @@
 from .archive import Archive
 from .helpers import daemonize, bigint_to_int
 from .logger import create_logger
+from .lrucache import LRUCache

 logger = create_logger()

@@ -62,6 +63,9 @@ def __init__(self, key, repository, manifest, archive, cached_repo):
         self.pending_archives = {}
         self.accounted_chunks = {}
         self.cache = ItemCache()
+        data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
+        logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
+        self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
         if archive:
             self.process_archive(archive)
         else:
@@ -282,8 +286,17 @@ def read(self, fh, offset, size):
                 offset -= s
                 continue
             n = min(size, s - offset)
-            chunk = self.key.decrypt(id, self.repository.get(id))
-            parts.append(chunk[offset:offset + n])
+            if id in self.data_cache:
+                data = self.data_cache[id]
+                if offset + n == len(data):
+                    # evict fully read chunk from cache
+                    del self.data_cache[id]
+            else:
+                data = self.key.decrypt(id, self.repository.get(id))
+                if offset + n < len(data):
+                    # chunk was only partially read, cache it
+                    self.data_cache[id] = data
+            parts.append(data[offset:offset + n])
             offset = 0
             size -= n
             if not size:
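
Note on the cache type: the patch uses borg's LRUCache from borg/lrucache.py.
Below is a minimal illustrative stand-in, inferred only from how fuse.py calls
it here (constructed with ``capacity`` and ``dispose``, used like a dict with
``in``, item get/set and ``del``); it is a sketch of the assumed interface,
not the actual borg implementation.

    from collections import OrderedDict

    class LRUCache:
        """Bounded mapping that drops the least recently used entry when full."""

        def __init__(self, capacity, dispose):
            self._cache = OrderedDict()
            self._capacity = capacity
            self._dispose = dispose  # called with each value evicted to make room

        def __contains__(self, key):
            return key in self._cache

        def __getitem__(self, key):
            value = self._cache.pop(key)
            self._cache[key] = value  # re-insert to mark as most recently used
            return value

        def __setitem__(self, key, value):
            if key in self._cache:
                del self._cache[key]
            elif len(self._cache) >= self._capacity:
                _, evicted = self._cache.popitem(last=False)  # least recently used
                self._dispose(evicted)
            self._cache[key] = value

        def __delitem__(self, key):
            del self._cache[key]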
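
Usage note: since the capacity is read from the environment at mount time, it
can be tuned per invocation, e.g. ``BORG_MOUNT_DATA_CACHE_ENTRIES=16 borg mount
repo::archive /mnt/point`` (repository and mountpoint are placeholders). With
chunks of up to ~8 MiB each, 16 entries bound the additional memory use at
roughly 128 MiB. A read that reaches the end of a cached chunk evicts it
immediately, so the cache only retains chunks whose last read was partial.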