mirror of https://github.com/borgbackup/borg.git
fuse: refactor ItemCache
This commit is contained in:
parent
9fd79a9e56
commit
3b928a4558
258
src/borg/fuse.py
258
src/borg/fuse.py
|
@ -2,6 +2,7 @@ import errno
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import stat
|
import stat
|
||||||
|
import struct
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
@ -35,76 +36,162 @@ else:
|
||||||
|
|
||||||
|
|
||||||
class ItemCache:
|
class ItemCache:
|
||||||
GROW_BY = 2 * 1024 * 1024
|
"""
|
||||||
|
This is the "meat" of the file system's metadata storage.
|
||||||
|
|
||||||
|
This class generates inode numbers that efficiently index items in archives,
|
||||||
|
and retrieves items from these inode numbers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Approximately ~230000 items (depends on the average number of items per metadata chunk)
|
||||||
|
# Since growing a bytearray has to copy it, growing it will converge to o(n), however,
|
||||||
|
# this is not yet relevant due to the swiftness of copying memory. If it becomes an issue,
|
||||||
|
# use an anonymous mmap and just resize that (or, if on 64 bit, make it so big you never need
|
||||||
|
# to resize it in the first place; that's free).
|
||||||
|
GROW_META_BY = 2 * 1024 * 1024
|
||||||
|
|
||||||
|
indirect_entry_struct = struct.Struct('=cII')
|
||||||
|
assert indirect_entry_struct.size == 9
|
||||||
|
|
||||||
def __init__(self, decrypted_repository):
|
def __init__(self, decrypted_repository):
|
||||||
self.decrypted_repository = decrypted_repository
|
self.decrypted_repository = decrypted_repository
|
||||||
self.data = bytearray()
|
# self.meta, the "meta-array" is a densely packed array of metadata about where items can be found.
|
||||||
self.writeptr = 0
|
# It is indexed by the inode number minus self.offset. (This is in a way eerily similar to how the first
|
||||||
self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
|
# unices did this).
|
||||||
|
# The meta-array contains chunk IDs and item entries (described in inode_for_current_item).
|
||||||
|
# The chunk IDs are referenced by item entries through relative offsets,
|
||||||
|
# which are bounded by the metadata chunk size.
|
||||||
|
self.meta = bytearray()
|
||||||
|
# The current write offset in self.meta
|
||||||
|
self.write_offset = 0
|
||||||
|
|
||||||
|
# Offset added to meta-indices, resulting in an inode,
|
||||||
|
# or substracted from inodes, resulting in a meta-indices.
|
||||||
self.offset = 1000000
|
self.offset = 1000000
|
||||||
|
|
||||||
def new_stream(self):
|
# A temporary file that contains direct items, i.e. items directly cached in this layer.
|
||||||
self.stream_offset = 0
|
# These are items that span more than one chunk and thus cannot be efficiently cached
|
||||||
self.chunk_begin = 0
|
# by the object cache (self.decrypted_repository), which would require variable-length structures;
|
||||||
self.chunk_length = 0
|
# possible but not worth the effort, see inode_for_current_item.
|
||||||
self.current_item = b''
|
self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
|
||||||
|
|
||||||
def set_current_id(self, chunk_id, chunk_length):
|
# A small LRU cache for chunks requested by ItemCache.get() from the object cache,
|
||||||
self.chunk_id = chunk_id
|
# this significantly speeds up directory traversal and similar operations which
|
||||||
self.chunk_begin += self.chunk_length
|
# tend to re-read the same chunks over and over.
|
||||||
self.chunk_length = chunk_length
|
# The capacity is kept low because increasing it does not provide any significant advantage,
|
||||||
|
# but makes LRUCache's square behaviour noticeable as well as consuming some memory.
|
||||||
|
self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
|
||||||
|
|
||||||
def write_bytes(self, msgpacked_bytes):
|
# Instrumentation
|
||||||
self.current_item += msgpacked_bytes
|
# Count of indirect items, i.e. data is cached in the object cache, in this cache
|
||||||
self.stream_offset += len(msgpacked_bytes)
|
self.indirect_items = 0
|
||||||
|
# Count of direct items, i.e. data is in self.fd
|
||||||
def unpacked(self):
|
self.direct_items = 0
|
||||||
msgpacked_bytes = self.current_item
|
|
||||||
self.current_item = b''
|
|
||||||
self.last_context_sensitive = self.stream_offset - len(msgpacked_bytes) <= self.chunk_begin
|
|
||||||
self.last_length = len(msgpacked_bytes)
|
|
||||||
self.last_item = msgpacked_bytes
|
|
||||||
|
|
||||||
def inode_for_current_item(self):
|
|
||||||
if self.writeptr + 37 >= len(self.data):
|
|
||||||
self.data = self.data + bytes(self.GROW_BY)
|
|
||||||
|
|
||||||
if self.last_context_sensitive:
|
|
||||||
pos = self.fd.seek(0, io.SEEK_END)
|
|
||||||
self.fd.write(self.last_item)
|
|
||||||
self.data[self.writeptr:self.writeptr+9] = b'S' + pos.to_bytes(8, 'little')
|
|
||||||
self.writeptr += 9
|
|
||||||
return self.writeptr - 9 + self.offset
|
|
||||||
else:
|
|
||||||
self.data[self.writeptr:self.writeptr+1] = b'I'
|
|
||||||
self.data[self.writeptr+1:self.writeptr+33] = self.chunk_id
|
|
||||||
last_item_offset = self.stream_offset - self.last_length
|
|
||||||
last_item_offset -= self.chunk_begin
|
|
||||||
self.data[self.writeptr+33:self.writeptr+37] = last_item_offset.to_bytes(4, 'little')
|
|
||||||
self.writeptr += 37
|
|
||||||
return self.writeptr - 37 + self.offset
|
|
||||||
|
|
||||||
def get(self, inode):
|
def get(self, inode):
|
||||||
offset = inode - self.offset
|
offset = inode - self.offset
|
||||||
if offset < 0:
|
if offset < 0:
|
||||||
raise ValueError('ItemCache.get() called with an invalid inode number')
|
raise ValueError('ItemCache.get() called with an invalid inode number')
|
||||||
is_context_sensitive = self.data[offset] == ord(b'S')
|
if self.meta[offset] == ord(b'S'):
|
||||||
|
fd_offset = int.from_bytes(self.meta[offset + 1:offset + 9], 'little')
|
||||||
# print(is_context_sensitive)
|
|
||||||
if is_context_sensitive:
|
|
||||||
fd_offset = int.from_bytes(self.data[offset+1:offset+9], 'little')
|
|
||||||
self.fd.seek(fd_offset, io.SEEK_SET)
|
self.fd.seek(fd_offset, io.SEEK_SET)
|
||||||
return Item(internal_dict=next(msgpack.Unpacker(self.fd, read_size=1024)))
|
return Item(internal_dict=next(msgpack.Unpacker(self.fd, read_size=1024)))
|
||||||
else:
|
else:
|
||||||
chunk_id = bytes(self.data[offset+1:offset+33])
|
_, chunk_id_relative_offset, chunk_offset = self.indirect_entry_struct.unpack_from(self.meta, offset)
|
||||||
chunk_offset = int.from_bytes(self.data[offset+33:offset+37], 'little')
|
chunk_id_offset = offset - chunk_id_relative_offset
|
||||||
csize, chunk = next(self.decrypted_repository.get_many([chunk_id]))
|
# bytearray slices are bytearrays as well, explicitly convert to bytes()
|
||||||
|
chunk_id = bytes(self.meta[chunk_id_offset:chunk_id_offset + 32])
|
||||||
|
chunk_offset = int.from_bytes(self.meta[offset + 5:offset + 9], 'little')
|
||||||
|
chunk = self.chunks.get(chunk_id)
|
||||||
|
if not chunk:
|
||||||
|
csize, chunk = next(self.decrypted_repository.get_many([chunk_id]))
|
||||||
|
self.chunks[chunk_id] = chunk
|
||||||
data = memoryview(chunk)[chunk_offset:]
|
data = memoryview(chunk)[chunk_offset:]
|
||||||
unpacker = msgpack.Unpacker()
|
unpacker = msgpack.Unpacker()
|
||||||
unpacker.feed(data)
|
unpacker.feed(data)
|
||||||
return Item(internal_dict=next(unpacker))
|
return Item(internal_dict=next(unpacker))
|
||||||
|
|
||||||
|
def iter_archive_items(self, archive_item_ids):
|
||||||
|
unpacker = msgpack.Unpacker()
|
||||||
|
|
||||||
|
stream_offset = 0
|
||||||
|
chunk_begin = 0
|
||||||
|
last_chunk_length = 0
|
||||||
|
msgpacked_bytes = b''
|
||||||
|
|
||||||
|
write_offset = self.write_offset
|
||||||
|
meta = self.meta
|
||||||
|
pack_indirect_into = self.indirect_entry_struct.pack_into
|
||||||
|
|
||||||
|
def write_bytes(append_msgpacked_bytes):
|
||||||
|
nonlocal msgpacked_bytes
|
||||||
|
nonlocal stream_offset
|
||||||
|
msgpacked_bytes += append_msgpacked_bytes
|
||||||
|
stream_offset += len(append_msgpacked_bytes)
|
||||||
|
|
||||||
|
for key, (csize, data) in zip(archive_item_ids, self.decrypted_repository.get_many(archive_item_ids)):
|
||||||
|
# Store the chunk ID in the meta-array
|
||||||
|
if write_offset + 32 >= len(meta):
|
||||||
|
self.meta = meta = meta + bytes(self.GROW_META_BY)
|
||||||
|
meta[write_offset:write_offset + 32] = key
|
||||||
|
current_id_offset = write_offset
|
||||||
|
write_offset += 32
|
||||||
|
|
||||||
|
# The chunk boundaries cannot be tracked through write_bytes, because the unpack state machine
|
||||||
|
# *can* and *will* consume partial items, so calls to write_bytes are unrelated to chunk boundaries.
|
||||||
|
chunk_begin += last_chunk_length
|
||||||
|
last_chunk_length = len(data)
|
||||||
|
|
||||||
|
unpacker.feed(data)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
item = unpacker.unpack(write_bytes)
|
||||||
|
except msgpack.OutOfData:
|
||||||
|
# Need more data, feed the next chunk
|
||||||
|
break
|
||||||
|
|
||||||
|
current_item_length = len(msgpacked_bytes)
|
||||||
|
current_spans_chunks = stream_offset - current_item_length <= chunk_begin
|
||||||
|
current_item = msgpacked_bytes
|
||||||
|
msgpacked_bytes = b''
|
||||||
|
|
||||||
|
if write_offset + 9 >= len(meta):
|
||||||
|
self.meta = meta = meta + bytes(self.GROW_META_BY)
|
||||||
|
|
||||||
|
# item entries in the meta-array come in two different flavours, both nine bytes long.
|
||||||
|
# (1) for items that span chunks:
|
||||||
|
#
|
||||||
|
# 'S' + 8 byte offset into the self.fd file, where the msgpacked item starts.
|
||||||
|
#
|
||||||
|
# (2) for items that are completely contained in one chunk, which usually is the great majority
|
||||||
|
# (about 700:1 for system backups)
|
||||||
|
#
|
||||||
|
# 'I' + 4 byte offset where the chunk ID is + 4 byte offset in the chunk
|
||||||
|
# where the msgpacked items starts
|
||||||
|
#
|
||||||
|
# The chunk ID offset is the number of bytes _back_ from the start of the entry, i.e.:
|
||||||
|
#
|
||||||
|
# |Chunk ID| .... |S1234abcd|
|
||||||
|
# ^------ offset ----------^
|
||||||
|
|
||||||
|
if current_spans_chunks:
|
||||||
|
pos = self.fd.seek(0, io.SEEK_END)
|
||||||
|
self.fd.write(current_item)
|
||||||
|
meta[write_offset:write_offset + 9] = b'S' + pos.to_bytes(8, 'little')
|
||||||
|
write_offset += 9
|
||||||
|
self.direct_items += 1
|
||||||
|
inode = write_offset - 9 + self.offset
|
||||||
|
else:
|
||||||
|
item_offset = stream_offset - current_item_length - chunk_begin
|
||||||
|
pack_indirect_into(meta, write_offset, b'I', write_offset - current_id_offset, item_offset)
|
||||||
|
write_offset += 9
|
||||||
|
self.indirect_items += 1
|
||||||
|
inode = write_offset - 9 + self.offset
|
||||||
|
|
||||||
|
yield inode, Item(internal_dict=item)
|
||||||
|
|
||||||
|
self.write_offset = write_offset
|
||||||
|
|
||||||
|
|
||||||
class FuseOperations(llfuse.Operations):
|
class FuseOperations(llfuse.Operations):
|
||||||
"""Export archive as a fuse filesystem
|
"""Export archive as a fuse filesystem
|
||||||
|
@ -120,9 +207,17 @@ class FuseOperations(llfuse.Operations):
|
||||||
self.args = args
|
self.args = args
|
||||||
self.manifest = manifest
|
self.manifest = manifest
|
||||||
self.key = key
|
self.key = key
|
||||||
self._inode_count = 0
|
# Maps inode numbers to Item instances. This is used for synthetic inodes,
|
||||||
|
# i.e. file-system objects that are made up by FuseOperations and are not contained
|
||||||
|
# in the archives. For example archive directories or intermediate directories
|
||||||
|
# not contained in archives.
|
||||||
self.items = {}
|
self.items = {}
|
||||||
|
# _inode_count is the current count of synthetic inodes, i.e. those in self.items
|
||||||
|
self._inode_count = 0
|
||||||
|
# Maps inode numbers to the inode number of the parent
|
||||||
self.parent = {}
|
self.parent = {}
|
||||||
|
# Maps inode numbers to a dictionary mapping byte directory entry names to their inode numbers,
|
||||||
|
# i.e. this contains all dirents of everything that is mounted. (It becomes really big).
|
||||||
self.contents = defaultdict(dict)
|
self.contents = defaultdict(dict)
|
||||||
self.default_uid = os.getuid()
|
self.default_uid = os.getuid()
|
||||||
self.default_gid = os.getgid()
|
self.default_gid = os.getgid()
|
||||||
|
@ -150,15 +245,15 @@ class FuseOperations(llfuse.Operations):
|
||||||
self.pending_archives[archive_inode] = archive_name
|
self.pending_archives[archive_inode] = archive_name
|
||||||
|
|
||||||
def sig_info_handler(self, sig_no, stack):
|
def sig_info_handler(self, sig_no, stack):
|
||||||
logger.debug('fuse: %d inodes, %d synth inodes, %d edges (%s)',
|
logger.debug('fuse: %d synth inodes, %d edges (%s)',
|
||||||
self._inode_count, len(self.items), len(self.parent),
|
self._inode_count, len(self.parent),
|
||||||
# getsizeof is the size of the dict itself; key and value are two small-ish integers,
|
# getsizeof is the size of the dict itself; key and value are two small-ish integers,
|
||||||
# which are shared due to code structure (this has been verified).
|
# which are shared due to code structure (this has been verified).
|
||||||
format_file_size(sys.getsizeof(self.parent) + len(self.parent) * sys.getsizeof(self._inode_count)))
|
format_file_size(sys.getsizeof(self.parent) + len(self.parent) * sys.getsizeof(self._inode_count)))
|
||||||
logger.debug('fuse: %d pending archives', len(self.pending_archives))
|
logger.debug('fuse: %d pending archives', len(self.pending_archives))
|
||||||
logger.debug('fuse: ItemCache %d entries, meta-array %s, dependent items %s',
|
logger.debug('fuse: ItemCache %d entries (%d direct, %d indirect), meta-array size %s, direct items size %s',
|
||||||
self._inode_count - len(self.items),
|
self.cache.direct_items + self.cache.indirect_items, self.cache.direct_items, self.cache.indirect_items,
|
||||||
format_file_size(sys.getsizeof(self.cache.data)),
|
format_file_size(sys.getsizeof(self.cache.meta)),
|
||||||
format_file_size(os.stat(self.cache.fd.fileno()).st_size))
|
format_file_size(os.stat(self.cache.fd.fileno()).st_size))
|
||||||
logger.debug('fuse: data cache: %d/%d entries, %s', len(self.data_cache.items()), self.data_cache._capacity,
|
logger.debug('fuse: data cache: %d/%d entries, %s', len(self.data_cache.items()), self.data_cache._capacity,
|
||||||
format_file_size(sum(len(chunk) for key, chunk in self.data_cache.items())))
|
format_file_size(sum(len(chunk) for key, chunk in self.data_cache.items())))
|
||||||
|
@ -212,42 +307,31 @@ class FuseOperations(llfuse.Operations):
|
||||||
"""
|
"""
|
||||||
self.file_versions = {} # for versions mode: original path -> version
|
self.file_versions = {} # for versions mode: original path -> version
|
||||||
t0 = time.perf_counter()
|
t0 = time.perf_counter()
|
||||||
unpacker = msgpack.Unpacker()
|
|
||||||
archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name,
|
archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name,
|
||||||
consider_part_files=self.args.consider_part_files)
|
consider_part_files=self.args.consider_part_files)
|
||||||
self.cache.new_stream()
|
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items):
|
||||||
for key, (csize, data) in zip(archive.metadata.items, self.decrypted_repository.get_many(archive.metadata.items)):
|
path = os.fsencode(item.path)
|
||||||
self.cache.set_current_id(key, len(data))
|
is_dir = stat.S_ISDIR(item.mode)
|
||||||
unpacker.feed(data)
|
if is_dir:
|
||||||
while True:
|
|
||||||
try:
|
try:
|
||||||
item = unpacker.unpack(self.cache.write_bytes)
|
# This can happen if an archive was created with a command line like
|
||||||
except msgpack.OutOfData:
|
# $ borg create ... dir1/file dir1
|
||||||
break
|
# In this case the code below will have created a default_dir inode for dir1 already.
|
||||||
self.cache.unpacked()
|
inode = self._find_inode(path, prefix)
|
||||||
item = Item(internal_dict=item)
|
except KeyError:
|
||||||
path = os.fsencode(item.path)
|
pass
|
||||||
is_dir = stat.S_ISDIR(item.mode)
|
else:
|
||||||
if is_dir:
|
self.items[inode] = item
|
||||||
try:
|
continue
|
||||||
# This can happen if an archive was created with a command line like
|
segments = prefix + path.split(b'/')
|
||||||
# $ borg create ... dir1/file dir1
|
parent = 1
|
||||||
# In this case the code below will have created a default_dir inode for dir1 already.
|
for segment in segments[:-1]:
|
||||||
inode = self._find_inode(path, prefix)
|
parent = self.process_inner(segment, parent)
|
||||||
except KeyError:
|
self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
|
||||||
pass
|
|
||||||
else:
|
|
||||||
self.items[inode] = item
|
|
||||||
continue
|
|
||||||
segments = prefix + path.split(b'/')
|
|
||||||
parent = 1
|
|
||||||
for segment in segments[:-1]:
|
|
||||||
parent = self.process_inner(segment, parent)
|
|
||||||
self.process_leaf(segments[-1], item, parent, prefix, is_dir)
|
|
||||||
duration = time.perf_counter() - t0
|
duration = time.perf_counter() - t0
|
||||||
logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
|
logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
|
||||||
|
|
||||||
def process_leaf(self, name, item, parent, prefix, is_dir):
|
def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
|
||||||
def file_version(item):
|
def file_version(item):
|
||||||
if 'chunks' in item:
|
if 'chunks' in item:
|
||||||
ident = 0
|
ident = 0
|
||||||
|
@ -290,7 +374,7 @@ class FuseOperations(llfuse.Operations):
|
||||||
item.nlink = item.get('nlink', 1) + 1
|
item.nlink = item.get('nlink', 1) + 1
|
||||||
self.items[inode] = item
|
self.items[inode] = item
|
||||||
else:
|
else:
|
||||||
inode = self.cache.inode_for_current_item()
|
inode = item_inode
|
||||||
self.parent[inode] = parent
|
self.parent[inode] = parent
|
||||||
if name:
|
if name:
|
||||||
self.contents[parent][name] = inode
|
self.contents[parent][name] = inode
|
||||||
|
|
|
@ -28,6 +28,14 @@ class LRUCache:
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
return key in self._cache
|
return key in self._cache
|
||||||
|
|
||||||
|
def get(self, key, default=None):
|
||||||
|
value = self._cache.get(key, default)
|
||||||
|
if value is default:
|
||||||
|
return value
|
||||||
|
self._lru.remove(key)
|
||||||
|
self._lru.append(key)
|
||||||
|
return value
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
for value in self._cache.values():
|
for value in self._cache.values():
|
||||||
self._dispose(value)
|
self._dispose(value)
|
||||||
|
|
|
@ -19,7 +19,10 @@ class TestLRUCache:
|
||||||
assert 'b' in c
|
assert 'b' in c
|
||||||
with pytest.raises(KeyError):
|
with pytest.raises(KeyError):
|
||||||
c['a']
|
c['a']
|
||||||
|
assert c.get('a') is None
|
||||||
|
assert c.get('a', 'foo') == 'foo'
|
||||||
assert c['b'] == 1
|
assert c['b'] == 1
|
||||||
|
assert c.get('b') == 1
|
||||||
assert c['c'] == 2
|
assert c['c'] == 2
|
||||||
c['d'] = 3
|
c['d'] = 3
|
||||||
assert len(c) == 2
|
assert len(c) == 2
|
||||||
|
|
Loading…
Reference in New Issue