fuse: refactor ItemCache

2017-06-14 12:15:46 +02:00 · 2017-06-14 12:15:46 +02:00 · 3b928a4558
parent 9fd79a9e56
commit 3b928a4558
3 changed files with 182 additions and 87 deletions
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@ -2,6 +2,7 @@ import errno
 import io
 import os
 import stat
 import struct
 import sys
 import tempfile
 import time
@ -35,76 +36,162 @@ else:
 class ItemCache:
-    GROW_BY = 2 * 1024 * 1024
+    """
    This is the "meat" of the file system's metadata storage.
    This class generates inode numbers that efficiently index items in archives,
    and retrieves items from these inode numbers.
    """
    # Approximately ~230000 items (depends on the average number of items per metadata chunk)
    # Since growing a bytearray has to copy it, growing it will converge to o(n), however,
    # this is not yet relevant due to the swiftness of copying memory. If it becomes an issue,
    # use an anonymous mmap and just resize that (or, if on 64 bit, make it so big you never need
    # to resize it in the first place; that's free).
    GROW_META_BY = 2 * 1024 * 1024
    indirect_entry_struct = struct.Struct('=cII')
    assert indirect_entry_struct.size == 9
    def __init__(self, decrypted_repository):
        self.decrypted_repository = decrypted_repository
-        self.data = bytearray()
+        # self.meta, the "meta-array" is a densely packed array of metadata about where items can be found.
-        self.writeptr = 0
+        # It is indexed by the inode number minus self.offset. (This is in a way eerily similar to how the first
-        self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
+        # unices did this).
        # The meta-array contains chunk IDs and item entries (described in inode_for_current_item).
        # The chunk IDs are referenced by item entries through relative offsets,
        # which are bounded by the metadata chunk size.
        self.meta = bytearray()
        # The current write offset in self.meta
        self.write_offset = 0
        # Offset added to meta-indices, resulting in an inode,
        # or substracted from inodes, resulting in a meta-indices.
        self.offset = 1000000
-    def new_stream(self):
+        # A temporary file that contains direct items, i.e. items directly cached in this layer.
-        self.stream_offset = 0
+        # These are items that span more than one chunk and thus cannot be efficiently cached
-        self.chunk_begin = 0
+        # by the object cache (self.decrypted_repository), which would require variable-length structures;
-        self.chunk_length = 0
+        # possible but not worth the effort, see inode_for_current_item.
-        self.current_item = b''
+        self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
-    def set_current_id(self, chunk_id, chunk_length):
+        # A small LRU cache for chunks requested by ItemCache.get() from the object cache,
-        self.chunk_id = chunk_id
+        # this significantly speeds up directory traversal and similar operations which
-        self.chunk_begin += self.chunk_length
+        # tend to re-read the same chunks over and over.
-        self.chunk_length = chunk_length
+        # The capacity is kept low because increasing it does not provide any significant advantage,
        # but makes LRUCache's square behaviour noticeable as well as consuming some memory.
        self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
-    def write_bytes(self, msgpacked_bytes):
+        # Instrumentation
-        self.current_item += msgpacked_bytes
+        # Count of indirect items, i.e. data is cached in the object cache, in this cache
-        self.stream_offset += len(msgpacked_bytes)
+        self.indirect_items = 0
-
+        # Count of direct items, i.e. data is in self.fd
-    def unpacked(self):
+        self.direct_items = 0
        msgpacked_bytes = self.current_item
        self.current_item = b''
        self.last_context_sensitive = self.stream_offset - len(msgpacked_bytes) <= self.chunk_begin
        self.last_length = len(msgpacked_bytes)
        self.last_item = msgpacked_bytes
    def inode_for_current_item(self):
        if self.writeptr + 37 >= len(self.data):
            self.data = self.data + bytes(self.GROW_BY)
        if self.last_context_sensitive:
            pos = self.fd.seek(0, io.SEEK_END)
            self.fd.write(self.last_item)
            self.data[self.writeptr:self.writeptr+9] = b'S' + pos.to_bytes(8, 'little')
            self.writeptr += 9
            return self.writeptr - 9 + self.offset
        else:
            self.data[self.writeptr:self.writeptr+1] = b'I'
            self.data[self.writeptr+1:self.writeptr+33] = self.chunk_id
            last_item_offset = self.stream_offset - self.last_length
            last_item_offset -= self.chunk_begin
            self.data[self.writeptr+33:self.writeptr+37] = last_item_offset.to_bytes(4, 'little')
            self.writeptr += 37
            return self.writeptr - 37 + self.offset
    def get(self, inode):
        offset = inode - self.offset
        if offset < 0:
            raise ValueError('ItemCache.get() called with an invalid inode number')
-        is_context_sensitive = self.data[offset] == ord(b'S')
+        if self.meta[offset] == ord(b'S'):
-
+            fd_offset = int.from_bytes(self.meta[offset + 1:offset + 9], 'little')
        # print(is_context_sensitive)
        if is_context_sensitive:
            fd_offset = int.from_bytes(self.data[offset+1:offset+9], 'little')
            self.fd.seek(fd_offset, io.SEEK_SET)
            return Item(internal_dict=next(msgpack.Unpacker(self.fd, read_size=1024)))
        else:
-            chunk_id = bytes(self.data[offset+1:offset+33])
+            _, chunk_id_relative_offset, chunk_offset = self.indirect_entry_struct.unpack_from(self.meta, offset)
-            chunk_offset = int.from_bytes(self.data[offset+33:offset+37], 'little')
+            chunk_id_offset = offset - chunk_id_relative_offset
-            csize, chunk = next(self.decrypted_repository.get_many([chunk_id]))
+            # bytearray slices are bytearrays as well, explicitly convert to bytes()
            chunk_id = bytes(self.meta[chunk_id_offset:chunk_id_offset + 32])
            chunk_offset = int.from_bytes(self.meta[offset + 5:offset + 9], 'little')
            chunk = self.chunks.get(chunk_id)
            if not chunk:
                csize, chunk = next(self.decrypted_repository.get_many([chunk_id]))
                self.chunks[chunk_id] = chunk
            data = memoryview(chunk)[chunk_offset:]
            unpacker = msgpack.Unpacker()
            unpacker.feed(data)
            return Item(internal_dict=next(unpacker))
    def iter_archive_items(self, archive_item_ids):
        unpacker = msgpack.Unpacker()
        stream_offset = 0
        chunk_begin = 0
        last_chunk_length = 0
        msgpacked_bytes = b''
        write_offset = self.write_offset
        meta = self.meta
        pack_indirect_into = self.indirect_entry_struct.pack_into
        def write_bytes(append_msgpacked_bytes):
            nonlocal msgpacked_bytes
            nonlocal stream_offset
            msgpacked_bytes += append_msgpacked_bytes
            stream_offset += len(append_msgpacked_bytes)
        for key, (csize, data) in zip(archive_item_ids, self.decrypted_repository.get_many(archive_item_ids)):
            # Store the chunk ID in the meta-array
            if write_offset + 32 >= len(meta):
                self.meta = meta = meta + bytes(self.GROW_META_BY)
            meta[write_offset:write_offset + 32] = key
            current_id_offset = write_offset
            write_offset += 32
            # The chunk boundaries cannot be tracked through write_bytes, because the unpack state machine
            # *can* and *will* consume partial items, so calls to write_bytes are unrelated to chunk boundaries.
            chunk_begin += last_chunk_length
            last_chunk_length = len(data)
            unpacker.feed(data)
            while True:
                try:
                    item = unpacker.unpack(write_bytes)
                except msgpack.OutOfData:
                    # Need more data, feed the next chunk
                    break
                current_item_length = len(msgpacked_bytes)
                current_spans_chunks = stream_offset - current_item_length <= chunk_begin
                current_item = msgpacked_bytes
                msgpacked_bytes = b''
                if write_offset + 9 >= len(meta):
                    self.meta = meta = meta + bytes(self.GROW_META_BY)
                # item entries in the meta-array come in two different flavours, both nine bytes long.
                # (1) for items that span chunks:
                #
                #     'S' + 8 byte offset into the self.fd file, where the msgpacked item starts.
                #
                # (2) for items that are completely contained in one chunk, which usually is the great majority
                #     (about 700:1 for system backups)
                #
                #     'I' + 4 byte offset where the chunk ID is + 4 byte offset in the chunk
                #     where the msgpacked items starts
                #
                #     The chunk ID offset is the number of bytes _back_ from the start of the entry, i.e.:
                #
                #     |Chunk ID| ....          |S1234abcd|
                #      ^------ offset ----------^
                if current_spans_chunks:
                    pos = self.fd.seek(0, io.SEEK_END)
                    self.fd.write(current_item)
                    meta[write_offset:write_offset + 9] = b'S' + pos.to_bytes(8, 'little')
                    write_offset += 9
                    self.direct_items += 1
                    inode = write_offset - 9 + self.offset
                else:
                    item_offset = stream_offset - current_item_length - chunk_begin
                    pack_indirect_into(meta, write_offset, b'I', write_offset - current_id_offset, item_offset)
                    write_offset += 9
                    self.indirect_items += 1
                    inode = write_offset - 9 + self.offset
                yield inode, Item(internal_dict=item)
        self.write_offset = write_offset
 class FuseOperations(llfuse.Operations):
    """Export archive as a fuse filesystem
@ -120,9 +207,17 @@ class FuseOperations(llfuse.Operations):
        self.args = args
        self.manifest = manifest
        self.key = key
-        self._inode_count = 0
+        # Maps inode numbers to Item instances. This is used for synthetic inodes,
        # i.e. file-system objects that are made up by FuseOperations and are not contained
        # in the archives. For example archive directories or intermediate directories
        # not contained in archives.
        self.items = {}
        # _inode_count is the current count of synthetic inodes, i.e. those in self.items
        self._inode_count = 0
        # Maps inode numbers to the inode number of the parent
        self.parent = {}
        # Maps inode numbers to a dictionary mapping byte directory entry names to their inode numbers,
        # i.e. this contains all dirents of everything that is mounted. (It becomes really big).
        self.contents = defaultdict(dict)
        self.default_uid = os.getuid()
        self.default_gid = os.getgid()
@ -150,15 +245,15 @@ class FuseOperations(llfuse.Operations):
                    self.pending_archives[archive_inode] = archive_name
    def sig_info_handler(self, sig_no, stack):
-        logger.debug('fuse: %d inodes, %d synth inodes, %d edges (%s)',
+        logger.debug('fuse: %d synth inodes, %d edges (%s)',
-                     self._inode_count, len(self.items), len(self.parent),
+                     self._inode_count, len(self.parent),
                     # getsizeof is the size of the dict itself; key and value are two small-ish integers,
                     # which are shared due to code structure (this has been verified).
                     format_file_size(sys.getsizeof(self.parent) + len(self.parent) * sys.getsizeof(self._inode_count)))
        logger.debug('fuse: %d pending archives', len(self.pending_archives))
-        logger.debug('fuse: ItemCache %d entries, meta-array %s, dependent items %s',
+        logger.debug('fuse: ItemCache %d entries (%d direct, %d indirect), meta-array size %s, direct items size %s',
-                     self._inode_count - len(self.items),
+                     self.cache.direct_items + self.cache.indirect_items, self.cache.direct_items, self.cache.indirect_items,
-                     format_file_size(sys.getsizeof(self.cache.data)),
+                     format_file_size(sys.getsizeof(self.cache.meta)),
                     format_file_size(os.stat(self.cache.fd.fileno()).st_size))
        logger.debug('fuse: data cache: %d/%d entries, %s', len(self.data_cache.items()), self.data_cache._capacity,
                     format_file_size(sum(len(chunk) for key, chunk in self.data_cache.items())))
@ -212,42 +307,31 @@ class FuseOperations(llfuse.Operations):
        """
        self.file_versions = {}  # for versions mode: original path -> version
        t0 = time.perf_counter()
        unpacker = msgpack.Unpacker()
        archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name,
                          consider_part_files=self.args.consider_part_files)
-        self.cache.new_stream()
+        for item_inode, item in self.cache.iter_archive_items(archive.metadata.items):
-        for key, (csize, data) in zip(archive.metadata.items, self.decrypted_repository.get_many(archive.metadata.items)):
+            path = os.fsencode(item.path)
-            self.cache.set_current_id(key, len(data))
+            is_dir = stat.S_ISDIR(item.mode)
-            unpacker.feed(data)
+            if is_dir:
            while True:
                try:
-                    item = unpacker.unpack(self.cache.write_bytes)
+                    # This can happen if an archive was created with a command line like
-                except msgpack.OutOfData:
+                    # $ borg create ... dir1/file dir1
-                    break
+                    # In this case the code below will have created a default_dir inode for dir1 already.
-                self.cache.unpacked()
+                    inode = self._find_inode(path, prefix)
-                item = Item(internal_dict=item)
+                except KeyError:
-                path = os.fsencode(item.path)
+                    pass
-                is_dir = stat.S_ISDIR(item.mode)
+                else:
-                if is_dir:
+                    self.items[inode] = item
-                    try:
+                    continue
-                        # This can happen if an archive was created with a command line like
+            segments = prefix + path.split(b'/')
-                        # $ borg create ... dir1/file dir1
+            parent = 1
-                        # In this case the code below will have created a default_dir inode for dir1 already.
+            for segment in segments[:-1]:
-                        inode = self._find_inode(path, prefix)
+                parent = self.process_inner(segment, parent)
-                    except KeyError:
+            self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
                        pass
                    else:
                        self.items[inode] = item
                        continue
                segments = prefix + path.split(b'/')
                parent = 1
                for segment in segments[:-1]:
                    parent = self.process_inner(segment, parent)
                self.process_leaf(segments[-1], item, parent, prefix, is_dir)
        duration = time.perf_counter() - t0
        logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
-    def process_leaf(self, name, item, parent, prefix, is_dir):
+    def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
        def file_version(item):
            if 'chunks' in item:
                ident = 0
@ -290,7 +374,7 @@ class FuseOperations(llfuse.Operations):
            item.nlink = item.get('nlink', 1) + 1
            self.items[inode] = item
        else:
-            inode = self.cache.inode_for_current_item()
+            inode = item_inode
        self.parent[inode] = parent
        if name:
            self.contents[parent][name] = inode
--- a/src/borg/lrucache.py
+++ b/src/borg/lrucache.py
@ -28,6 +28,14 @@ class LRUCache:
    def __contains__(self, key):
        return key in self._cache
    def get(self, key, default=None):
        value = self._cache.get(key, default)
        if value is default:
            return value
        self._lru.remove(key)
        self._lru.append(key)
        return value
    def clear(self):
        for value in self._cache.values():
            self._dispose(value)
--- a/src/borg/testsuite/lrucache.py
+++ b/src/borg/testsuite/lrucache.py
@ -19,7 +19,10 @@ class TestLRUCache:
        assert 'b' in c
        with pytest.raises(KeyError):
            c['a']
        assert c.get('a') is None
        assert c.get('a', 'foo') == 'foo'
        assert c['b'] == 1
        assert c.get('b') == 1
        assert c['c'] == 2
        c['d'] = 3
        assert len(c) == 2