Merge pull request #2778 from enkore/f/fuse-versions-numbering

fuse: versions view, linear numbering by archive time
TW 2017-07-03 22:43:07 +02:00 committed by GitHub
commit 9bd522819e
6 changed files with 93 additions and 20 deletions
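Background for the diffs below: in the FUSE "versions" view every regular file is shown as a directory containing one entry per content version. This PR replaces the old per-file suffix (an 8-digit hex ident built by running adler32 over the chunk ids) with a small decimal counter that increases in archive-time order and only advances when the file's contents actually change. A minimal, purely illustrative sketch of the new name format, using a hypothetical file name (the version is spliced in before the extension so tools still recognize the file type):

import os

# Hypothetical example: version 1 of a file called report.txt.
name = b'report.txt'
version = 1
stem, ext = os.path.splitext(name)
versioned = stem + os.fsencode('.%05d' % version) + ext
assert versioned == b'report.00001.txt'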

View File

@@ -695,3 +695,11 @@ hashindex_size(HashIndex *index)
{
return sizeof(HashHeader) + index->num_buckets * index->bucket_size;
}
/*
* Used by the FuseVersionsIndex.
*/
typedef struct {
uint32_t version;
char hash[16];
} __attribute__((__packed__)) FuseVersionsElement;
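The packed struct gives each index entry a fixed 20-byte value: a 4-byte version counter followed by the 16-byte contents hash. A quick, purely illustrative way to sanity-check that layout from Python (the version field is stored little-endian, see the _htole32/_le32toh calls in hashindex.pyx below):

import struct

# '<I16s' = little-endian uint32 version + 16 raw hash bytes,
# matching the packed FuseVersionsElement above.
assert struct.calcsize('<I16s') == 20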

View File

@@ -8,7 +8,7 @@ from libc.stdlib cimport malloc, free
from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
from cpython.bytes cimport PyBytes_FromStringAndSize
API_VERSION = '1.1_01'
API_VERSION = '1.1_02'
cdef extern from "../algorithms/blake2-libselect.h":
@@ -252,6 +252,25 @@ def blake2b_256(key, data):
return PyBytes_FromStringAndSize(<char*> &md[0], 32)
def blake2b_128(data):
cdef blake2b_state state
cdef unsigned char md[16]
cdef unsigned char *data_ptr = data
if blake2b_init(&state, 16) == -1:
raise Exception('blake2b_init() failed')
rc = blake2b_update(&state, data_ptr, len(data))
if rc == -1:
raise Exception('blake2b_update() failed')
rc = blake2b_final(&state, &md[0], 16)
if rc == -1:
raise Exception('blake2b_final() failed')
return PyBytes_FromStringAndSize(<char*> &md[0], 16)
def hkdf_hmac_sha512(ikm, salt, info, output_length):
"""
Compute HKDF-HMAC-SHA512 with input key material *ikm*, *salt* and *info* to produce *output_length* bytes.
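The new blake2b_128() helper is plain, unkeyed BLAKE2b with a 16-byte digest; borg ships its own implementation, but for illustration the same digest can be computed with the standard library (hashlib.blake2b is available on CPython 3.6+):

import hashlib

# Illustrative equivalent of blake2b_128(): unkeyed BLAKE2b, 16-byte digest.
def blake2b_128_py(data: bytes) -> bytes:
    return hashlib.blake2b(data, digest_size=16).digest()

assert len(blake2b_128_py(b'input/test')) == 16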

View File

@@ -9,7 +9,6 @@ import time
from collections import defaultdict
from signal import SIGINT
from distutils.version import LooseVersion
from zlib import adler32
import llfuse
import msgpack
@@ -17,7 +16,9 @@ import msgpack
from .logger import create_logger
logger = create_logger()
from .crypto.low_level import blake2b_128
from .archive import Archive
from .hashindex import FuseVersionsIndex
from .helpers import daemonize, hardlinkable, signal_handler, format_file_size
from .item import Item
from .lrucache import LRUCache
@@ -240,13 +241,14 @@ class FuseOperations(llfuse.Operations):
if self.args.location.archive:
self.process_archive(self.args.location.archive)
else:
self.versions_index = FuseVersionsIndex()
archive_names = (x.name for x in self.manifest.archives.list_considering(self.args))
for archive_name in archive_names:
if self.versions:
# process archives immediately
self.process_archive(archive_name)
else:
# lazy load archives, create archive placeholder inode
# lazily load archives, create archive placeholder inode
archive_inode = self._create_dir(parent=1)
self.contents[1][os.fsencode(archive_name)] = archive_inode
self.pending_archives[archive_inode] = archive_name
@@ -339,12 +341,19 @@
logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
def file_version(item):
def file_version(item, path):
if 'chunks' in item:
ident = 0
for chunkid, _, _ in item.chunks:
ident = adler32(chunkid, ident)
return ident
file_id = blake2b_128(path)
current_version, previous_id = self.versions_index.get(file_id, (0, None))
chunk_ids = [chunk_id for chunk_id, _, _ in item.chunks]
contents_id = blake2b_128(b''.join(chunk_ids))
if contents_id != previous_id:
current_version += 1
self.versions_index[file_id] = current_version, contents_id
return current_version
def make_versioned_name(name, version, add_dir=False):
if add_dir:
@@ -353,16 +362,16 @@
name += b'/' + path_fname[-1]
# keep original extension at end to avoid confusing tools
name, ext = os.path.splitext(name)
version_enc = os.fsencode('.%08x' % version)
version_enc = os.fsencode('.%05d' % version)
return name + version_enc + ext
if self.versions and not is_dir:
parent = self.process_inner(name, parent)
version = file_version(item)
path = os.fsencode(item.path)
version = file_version(item, path)
if version is not None:
# regular file, with contents - maybe a hardlink master
name = make_versioned_name(name, version)
path = os.fsencode(item.path)
self.file_versions[path] = version
path = item.path
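Sketch of the new version bookkeeping in plain Python (a dict stands in for FuseVersionsIndex; all names here are illustrative): the key is the 128-bit hash of the item path, the stored value is the last version assigned plus the hash of the file's chunk ids, and the counter only advances when that contents hash changes, so a file that is identical in consecutive archives keeps its version number.

import hashlib

versions_index = {}  # stand-in for FuseVersionsIndex: path hash -> (version, contents hash)

def file_version(path: bytes, chunk_ids: list) -> int:
    file_id = hashlib.blake2b(path, digest_size=16).digest()
    current_version, previous_id = versions_index.get(file_id, (0, None))
    contents_id = hashlib.blake2b(b''.join(chunk_ids), digest_size=16).digest()
    if contents_id != previous_id:
        current_version += 1  # contents changed since the previous archive
    versions_index[file_id] = current_version, contents_id
    return current_version

# Same contents in two consecutive archives -> same version number.
assert file_version(b'input/test', [b'\x01' * 32]) == 1
assert file_version(b'input/test', [b'\x01' * 32]) == 1
assert file_version(b'input/test', [b'\x02' * 32]) == 2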

View File

@@ -6,17 +6,22 @@ import os
cimport cython
from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
from libc.errno cimport errno
from libc.string cimport memcpy
from cpython.exc cimport PyErr_SetFromErrnoWithFilename
from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
from cpython.bytes cimport PyBytes_FromStringAndSize
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_CheckExact, PyBytes_GET_SIZE, PyBytes_AS_STRING
API_VERSION = '1.1_06'
API_VERSION = '1.1_07'
cdef extern from "_hashindex.c":
ctypedef struct HashIndex:
pass
ctypedef struct FuseVersionsElement:
uint32_t version
char hash[16]
HashIndex *hashindex_read(object file_py, int permit_compact) except *
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
void hashindex_free(HashIndex *index)
@@ -74,11 +79,13 @@ cdef class IndexBase:
cdef HashIndex *index
cdef int key_size
_key_size = 32
MAX_LOAD_FACTOR = HASH_MAX_LOAD
MAX_VALUE = _MAX_VALUE
def __cinit__(self, capacity=0, path=None, key_size=32, permit_compact=False):
self.key_size = key_size
def __cinit__(self, capacity=0, path=None, permit_compact=False):
self.key_size = self._key_size
if path:
if isinstance(path, (str, bytes)):
with open(path, 'rb') as fd:
@@ -153,6 +160,36 @@ cdef class IndexBase:
return hashindex_compact(self.index)
cdef class FuseVersionsIndex(IndexBase):
# 4 byte version + 16 byte file contents hash
value_size = 20
_key_size = 16
def __getitem__(self, key):
cdef FuseVersionsElement *data
assert len(key) == self.key_size
data = <FuseVersionsElement *>hashindex_get(self.index, <char *>key)
if data == NULL:
raise KeyError(key)
return _le32toh(data.version), PyBytes_FromStringAndSize(data.hash, 16)
def __setitem__(self, key, value):
cdef FuseVersionsElement data
assert len(key) == self.key_size
data.version = value[0]
assert data.version <= _MAX_VALUE, "maximum number of versions reached"
if not PyBytes_CheckExact(value[1]) or PyBytes_GET_SIZE(value[1]) != 16:
raise TypeError("Expected bytes of length 16 for second value")
memcpy(data.hash, PyBytes_AS_STRING(value[1]), 16)
data.version = _htole32(data.version)
if not hashindex_set(self.index, <char *>key, <void *> &data):
raise Exception('hashindex_set failed')
def __contains__(self, key):
assert len(key) == self.key_size
return hashindex_get(self.index, <char *>key) != NULL
cdef class NSIndex(IndexBase):
value_size = 8
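Behaviorally, FuseVersionsIndex is a fixed-layout mapping from 16-byte keys (the path hash) to (version, 16-byte contents hash) tuples, with the version capped at MAX_VALUE and stored little-endian on disk. A hypothetical usage sketch, with made-up key and hash values (requires the compiled borg extension modules):

from borg.hashindex import FuseVersionsIndex

idx = FuseVersionsIndex()
file_id = b'\x00' * 16       # 16-byte key, normally blake2b_128(path)
contents_id = b'\xff' * 16   # 16-byte contents hash

idx[file_id] = (1, contents_id)          # store version 1 for this path
assert file_id in idx
assert idx[file_id] == (1, contents_id)  # read back (version, contents hash)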

View File

@@ -131,13 +131,13 @@ class MandatoryFeatureUnsupported(Error):
def check_extension_modules():
from . import platform, compress, item
if hashindex.API_VERSION != '1.1_06':
if hashindex.API_VERSION != '1.1_07':
raise ExtensionModuleError
if chunker.API_VERSION != '1.1_01':
raise ExtensionModuleError
if compress.API_VERSION != '1.1_03':
raise ExtensionModuleError
if borg.crypto.low_level.API_VERSION != '1.1_01':
if borg.crypto.low_level.API_VERSION != '1.1_02':
raise ExtensionModuleError
if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
raise ExtensionModuleError
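The bumps above ('1.1_07' for hashindex, '1.1_02' for crypto.low_level) track the new FuseVersionsIndex and blake2b_128 additions: each compiled extension exports an API_VERSION constant, and check_extension_modules() refuses to run against a stale build. A minimal standalone sketch of that guard (it assumes the checks and ExtensionModuleError live in borg.helpers):

from borg import hashindex
from borg.helpers import ExtensionModuleError

# Mirror of the check above: the constant compiled into the extension
# must match what the Python side expects.
if hashindex.API_VERSION != '1.1_07':
    raise ExtensionModuleError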

View File

@@ -2101,11 +2101,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions'):
path = os.path.join(mountpoint, 'input', 'test') # filename shows up as directory ...
files = os.listdir(path)
assert all(f.startswith('test.') for f in files) # ... with files test.xxxxxxxx in there
assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there
assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
if are_hardlinks_supported():
st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00000000'))
st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00000000'))
st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
assert st1.st_ino == st2.st_ino
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
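For reference, the on-mount layout the versions test above exercises (paths come from the test setup; the exact names are the expected ones, not captured output): with a mount done via '-o versions', each regular file turns into a directory holding one entry per content version, numbered from 00001 in archive-time order, and hardlinked files keep a shared inode across their per-name directories:

mountpoint/input/test/test.00001                 contents b'first'
mountpoint/input/test/test.00002                 contents b'second'
mountpoint/input/hardlink1/hardlink1.00001       same inode as ...
mountpoint/input/hardlink2/hardlink2.00001       ... this entry (hardlink pair)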