CacheSynchronizer

This commit is contained in:
Marian Beermann 2017-03-07 15:13:59 +01:00
parent 47894072ad
commit 740898d83b
5 changed files with 198 additions and 15 deletions

View File

@ -600,7 +600,7 @@ if not on_rtd:
ext_modules += [
Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
Extension('borg.crypto.low_level', [crypto_ll_source], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
Extension('borg.hashindex', [hashindex_source]),
Extension('borg.hashindex', [hashindex_source], libraries=['msgpackc']),
Extension('borg.item', [item_source]),
Extension('borg.algorithms.chunker', [chunker_source]),
Extension('borg.algorithms.checksums', [checksums_source]),

159
src/borg/_cache.c Normal file
View File

@ -0,0 +1,159 @@
#include <msgpack.h>
// 2**32 - 1025
#define _MAX_VALUE ( (uint32_t) 4294966271 )
#define MIN(x, y) ((x) < (y) ? (x): (y))
/*
 * State for one cache synchronization run over an archive's item stream.
 * Created by cache_sync_init(), destroyed by cache_sync_free().
 */
typedef struct {
/* chunk index being populated; owned by the caller, not freed here */
HashIndex *chunks;
/* streaming msgpack decoder; buffers partial objects between feed() calls */
msgpack_unpacker unpacker;
/* reusable holder for the most recently decoded msgpack object */
msgpack_unpacked unpacked;
/* static error string, or NULL if no error occurred; see cache_sync_error() */
const char *error;
} CacheSyncCtx;
/*
 * Allocate and initialize a cache synchronizer bound to the given chunk index.
 *
 * The index is borrowed: the synchronizer never frees it. Returns NULL on
 * allocation failure (either of the context itself or of the unpacker's
 * internal buffer); on success the caller must release the context with
 * cache_sync_free().
 */
static CacheSyncCtx *
cache_sync_init(HashIndex *chunks)
{
    CacheSyncCtx *ctx = malloc(sizeof(CacheSyncCtx));

    if (ctx == NULL) {
        return NULL;
    }
    if (!msgpack_unpacker_init(&ctx->unpacker, MSGPACK_UNPACKER_INIT_BUFFER_SIZE)) {
        /* unpacker buffer allocation failed; nothing else to tear down */
        free(ctx);
        return NULL;
    }
    msgpack_unpacked_init(&ctx->unpacked);
    ctx->chunks = chunks;
    ctx->error = NULL;
    return ctx;
}
/*
 * Release all resources held by a synchronizer created with cache_sync_init().
 * Does not free the chunk index, which remains owned by the caller.
 */
static void
cache_sync_free(CacheSyncCtx *ctx)
{
    /* the two msgpack objects are independent; destroy both, then the ctx */
    msgpack_unpacked_destroy(&ctx->unpacked);
    msgpack_unpacker_destroy(&ctx->unpacker);
    free(ctx);
}
/*
 * Return the last error message, or NULL if no error has occurred.
 * The returned pointer is a static string literal; do not free it.
 */
static const char *
cache_sync_error(CacheSyncCtx *ctx)
{
return ctx->error;
}
/*
 * Merge one item's chunk list into the chunk index.
 *
 * Each entry must be a 3-tuple (key, size, csize) where key is a 32-byte
 * binary string and size/csize are non-negative integers. Known chunks get
 * their refcount bumped (saturating at _MAX_VALUE); unknown chunks are
 * inserted with refcount 1.
 *
 * Returns 1 on success, 0 on error (ctx->error is set).
 */
static int
cache_process_chunks(CacheSyncCtx *ctx, msgpack_object_array *array)
{
    uint32_t i;
    const char *key;
    uint32_t cache_values[3];
    uint32_t *cache_entry;
    uint64_t refcount;
    msgpack_object *current;

    for (i = 0; i < array->size; i++) {
        current = &array->ptr[i];
        if (current->type != MSGPACK_OBJECT_ARRAY || current->via.array.size != 3
           || current->via.array.ptr[0].type != MSGPACK_OBJECT_STR || current->via.array.ptr[0].via.str.size != 32
           || current->via.array.ptr[1].type != MSGPACK_OBJECT_POSITIVE_INTEGER
           || current->via.array.ptr[2].type != MSGPACK_OBJECT_POSITIVE_INTEGER) {
            ctx->error = "Malformed chunk list entry";
            return 0;
        }
        key = current->via.array.ptr[0].via.str.ptr;
        cache_entry = (uint32_t*) hashindex_get(ctx->chunks, key);
        if (cache_entry) {
            /* known chunk: bump refcount, saturating at _MAX_VALUE */
            refcount = _le32toh(cache_entry[0]);
            refcount += 1;
            cache_entry[0] = _htole32(MIN(refcount, _MAX_VALUE));
        } else {
            /* new chunk: (refcount, size, csize).
             * BUG FIX: values must be stored little-endian via _htole32 —
             * the original stored them host-endian while all reads go
             * through _le32toh (see refcount path above), which corrupted
             * the index on big-endian hosts. */
            cache_values[0] = _htole32(1);
            cache_values[1] = _htole32((uint32_t) current->via.array.ptr[1].via.u64);
            cache_values[2] = _htole32((uint32_t) current->via.array.ptr[2].via.u64);
            if (!hashindex_set(ctx->chunks, key, cache_values)) {
                ctx->error = "hashindex_set failed";
                return 0;
            }
        }
    }
    return 1;
}
/**
 * feed data to the cache synchronizer
 * 0 = abort, 1 = continue
 * abort is a regular condition, check cache_sync_error
 *
 * Buffers the bytes into the streaming unpacker and processes every complete
 * msgpack object found. Each object must be a map (an archive item); its
 * "chunks" value, if present, must be an array and is merged into the index
 * via cache_process_chunks().
 */
static int
cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
{
    msgpack_unpack_return unpack_status;

    /* grow buffer if necessary */
    if (msgpack_unpacker_buffer_capacity(&ctx->unpacker) < length) {
        if (!msgpack_unpacker_reserve_buffer(&ctx->unpacker, length)) {
            /* BUG FIX: the original returned 0 without setting ctx->error,
             * so the Cython caller saw a NULL error string and silently
             * treated the failed feed as success. */
            ctx->error = "Unpacker failed to reserve buffer";
            return 0;
        }
    }
    memcpy(msgpack_unpacker_buffer(&ctx->unpacker), data, length);
    msgpack_unpacker_buffer_consumed(&ctx->unpacker, length);

    do {
        unpack_status = msgpack_unpacker_next(&ctx->unpacker, &ctx->unpacked);
        switch (unpack_status) {
        case MSGPACK_UNPACK_SUCCESS:
            {
                uint32_t i;
                msgpack_object *item = &ctx->unpacked.data;
                msgpack_object_kv *current;

                /* every top-level object in the item stream must be a map */
                if (item->type != MSGPACK_OBJECT_MAP) {
                    ctx->error = "Unexpected data type in item stream";
                    return 0;
                }
                for (i = 0; i < item->via.map.size; i++) {
                    current = &item->via.map.ptr[i];
                    if (current->key.type != MSGPACK_OBJECT_STR) {
                        ctx->error = "Invalid key data type in item";
                        return 0;
                    }
                    if (current->key.via.str.size == 6
                        && !memcmp(current->key.via.str.ptr, "chunks", 6)) {
                        if (current->val.type != MSGPACK_OBJECT_ARRAY) {
                            ctx->error = "Unexpected value type of item chunks";
                            return 0;
                        }
                        if (!cache_process_chunks(ctx, &current->val.via.array)) {
                            return 0;
                        }
                    }
                }
            }
            break;
        case MSGPACK_UNPACK_PARSE_ERROR:
            ctx->error = "Malformed msgpack";
            return 0;
        case MSGPACK_UNPACK_NOMEM_ERROR:
            /* BUG FIX: this status previously fell into the default branch;
             * since it never becomes MSGPACK_UNPACK_CONTINUE, the do/while
             * looped forever on allocation failure. */
            ctx->error = "Unpacker ran out of memory";
            return 0;
        default:
            break;
        }
        /* CONTINUE means: no complete object left in the buffer */
    } while (unpack_status != MSGPACK_UNPACK_CONTINUE);

    return 1;
}

View File

@ -12,7 +12,7 @@ from .logger import create_logger
logger = create_logger()
from .constants import CACHE_README
from .hashindex import ChunkIndex, ChunkIndexEntry
from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
from .helpers import Location
from .helpers import Error
from .helpers import get_cache_dir, get_security_dir
@ -571,17 +571,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
if archive.version != 1:
raise Exception('Unknown archive metadata version')
unpacker = msgpack.Unpacker()
sync = CacheSynchronizer(chunk_idx)
for item_id, chunk in zip(archive.items, repository.get_many(archive.items)):
data = key.decrypt(item_id, chunk)
chunk_idx.add(item_id, 1, len(data), len(chunk))
unpacker.feed(data)
for item in unpacker:
if not isinstance(item, dict):
logger.error('Error: Did not get expected metadata dict - archive corrupted!')
continue # XXX: continue?!
for chunk_id, size, csize in item.get(b'chunks', []):
chunk_idx.add(chunk_id, 1, size, csize)
sync.feed(data)
if self.do_cache:
fn = mkpath(archive_id)
fn_tmp = mkpath(archive_id, suffix='.tmp')

View File

@ -8,7 +8,7 @@ from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
from libc.errno cimport errno
from cpython.exc cimport PyErr_SetFromErrnoWithFilename
API_VERSION = '1.1_01'
API_VERSION = '1.1_02'
cdef extern from "_hashindex.c":
@ -31,6 +31,18 @@ cdef extern from "_hashindex.c":
double HASH_MAX_LOAD
cdef extern from "_cache.c":
ctypedef struct CacheSyncCtx:
pass
CacheSyncCtx *cache_sync_init(HashIndex *chunks)
const char *cache_sync_error(CacheSyncCtx *ctx)
int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
void cache_sync_free(CacheSyncCtx *ctx)
uint32_t _MAX_VALUE
cdef _NoDefault = object()
"""
@ -50,9 +62,6 @@ AssertionError is raised instead.
assert UINT32_MAX == 2**32-1
# module-level constant because cdef's in classes can't have default values
cdef uint32_t _MAX_VALUE = 2**32-1025
assert _MAX_VALUE % 2 == 1
@ -375,3 +384,24 @@ cdef class ChunkKeyIterator:
cdef uint32_t refcount = _le32toh(value[0])
assert refcount <= _MAX_VALUE, "invalid reference count"
return (<char *>self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2]))
cdef class CacheSynchronizer:
    """Incrementally merge an archive's item stream into a ChunkIndex.

    Thin wrapper around the C implementation in _cache.c: feed() accepts
    raw (decrypted) msgpack item data in arbitrary-sized pieces and updates
    the wrapped ChunkIndex with each item's chunk references.
    """
    # index being populated; kept referenced so it outlives the C context
    cdef ChunkIndex chunks
    # opaque C state; NULL only if cache_sync_init failed in __cinit__
    cdef CacheSyncCtx *sync

    def __cinit__(self, chunks):
        self.chunks = chunks
        self.sync = cache_sync_init(self.chunks.index)
        if not self.sync:
            raise Exception('cache_sync_init failed')

    def __dealloc__(self):
        # guard: __cinit__ may have raised before self.sync was set
        if self.sync:
            cache_sync_free(self.sync)

    def feed(self, chunk):
        """Feed a piece of the item stream; raises on malformed data."""
        if not cache_sync_feed(self.sync, <char *>chunk, len(chunk)):
            error = cache_sync_error(self.sync)
            # NOTE(review): if the C side fails without setting an error
            # string, this silently ignores the failure — confirm that
            # cache_sync_feed always sets ctx->error before returning 0.
            if error is not None:
                raise Exception('cache_sync_feed failed: ' + error.decode('ascii'))

View File

@ -126,7 +126,7 @@ def check_python():
def check_extension_modules():
from . import platform, compress, item
if hashindex.API_VERSION != '1.1_01':
if hashindex.API_VERSION != '1.1_02':
raise ExtensionModuleError
if chunker.API_VERSION != '1.1_01':
raise ExtensionModuleError