cache_sync: compute size/count stats, borg info: consider part files (#4286)

fixes #3522
TW 2019-02-04 03:26:45 +01:00 committed by GitHub
parent 50666c3e44
commit c3f40de606
4 changed files with 132 additions and 22 deletions


@@ -510,10 +510,16 @@ Utilization of max. archive size: {csize_max:.0%}
add(id)
data = self.key.decrypt(id, chunk)
sync.feed(data)
stats = Statistics()
stats.osize, stats.csize, unique_size, stats.usize, unique_chunks, chunks = archive_index.stats_against(cache.chunks)
stats.nfiles = sync.num_files
unique_csize = archive_index.stats_against(cache.chunks)[3]
pi.finish()
stats = Statistics()
stats.nfiles = sync.num_files_totals if self.consider_part_files \
else sync.num_files_totals - sync.num_files_parts
stats.osize = sync.size_totals if self.consider_part_files \
else sync.size_totals - sync.size_parts
stats.csize = sync.csize_totals if self.consider_part_files \
else sync.csize_totals - sync.csize_parts
stats.usize = unique_csize # the part files use same chunks as the full file
return stats
@contextmanager

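For context, the accounting done above can be condensed into a small self-contained Python sketch (hypothetical helper and names, not borg's API): part files are always included in the *_totals counters coming from the cache sync, so they are subtracted out unless consider_part_files is set, while the deduplicated size is taken from the chunks index either way, because a part file references the same chunks as the finished file.

# Minimal sketch of the selection logic above (hypothetical helper, not borg code).
from collections import namedtuple

SyncCounters = namedtuple('SyncCounters', 'num_files_totals num_files_parts '
                                          'size_totals size_parts '
                                          'csize_totals csize_parts')

def archive_stats(sync, unique_csize, consider_part_files=False):
    """Return (nfiles, osize, csize, usize) from cache-sync counters."""
    if consider_part_files:
        nfiles, osize, csize = sync.num_files_totals, sync.size_totals, sync.csize_totals
    else:
        # part files are contained in the totals, so take them back out
        nfiles = sync.num_files_totals - sync.num_files_parts
        osize = sync.size_totals - sync.size_parts
        csize = sync.csize_totals - sync.csize_parts
    # part files reuse the chunks of the finished file, so the deduplicated
    # size does not depend on the choice
    return nfiles, osize, csize, unique_csize

# example: one finished file (100 B raw / 60 B compressed) plus one leftover
# checkpoint part of it (40 B raw / 25 B compressed, referencing the same chunks)
c = SyncCounters(num_files_totals=2, num_files_parts=1,
                 size_totals=140, size_parts=40,
                 csize_totals=85, csize_parts=25)
print(archive_stats(c, unique_csize=60))                            # (1, 100, 60, 60)
print(archive_stats(c, unique_csize=60, consider_part_files=True))  # (2, 140, 85, 60)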

@@ -38,7 +38,12 @@ cache_sync_init(HashIndex *chunks)
unpack_init(&ctx->ctx);
/* needs to be set only once */
ctx->ctx.user.chunks = chunks;
ctx->ctx.user.num_files = 0;
ctx->ctx.user.parts.size = 0;
ctx->ctx.user.parts.csize = 0;
ctx->ctx.user.parts.num_files = 0;
ctx->ctx.user.totals.size = 0;
ctx->ctx.user.totals.csize = 0;
ctx->ctx.user.totals.num_files = 0;
ctx->buf = NULL;
ctx->head = 0;
ctx->tail = 0;
@@ -63,9 +68,39 @@ cache_sync_error(const CacheSyncCtx *ctx)
}
static uint64_t
cache_sync_num_files(const CacheSyncCtx *ctx)
cache_sync_num_files_totals(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.num_files;
return ctx->ctx.user.totals.num_files;
}
static uint64_t
cache_sync_num_files_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.num_files;
}
static uint64_t
cache_sync_size_totals(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.totals.size;
}
static uint64_t
cache_sync_size_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.size;
}
static uint64_t
cache_sync_csize_totals(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.totals.csize;
}
static uint64_t
cache_sync_csize_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.csize;
}
/**

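The six accessors added above come in pairs because the parser keeps two identical counter groups: running totals over every item that carries a chunks list, and the subset contributed by .part files (checkpoint leftovers). A rough Python model of that layout, purely illustrative:

from dataclasses import dataclass, field

@dataclass
class Counters:
    size: int = 0        # sum of original chunk sizes
    csize: int = 0       # sum of compressed/encrypted chunk sizes
    num_files: int = 0   # number of items that contained a chunks list

@dataclass
class SyncState:
    totals: Counters = field(default_factory=Counters)  # all items with chunks
    parts: Counters = field(default_factory=Counters)   # only .part items

Each accessor simply reads one field out of one of the two groups.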

@@ -40,7 +40,7 @@
#endif
typedef struct unpack_user {
/* Item.chunks is at the top level; we don't care about anything else,
/* Item.chunks and Item.part are at the top level; we don't care about anything else,
* only need to track the current level to navigate arbitrary and unknown structure.
* To discern keys from everything else on the top level we use expect_map_item_end.
*/
@@ -50,8 +50,6 @@ typedef struct unpack_user {
HashIndex *chunks;
uint64_t num_files;
/*
* We don't care about most stuff. This flag tells us whether we're at the chunks structure,
* meaning:
@@ -59,16 +57,23 @@ typedef struct unpack_user {
* ^-HERE-^
*/
int inside_chunks;
/* is this item a .part file (created for checkpointing inside files)? */
int part;
/* does this item have a chunks list in it? */
int has_chunks;
enum {
/* the next thing is a map key at the Item root level,
* and it might be the "chunks" key we're looking for */
expect_chunks_map_key,
* and it might be the "chunks" or "part" key we're looking for */
expect_map_key,
/* blocking state to expect_chunks_map_key
/* blocking state to expect_map_key
* { 'stuff': <complex and arbitrary structure>, 'chunks': [
* ecmk -> emie -> -> -> -> ecmk ecb eeboce
* emk -> emie -> -> -> -> emk ecb eeboce
* (nested containers are tracked via level)
* ecmk=expect_chunks_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin,
* emk=expect_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin,
* eeboce=expect_entry_begin_or_chunks_end
*/
expect_map_item_end,
@@ -95,11 +100,28 @@ typedef struct unpack_user {
expect_item_begin
} expect;
/* collect values here for current chunklist entry */
struct {
char key[32];
uint32_t csize;
uint32_t size;
} current;
/* summing up chunks sizes here within a single item */
struct {
uint64_t size, csize;
} item;
/* total sizes and files count coming from all files */
struct {
uint64_t size, csize, num_files;
} totals;
/* total sizes and files count coming from part files */
struct {
uint64_t size, csize, num_files;
} parts;
} unpack_user;
struct unpack_context;
@@ -107,7 +129,7 @@ typedef struct unpack_context unpack_context;
typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off);
#define UNEXPECTED(what) \
if(u->inside_chunks || u->expect == expect_chunks_map_key) { \
if(u->inside_chunks || u->expect == expect_map_key) { \
SET_LAST_ERROR("Unexpected object: " what); \
return -1; \
}
@@ -270,6 +292,8 @@ static inline int unpack_callback_array_end(unpack_user* u)
return -1;
}
}
u->item.size += u->current.size;
u->item.csize += u->current.csize;
u->expect = expect_entry_begin_or_chunks_end;
break;
@@ -302,7 +326,11 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n)
return -1;
}
/* This begins a new Item */
u->expect = expect_chunks_map_key;
u->expect = expect_map_key;
u->part = 0;
u->has_chunks = 0;
u->item.size = 0;
u->item.csize = 0;
}
if(u->inside_chunks) {
@@ -321,7 +349,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current)
if(u->level == 1) {
switch(u->expect) {
case expect_map_item_end:
u->expect = expect_chunks_map_key;
u->expect = expect_map_key;
break;
default:
SET_LAST_ERROR("Unexpected map item");
@@ -338,6 +366,19 @@ static inline int unpack_callback_map_end(unpack_user* u)
SET_LAST_ERROR("Unexpected map end");
return -1;
}
if(u->level == 0) {
/* This ends processing of an Item */
if(u->has_chunks) {
if(u->part) {
u->parts.num_files += 1;
u->parts.size += u->item.size;
u->parts.csize += u->item.csize;
}
u->totals.num_files += 1;
u->totals.size += u->item.size;
u->totals.csize += u->item.csize;
}
}
return 0;
}
@@ -356,11 +397,14 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
memcpy(u->current.key, p, 32);
u->expect = expect_size;
break;
case expect_chunks_map_key:
case expect_map_key:
if(length == 6 && !memcmp("chunks", p, 6)) {
u->expect = expect_chunks_begin;
u->inside_chunks = 1;
u->num_files++;
u->has_chunks = 1;
} else if(length == 4 && !memcmp("part", p, 4)) {
u->expect = expect_map_item_end;
u->part = 1;
} else {
u->expect = expect_map_item_end;
}

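Taken together, the parser changes in this file boil down to: remember whether the current top-level item had a chunks list and whether it carried a part flag, sum the sizes of its chunklist entries, and fold them into totals (always) and parts (only for part files) when the item's map closes. A condensed pure-Python sketch of that net effect, operating on already-decoded items instead of the streaming msgpack the real code handles (all names illustrative):

def accumulate(items):
    """Mirror the cache-sync accounting: every item with a 'chunks' list is
    counted into totals; items also flagged as parts are counted into parts."""
    totals = {'size': 0, 'csize': 0, 'num_files': 0}
    parts = {'size': 0, 'csize': 0, 'num_files': 0}
    for item in items:
        if 'chunks' not in item:             # e.g. directories, symlinks
            continue
        size = sum(s for _id, s, _cs in item['chunks'])
        csize = sum(cs for _id, _s, cs in item['chunks'])
        for acc in ([totals, parts] if item.get('part') else [totals]):
            acc['num_files'] += 1
            acc['size'] += size
            acc['csize'] += csize
    return totals, parts

# one finished file, one checkpoint part of it, one chunk-less item
items = [
    {'path': 'data.bin', 'chunks': [(b'id1', 100, 60), (b'id2', 50, 30)]},
    {'path': 'data.bin', 'part': 1, 'chunks': [(b'id1', 100, 60)]},
    {'path': 'some/dir'},
]
totals, parts = accumulate(items)
assert totals == {'size': 250, 'csize': 150, 'num_files': 2}
assert parts == {'size': 100, 'csize': 60, 'num_files': 1}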

@@ -45,7 +45,12 @@ cdef extern from "cache_sync/cache_sync.c":
CacheSyncCtx *cache_sync_init(HashIndex *chunks)
const char *cache_sync_error(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx)
uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx)
uint64_t cache_sync_csize_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_csize_parts(const CacheSyncCtx *ctx)
int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
void cache_sync_free(CacheSyncCtx *ctx)
@@ -526,5 +531,25 @@ cdef class CacheSynchronizer:
raise ValueError('cache_sync_feed failed: ' + error.decode('ascii'))
@property
def num_files(self):
return cache_sync_num_files(self.sync)
def num_files_totals(self):
return cache_sync_num_files_totals(self.sync)
@property
def num_files_parts(self):
return cache_sync_num_files_parts(self.sync)
@property
def size_totals(self):
return cache_sync_size_totals(self.sync)
@property
def size_parts(self):
return cache_sync_size_parts(self.sync)
@property
def csize_totals(self):
return cache_sync_csize_totals(self.sync)
@property
def csize_parts(self):
return cache_sync_csize_parts(self.sync)