From c3f40de6067edbbe89a9a9c2eeaf738853818a8d Mon Sep 17 00:00:00 2001 From: TW Date: Mon, 4 Feb 2019 03:26:45 +0100 Subject: [PATCH] cache_sync: compute size/count stats, borg info: consider part files (#4286) cache_sync: compute size/count stats, borg info: consider part files fixes #3522 --- src/borg/archive.py | 12 ++++-- src/borg/cache_sync/cache_sync.c | 41 +++++++++++++++++-- src/borg/cache_sync/unpack.h | 70 ++++++++++++++++++++++++++------ src/borg/hashindex.pyx | 31 ++++++++++++-- 4 files changed, 132 insertions(+), 22 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 88f489ef5..fb47c149a 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -510,10 +510,16 @@ def add(id): add(id) data = self.key.decrypt(id, chunk) sync.feed(data) - stats = Statistics() - stats.osize, stats.csize, unique_size, stats.usize, unique_chunks, chunks = archive_index.stats_against(cache.chunks) - stats.nfiles = sync.num_files + unique_csize = archive_index.stats_against(cache.chunks)[3] pi.finish() + stats = Statistics() + stats.nfiles = sync.num_files_totals if self.consider_part_files \ + else sync.num_files_totals - sync.num_files_parts + stats.osize = sync.size_totals if self.consider_part_files \ + else sync.size_totals - sync.size_parts + stats.csize = sync.csize_totals if self.consider_part_files \ + else sync.csize_totals - sync.csize_parts + stats.usize = unique_csize # the part files use same chunks as the full file return stats @contextmanager diff --git a/src/borg/cache_sync/cache_sync.c b/src/borg/cache_sync/cache_sync.c index 53b615522..1a2cfb0f2 100644 --- a/src/borg/cache_sync/cache_sync.c +++ b/src/borg/cache_sync/cache_sync.c @@ -38,7 +38,12 @@ cache_sync_init(HashIndex *chunks) unpack_init(&ctx->ctx); /* needs to be set only once */ ctx->ctx.user.chunks = chunks; - ctx->ctx.user.num_files = 0; + ctx->ctx.user.parts.size = 0; + ctx->ctx.user.parts.csize = 0; + ctx->ctx.user.parts.num_files = 0; + ctx->ctx.user.totals.size = 0; + ctx->ctx.user.totals.csize = 0; + ctx->ctx.user.totals.num_files = 0; ctx->buf = NULL; ctx->head = 0; ctx->tail = 0; @@ -63,9 +68,39 @@ cache_sync_error(const CacheSyncCtx *ctx) } static uint64_t -cache_sync_num_files(const CacheSyncCtx *ctx) +cache_sync_num_files_totals(const CacheSyncCtx *ctx) { - return ctx->ctx.user.num_files; + return ctx->ctx.user.totals.num_files; +} + +static uint64_t +cache_sync_num_files_parts(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.parts.num_files; +} + +static uint64_t +cache_sync_size_totals(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.totals.size; +} + +static uint64_t +cache_sync_size_parts(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.parts.size; +} + +static uint64_t +cache_sync_csize_totals(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.totals.csize; +} + +static uint64_t +cache_sync_csize_parts(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.parts.csize; } /** diff --git a/src/borg/cache_sync/unpack.h b/src/borg/cache_sync/unpack.h index 56eecb376..f25d38eb9 100644 --- a/src/borg/cache_sync/unpack.h +++ b/src/borg/cache_sync/unpack.h @@ -40,7 +40,7 @@ #endif typedef struct unpack_user { - /* Item.chunks is at the top level; we don't care about anything else, + /* Item.chunks and Item.part are at the top level; we don't care about anything else, * only need to track the current level to navigate arbitrary and unknown structure. * To discern keys from everything else on the top level we use expect_map_item_end. */ @@ -50,8 +50,6 @@ typedef struct unpack_user { HashIndex *chunks; - uint64_t num_files; - /* * We don't care about most stuff. This flag tells us whether we're at the chunks structure, * meaning: @@ -59,16 +57,23 @@ typedef struct unpack_user { * ^-HERE-^ */ int inside_chunks; + + /* is this item a .part file (created for checkpointing inside files)? */ + int part; + + /* does this item have a chunks list in it? */ + int has_chunks; + enum { /* the next thing is a map key at the Item root level, - * and it might be the "chunks" key we're looking for */ - expect_chunks_map_key, + * and it might be the "chunks" or "part" key we're looking for */ + expect_map_key, - /* blocking state to expect_chunks_map_key + /* blocking state to expect_map_key * { 'stuff': , 'chunks': [ - * ecmk -> emie -> -> -> -> ecmk ecb eeboce + * emk -> emie -> -> -> -> emk ecb eeboce * (nested containers are tracked via level) - * ecmk=expect_chunks_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin, + * emk=expect_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin, * eeboce=expect_entry_begin_or_chunks_end */ expect_map_item_end, @@ -95,11 +100,28 @@ typedef struct unpack_user { expect_item_begin } expect; + /* collect values here for current chunklist entry */ struct { char key[32]; uint32_t csize; uint32_t size; } current; + + /* summing up chunks sizes here within a single item */ + struct { + uint64_t size, csize; + } item; + + /* total sizes and files count coming from all files */ + struct { + uint64_t size, csize, num_files; + } totals; + + /* total sizes and files count coming from part files */ + struct { + uint64_t size, csize, num_files; + } parts; + } unpack_user; struct unpack_context; @@ -107,7 +129,7 @@ typedef struct unpack_context unpack_context; typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); #define UNEXPECTED(what) \ - if(u->inside_chunks || u->expect == expect_chunks_map_key) { \ + if(u->inside_chunks || u->expect == expect_map_key) { \ SET_LAST_ERROR("Unexpected object: " what); \ return -1; \ } @@ -270,6 +292,8 @@ static inline int unpack_callback_array_end(unpack_user* u) return -1; } } + u->item.size += u->current.size; + u->item.csize += u->current.csize; u->expect = expect_entry_begin_or_chunks_end; break; @@ -302,7 +326,11 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n) return -1; } /* This begins a new Item */ - u->expect = expect_chunks_map_key; + u->expect = expect_map_key; + u->part = 0; + u->has_chunks = 0; + u->item.size = 0; + u->item.csize = 0; } if(u->inside_chunks) { @@ -321,7 +349,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current) if(u->level == 1) { switch(u->expect) { case expect_map_item_end: - u->expect = expect_chunks_map_key; + u->expect = expect_map_key; break; default: SET_LAST_ERROR("Unexpected map item"); @@ -338,6 +366,19 @@ static inline int unpack_callback_map_end(unpack_user* u) SET_LAST_ERROR("Unexpected map end"); return -1; } + if(u->level == 0) { + /* This ends processing of an Item */ + if(u->has_chunks) { + if(u->part) { + u->parts.num_files += 1; + u->parts.size += u->item.size; + u->parts.csize += u->item.csize; + } + u->totals.num_files += 1; + u->totals.size += u->item.size; + u->totals.csize += u->item.csize; + } + } return 0; } @@ -356,11 +397,14 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* memcpy(u->current.key, p, 32); u->expect = expect_size; break; - case expect_chunks_map_key: + case expect_map_key: if(length == 6 && !memcmp("chunks", p, 6)) { u->expect = expect_chunks_begin; u->inside_chunks = 1; - u->num_files++; + u->has_chunks = 1; + } else if(length == 4 && !memcmp("part", p, 4)) { + u->expect = expect_map_item_end; + u->part = 1; } else { u->expect = expect_map_item_end; } diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 666d8a0e5..8c9de6328 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -45,7 +45,12 @@ cdef extern from "cache_sync/cache_sync.c": CacheSyncCtx *cache_sync_init(HashIndex *chunks) const char *cache_sync_error(const CacheSyncCtx *ctx) - uint64_t cache_sync_num_files(const CacheSyncCtx *ctx) + uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx) + uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx) + uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx) + uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx) + uint64_t cache_sync_csize_totals(const CacheSyncCtx *ctx) + uint64_t cache_sync_csize_parts(const CacheSyncCtx *ctx) int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length) void cache_sync_free(CacheSyncCtx *ctx) @@ -526,5 +531,25 @@ cdef class CacheSynchronizer: raise ValueError('cache_sync_feed failed: ' + error.decode('ascii')) @property - def num_files(self): - return cache_sync_num_files(self.sync) + def num_files_totals(self): + return cache_sync_num_files_totals(self.sync) + + @property + def num_files_parts(self): + return cache_sync_num_files_parts(self.sync) + + @property + def size_totals(self): + return cache_sync_size_totals(self.sync) + + @property + def size_parts(self): + return cache_sync_size_parts(self.sync) + + @property + def csize_totals(self): + return cache_sync_csize_totals(self.sync) + + @property + def csize_parts(self): + return cache_sync_csize_parts(self.sync)