From e398d5f632fc04c596fee06cec167103e93182a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Tue, 18 Mar 2014 21:42:03 +0100 Subject: [PATCH] Include "all archives" size information in "--stats" output. Closes #54 --- CHANGES | 8 ++++++++ attic/_hashindex.c | 20 ++++++++++++++++++++ attic/archiver.py | 8 ++++---- attic/hashindex.pyx | 6 ++++++ attic/helpers.py | 27 ++++++++++++++++----------- 5 files changed, 54 insertions(+), 15 deletions(-) diff --git a/CHANGES b/CHANGES index 0a15d795f..cfb8e00ae 100644 --- a/CHANGES +++ b/CHANGES @@ -3,6 +3,14 @@ Attic Changelog Here you can see the full list of changes between each Attic release. +Version 0.12 +------------ + +(feature release, released on X) + +- Include "all archives" size information in "--stats" output. (#54) +- Switch to SI units (Power of 1000 instead of 1024) when printing file sizes + Version 0.11 ------------ diff --git a/attic/_hashindex.c b/attic/_hashindex.c index ae6737e41..0e04f6bb7 100644 --- a/attic/_hashindex.c +++ b/attic/_hashindex.c @@ -389,3 +389,23 @@ hashindex_get_size(HashIndex *index) return index->num_entries; } +static void +hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *total_unique_size, long long *total_unique_csize) +{ + int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0; + const int32_t *values; + void *key = NULL; + + while((key = hashindex_next_key(index, key))) { + values = key + 32; + unique_size += values[1]; + unique_csize += values[2]; + size += (int64_t)values[0] * values[1]; /* widen first: an int32 * int32 product can overflow */ + csize += (int64_t)values[0] * values[2]; + } + *total_size = size; + *total_csize = csize; + *total_unique_size = unique_size; + *total_unique_csize = unique_csize; +} + diff --git a/attic/archiver.py b/attic/archiver.py index 6da0cae79..4037ce6f0 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -130,14 +130,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if args.stats: t = datetime.now() diff =
t - t0 - print('-' * 40) + print('-' * 70) print('Archive name: %s' % args.archive.archive) print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii')) print('Start time: %s' % t0.strftime('%c')) print('End time: %s' % t.strftime('%c')) print('Duration: %s' % format_timedelta(diff)) - archive.stats.print_() - print('-' * 40) + archive.stats.print_(cache) + print('-' * 70) return self.exit_code def _process(self, archive, cache, excludes, skip_inodes, path, restrict_dev): @@ -297,7 +297,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print('Username:', archive.metadata[b'username']) print('Time: %s' % to_localtime(archive.ts).strftime('%c')) print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline']))) - stats.print_() + stats.print_(cache) return self.exit_code def do_prune(self, args): diff --git a/attic/hashindex.pyx b/attic/hashindex.pyx index 743071248..53afbe338 100644 --- a/attic/hashindex.pyx +++ b/attic/hashindex.pyx @@ -8,6 +8,7 @@ cdef extern from "_hashindex.c": HashIndex *hashindex_open(char *path, int readonly) HashIndex *hashindex_create(char *path, int capacity, int key_size, int value_size) + void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize) int hashindex_get_size(HashIndex *index) int hashindex_clear(HashIndex *index) int hashindex_close(HashIndex *index) @@ -174,6 +175,11 @@ cdef class ChunkIndex(IndexBase): iter.key = key - 32 return iter + def summarize(self): + cdef long long total_size, total_csize, unique_size, unique_csize + hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize) + return total_size, total_csize, unique_size, unique_csize + cdef class ChunkKeyIterator: cdef ChunkIndex idx diff --git a/attic/helpers.py b/attic/helpers.py index 80ab77a3b..953d23279 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -132,11 +132,14 @@ class Statistics: if 
unique: self.usize += csize - def print_(self): + def print_(self, cache): + total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize() print('Number of files: %d' % self.nfiles) - print('Original size: %d (%s)' % (self.osize, format_file_size(self.osize))) - print('Compressed size: %s (%s)' % (self.csize, format_file_size(self.csize))) - print('Unique data: %d (%s)' % (self.usize, format_file_size(self.usize))) + print() + print(' Original size Compressed size Deduplicated size') + print('This archive: %18s %18s %18s' % (format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize))) + print('All archives: %18s %18s %18s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize))) + def get_keys_dir(): @@ -278,14 +281,16 @@ def format_file_mode(mod): def format_file_size(v): """Format file size into a human friendly format """ - if v > 1024 * 1024 * 1024: - return '%.2f GB' % (v / 1024. / 1024. / 1024.) - elif v > 1024 * 1024: - return '%.2f MB' % (v / 1024. / 1024.) - elif v > 1024: - return '%.2f kB' % (v / 1024.) + if v >= 10**12: + return '%.2f TB' % (v / 10**12) + elif v >= 10**9: + return '%.2f GB' % (v / 10**9) + elif v >= 10**6: + return '%.2f MB' % (v / 10**6) + elif v >= 10**3: + return '%.2f kB' % (v / 10**3) else: - return '%d B' % v + return '%d B ' % v def format_archive(archive):