From e569595974da46cead38099274d286df9288e06f Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sat, 23 Feb 2019 09:44:33 +0100
Subject: [PATCH 1/2] include size/csize/nfiles[_parts] stats into archive, fixes #3241

---
 src/borg/archive.py        | 37 ++++++++++++++++++++++++++++---------
 src/borg/archiver.py       |  4 ++--
 src/borg/cache.py          |  8 ++++----
 src/borg/constants.py      |  2 +-
 src/borg/helpers/checks.py |  2 +-
 src/borg/item.pyx          |  8 +++++++-
 6 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index 21ed7b0cd..e6f4bd2ac 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -54,13 +54,20 @@ class Statistics:
     def __init__(self, output_json=False):
         self.output_json = output_json
         self.osize = self.csize = self.usize = self.nfiles = 0
+        self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
         self.last_progress = 0  # timestamp when last progress was shown
 
-    def update(self, size, csize, unique):
-        self.osize += size
-        self.csize += csize
-        if unique:
-            self.usize += csize
+    def update(self, size, csize, unique, part=False):
+        if not part:
+            self.osize += size
+            self.csize += csize
+            if unique:
+                self.usize += csize
+        else:
+            self.osize_parts += size
+            self.csize_parts += csize
+            if unique:
+                self.usize_parts += csize
 
     def __add__(self, other):
         if not isinstance(other, Statistics):
@@ -70,6 +77,10 @@ class Statistics:
         stats.csize = self.csize + other.csize
         stats.usize = self.usize + other.usize
         stats.nfiles = self.nfiles + other.nfiles
+        stats.osize_parts = self.osize_parts + other.osize_parts
+        stats.csize_parts = self.csize_parts + other.csize_parts
+        stats.usize_parts = self.usize_parts + other.usize_parts
+        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
         return stats
 
     summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"
@@ -492,7 +503,7 @@ Utilization of max. archive size: {csize_max:.0%}
         del self.manifest.archives[self.checkpoint_name]
         self.cache.chunk_decref(self.id, self.stats)
 
-    def save(self, name=None, comment=None, timestamp=None, additional_metadata=None):
+    def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
         if name in self.manifest.archives:
             raise self.AlreadyExists(name)
@@ -518,6 +529,14 @@ Utilization of max. archive size: {csize_max:.0%}
             'time_end': end.strftime(ISO_FORMAT),
             'chunker_params': self.chunker_params,
         }
+        if stats is not None:
+            metadata.update({
+                'size': stats.osize,
+                'csize': stats.csize,
+                'nfiles': stats.nfiles,
+                'size_parts': stats.osize_parts,
+                'csize_parts': stats.csize_parts,
+                'nfiles_parts': stats.nfiles_parts})
         metadata.update(additional_metadata or {})
         metadata = ArchiveItem(metadata)
         data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b'archive')
@@ -1057,9 +1076,9 @@ class ChunksProcessor:
 
                 # if we created part files, we have referenced all chunks from the part files,
                 # but we also will reference the same chunks also from the final, complete file:
-                dummy_stats = Statistics()  # do not count this data volume twice
                 for chunk in item.chunks:
-                    cache.chunk_incref(chunk.id, dummy_stats, size=chunk.size)
+                    cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
+                stats.nfiles_parts += part_number - 1
 
 
 class FilesystemObjectProcessors:
@@ -1882,7 +1901,7 @@ class ArchiveRecreater:
             return
         if comment is None:
             comment = archive.metadata.get('comment', '')
-        target.save(comment=comment, additional_metadata={
+        target.save(comment=comment, stats=target.stats, additional_metadata={
             # keep some metadata as in original archive:
            'time': archive.metadata.time,
            'time_end': archive.metadata.get('time_end') or archive.metadata.time,
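
The Statistics changes above keep two parallel sets of counters: the usual osize/csize/usize/nfiles totals for complete files, and *_parts totals that grow whenever update() is called with part=True, so data written into checkpoint part files can be reported separately. A minimal standalone sketch of that accounting (illustrative only; StatsSketch is not borg's actual Statistics class):

    class StatsSketch:
        def __init__(self):
            # totals for complete files
            self.osize = self.csize = self.usize = self.nfiles = 0
            # totals for checkpoint part files, kept separate so callers can
            # include or exclude them independently of the complete files
            self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0

        def update(self, size, csize, unique, part=False):
            if not part:
                self.osize += size
                self.csize += csize
                if unique:
                    self.usize += csize
            else:
                self.osize_parts += size
                self.csize_parts += csize
                if unique:
                    self.usize_parts += csize


    stats = StatsSketch()
    stats.update(1000, 400, unique=True)              # chunk counted for the complete file
    stats.update(1000, 400, unique=False, part=True)  # same chunk referenced from a part file
    assert (stats.osize, stats.csize, stats.usize) == (1000, 400, 400)
    assert (stats.osize_parts, stats.csize_parts, stats.usize_parts) == (1000, 400, 0)
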
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index c07dee77b..4fa36a92b 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -502,11 +502,11 @@ class Archiver:
                               keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
                               restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
-                archive.save(comment=args.comment, timestamp=args.timestamp)
                 if args.progress:
                     archive.stats.show_progress(final=True)
-                args.stats |= args.json
                 archive.stats += fso.stats
+                archive.save(comment=args.comment, timestamp=args.timestamp, stats=archive.stats)
+                args.stats |= args.json
                 if args.stats:
                     if args.json:
                         json_print(basic_json_data(manifest, cache=cache, extra={
diff --git a/src/borg/cache.py b/src/borg/cache.py
index 182a68649..33bb9db59 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -903,11 +903,11 @@ class LocalCache(CacheStatsMixin):
                 id, stored_size, size))
         return refcount
 
-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self.txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
-        stats.update(_size, csize, False)
+        stats.update(_size, csize, False, part=part)
         return ChunkListEntry(id, _size, csize)
 
     def chunk_decref(self, id, stats, wait=True):
@@ -1047,7 +1047,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
             self.chunks[id] = entry._replace(size=size)
         return entry.refcount
 
-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self._txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
@@ -1055,7 +1055,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
         # size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
         size = _size or size
         assert size
-        stats.update(size, csize, False)
+        stats.update(size, csize, False, part=part)
         return ChunkListEntry(id, size, csize)
 
     def chunk_decref(self, id, stats, wait=True):
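
Both cache implementations now just forward the part= flag into Statistics.update(), which replaces the earlier trick of incref'ing part-file chunks against a throwaway dummy Statistics object. A minimal sketch of that incref path under the same idea (illustrative only; TinyStats, TinyChunkIndex and the standalone chunk_incref below are not borg's cache classes):

    class TinyStats:
        def __init__(self):
            self.osize = self.csize = 0
            self.osize_parts = self.csize_parts = 0

        def update(self, size, csize, part=False):
            if not part:
                self.osize += size
                self.csize += csize
            else:
                self.osize_parts += size
                self.csize_parts += csize


    class TinyChunkIndex:
        """Maps id -> [refcount, size, csize]; just enough to show incref()."""

        def __init__(self):
            self.entries = {}

        def add(self, id, size, csize):
            self.entries[id] = [1, size, csize]

        def incref(self, id):
            entry = self.entries[id]
            entry[0] += 1
            return tuple(entry)


    def chunk_incref(index, id, stats, part=False):
        # forward part= so the caller decides which set of counters grows
        count, size, csize = index.incref(id)
        stats.update(size, csize, part=part)
        return id, size, csize


    index, stats = TinyChunkIndex(), TinyStats()
    index.add('chunk-1', size=1000, csize=400)
    chunk_incref(index, 'chunk-1', stats, part=True)  # reference from a part file
    chunk_incref(index, 'chunk-1', stats)             # reference from the complete file
    assert (stats.osize, stats.osize_parts) == (1000, 1000)
    assert (stats.csize, stats.csize_parts) == (400, 400)
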
diff --git a/src/borg/constants.py b/src/borg/constants.py
index eecb344dd..a2eff43fa 100644
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -12,7 +12,7 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us
                           'comment', 'chunker_params',
                           'recreate_cmdline',
                           'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',  # used in 1.1.0b1 .. b2
-                          ])
+                          'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts', ])
 
 # this is the set of keys that are always present in archives:
 REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
diff --git a/src/borg/helpers/checks.py b/src/borg/helpers/checks.py
index 5e4cecffd..475544356 100644
--- a/src/borg/helpers/checks.py
+++ b/src/borg/helpers/checks.py
@@ -35,5 +35,5 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.2_03':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_05':
+    if item.API_VERSION != '1.1_06':
         raise ExtensionModuleError
diff --git a/src/borg/item.pyx b/src/borg/item.pyx
index ee67ee0d2..43f0d88f6 100644
--- a/src/borg/item.pyx
+++ b/src/borg/item.pyx
@@ -12,7 +12,7 @@ cdef extern from "_item.c":
     object _optr_to_object(object bytes)
 
 
-API_VERSION = '1.1_05'
+API_VERSION = '1.1_06'
 
 
 class PropDict:
@@ -368,6 +368,12 @@ class ArchiveItem(PropDict):
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
+    size = PropDict._make_property('size', int)
+    csize = PropDict._make_property('csize', int)
+    nfiles = PropDict._make_property('nfiles', int)
+    size_parts = PropDict._make_property('size_parts', int)
+    csize_parts = PropDict._make_property('csize_parts', int)
+    nfiles_parts = PropDict._make_property('nfiles_parts', int)
 
 
 class ManifestItem(PropDict):
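
The constants.py, checks.py and item.pyx hunks above cover the compatibility surface of the new metadata: the six keys are added to the ARCHIVE_KEYS whitelist, declared as typed ArchiveItem properties, and the item API version is bumped so mismatched extension modules are rejected. A hypothetical helper in the same spirit (abbreviated key set; not borg's actual validation code) shows the intent of whitelisting:

    ARCHIVE_KEYS = frozenset([
        'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
        'comment', 'chunker_params',
        'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts',
    ])


    def known_archive_keys_only(metadata):
        # archive metadata may only use keys this version knows how to handle
        return isinstance(metadata, dict) and set(metadata) <= ARCHIVE_KEYS


    assert known_archive_keys_only({'version': 1, 'name': 'a1', 'size': 42})
    assert not known_archive_keys_only({'version': 1, 'some_future_key': 0})
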
From 6809f6f7fa54349c6add30abfc911441de9c7b13 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sat, 23 Feb 2019 14:56:53 +0100
Subject: [PATCH 2/2] calc_stats: use archive stats metadata, if available

by default, we still have to compute unique_csize the slow way,
but the code offers want_unique=False param to not compute it.
---
 src/borg/archive.py | 60 +++++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index e6f4bd2ac..ceacd3663 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -549,30 +549,48 @@ Utilization of max. archive size: {csize_max:.0%}
         self.repository.commit(compact=False)
         self.cache.commit()
 
-    def calc_stats(self, cache):
-        def add(id):
-            entry = cache.chunks[id]
-            archive_index.add(id, 1, entry.size, entry.csize)
+    def calc_stats(self, cache, want_unique=True):
+        have_borg12_meta = self.metadata.get('nfiles') is not None
+
+        if have_borg12_meta and not want_unique:
+            unique_csize = 0
+        else:
+            def add(id):
+                entry = cache.chunks[id]
+                archive_index.add(id, 1, entry.size, entry.csize)
+
+            archive_index = ChunkIndex()
+            sync = CacheSynchronizer(archive_index)
+            add(self.id)
+            pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%')
+            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
+                pi.show(increase=1)
+                add(id)
+                data = self.key.decrypt(id, chunk)
+                sync.feed(data)
+            unique_csize = archive_index.stats_against(cache.chunks)[3]
+            pi.finish()
 
-        archive_index = ChunkIndex()
-        sync = CacheSynchronizer(archive_index)
-        add(self.id)
-        pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%')
-        for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
-            pi.show(increase=1)
-            add(id)
-            data = self.key.decrypt(id, chunk)
-            sync.feed(data)
-        unique_csize = archive_index.stats_against(cache.chunks)[3]
-        pi.finish()
         stats = Statistics()
-        stats.nfiles = sync.num_files_totals if self.consider_part_files \
-            else sync.num_files_totals - sync.num_files_parts
-        stats.osize = sync.size_totals if self.consider_part_files \
-            else sync.size_totals - sync.size_parts
-        stats.csize = sync.csize_totals if self.consider_part_files \
-            else sync.csize_totals - sync.csize_parts
         stats.usize = unique_csize  # the part files use same chunks as the full file
+        if not have_borg12_meta:
+            if self.consider_part_files:
+                stats.nfiles = sync.num_files_totals
+                stats.osize = sync.size_totals
+                stats.csize = sync.csize_totals
+            else:
+                stats.nfiles = sync.num_files_totals - sync.num_files_parts
+                stats.osize = sync.size_totals - sync.size_parts
+                stats.csize = sync.csize_totals - sync.csize_parts
+        else:
+            if self.consider_part_files:
+                stats.nfiles = self.metadata.nfiles_parts + self.metadata.nfiles
+                stats.osize = self.metadata.size_parts + self.metadata.size
+                stats.csize = self.metadata.csize_parts + self.metadata.csize
+            else:
+                stats.nfiles = self.metadata.nfiles
+                stats.osize = self.metadata.size
+                stats.csize = self.metadata.csize
        return stats
 
     @contextmanager
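
With the metadata from the first patch in place, calc_stats() only needs the expensive CacheSynchronizer sweep for the deduplicated size; the totals can come straight from the archive item, and a caller that does not need the unique size can pass want_unique=False. A standalone sketch of that fallback decision (illustrative only, no borg imports; totals_from_metadata is a hypothetical helper, not code from this patch):

    def totals_from_metadata(metadata, consider_part_files=False):
        """Return (nfiles, size, csize) or None if the archive predates the new keys."""
        if metadata.get('nfiles') is None:
            return None  # old archive: caller must fall back to the slow chunk sweep
        nfiles, size, csize = metadata['nfiles'], metadata['size'], metadata['csize']
        if consider_part_files:
            nfiles += metadata['nfiles_parts']
            size += metadata['size_parts']
            csize += metadata['csize_parts']
        return nfiles, size, csize


    # archive written before this change: no 'nfiles' key -> slow path needed
    assert totals_from_metadata({'version': 1, 'name': 'a1'}) is None
    # archive written with this change: totals are available without repository access
    assert totals_from_metadata({'nfiles': 3, 'size': 300, 'csize': 120,
                                 'nfiles_parts': 1, 'size_parts': 100, 'csize_parts': 40},
                                consider_part_files=True) == (4, 400, 160)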