From 49adb7715763b8e46c0fc4d0c418175248d2bea7 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Fri, 10 Jun 2022 22:27:11 +0200
Subject: [PATCH] calc_stats: deduplicated size now, was deduplicated csize

also: remove pre12_meta cache
---
 src/borg/archive.py | 33 +++++++++++++++++++++++----------
 src/borg/cache.py   | 12 ------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index 945dc5a8a..f9dd2e486 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -629,18 +629,31 @@ def save(self, name=None, comment=None, timestamp=None, stats=None, additional_m
         self.cache.commit()
 
     def calc_stats(self, cache, want_unique=True):
-        # caching wrapper around _calc_stats which is rather slow for archives made with borg < 1.2
-        have_borg12_meta = self.metadata.get('nfiles') is not None
-        try:
-            stats = Statistics.from_raw_dict(**cache.pre12_meta[self.fpr])
-        except KeyError:  # not in pre12_meta cache
-            stats = self._calc_stats(cache, want_unique=want_unique)
-            if not have_borg12_meta:
-                cache.pre12_meta[self.fpr] = stats.as_raw_dict()
-        return stats
+        if not want_unique:
+            unique_size = 0
+        else:
+            def add(id):
+                entry = cache.chunks[id]
+                archive_index.add(id, 1, entry.size)
+
+            archive_index = ChunkIndex()
+            sync = CacheSynchronizer(archive_index)
+            add(self.id)
+            # we must escape any % char in the archive name, because we use it in a format string, see #6500
+            arch_name_escd = self.name.replace('%', '%%')
+            pi = ProgressIndicatorPercent(total=len(self.metadata.items),
+                                          msg='Calculating statistics for archive %s ... %%3.0f%%%%' % arch_name_escd,
+                                          msgid='archive.calc_stats')
+            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
+                pi.show(increase=1)
+                add(id)
+                data = self.key.decrypt(id, chunk)
+                sync.feed(data)
+            unique_size = archive_index.stats_against(cache.chunks)[1]
+            pi.finish()
 
-    def _calc_stats(self, cache, want_unique=True):
         stats = Statistics(iec=self.iec)
+        stats.usize = unique_size  # the part files use same chunks as the full file
         stats.nfiles = self.metadata.nfiles
         stats.osize = self.metadata.size
         if self.consider_part_files:
diff --git a/src/borg/cache.py b/src/borg/cache.py
index 65cbe7712..784910673 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -413,7 +413,6 @@ class CacheStatsMixin:
 
     def __init__(self, iec=False):
         self.iec = iec
-        self.pre12_meta = {}  # here we cache archive metadata for borg < 1.2
 
     def __str__(self):
         return self.str_format.format(self.format_tuple())
@@ -511,8 +510,6 @@ def create(self):
         os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
         with SaveFile(os.path.join(self.path, files_cache_name()), binary=True):
             pass  # empty file
-        with SaveFile(os.path.join(self.path, 'pre12-meta'), binary=False) as fd:
-            json.dump(self.pre12_meta, fd, indent=4)
 
     def _do_open(self):
         self.cache_config.load()
@@ -523,11 +520,6 @@ def _do_open(self):
             self.files = None
         else:
             self._read_files()
-        try:
-            with open(os.path.join(self.path, 'pre12-meta')) as fd:
-                self.pre12_meta = json.load(fd)
-        except (FileNotFoundError, json.JSONDecodeError):
-            pass
 
     def open(self):
         if not os.path.isdir(self.path):
@@ -536,9 +528,6 @@ def open(self):
         self.rollback()
 
     def close(self):
-        # save the pre12_meta cache in any case
-        with open(os.path.join(self.path, 'pre12-meta'), 'w') as fd:
-            json.dump(self.pre12_meta, fd, indent=4)
         if self.cache_config is not None:
             self.cache_config.close()
             self.cache_config = None
@@ -1037,7 +1026,6 @@ def __init__(self, repository, key, manifest, warn_if_unencrypted=True, lock_wai
         self.security_manager = SecurityManager(repository)
         self.security_manager.assert_secure(manifest, key, lock_wait=lock_wait)
 
-        self.pre12_meta = {}
         logger.warning('Note: --no-cache-sync is an experimental feature.')
 
     # Public API