Merge pull request #4391 from ThomasWaldmann/archive-stat-meta

include size/csize/nfiles[_parts] stats into archive, fixes #3241
TW · 2019-02-24 14:45:27 +01:00 · committed by GitHub
commit dd2a3d42fb
6 changed files with 82 additions and 39 deletions

src/borg/archive.py

@@ -54,13 +54,20 @@ class Statistics:
     def __init__(self, output_json=False):
         self.output_json = output_json
         self.osize = self.csize = self.usize = self.nfiles = 0
+        self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
         self.last_progress = 0  # timestamp when last progress was shown
 
-    def update(self, size, csize, unique):
-        self.osize += size
-        self.csize += csize
-        if unique:
-            self.usize += csize
+    def update(self, size, csize, unique, part=False):
+        if not part:
+            self.osize += size
+            self.csize += csize
+            if unique:
+                self.usize += csize
+        else:
+            self.osize_parts += size
+            self.csize_parts += csize
+            if unique:
+                self.usize_parts += csize
 
     def __add__(self, other):
         if not isinstance(other, Statistics):
@@ -70,6 +77,10 @@ class Statistics:
         stats.csize = self.csize + other.csize
         stats.usize = self.usize + other.usize
         stats.nfiles = self.nfiles + other.nfiles
+        stats.osize_parts = self.osize_parts + other.osize_parts
+        stats.csize_parts = self.csize_parts + other.csize_parts
+        stats.usize_parts = self.usize_parts + other.usize_parts
+        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
         return stats
 
     summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"
@@ -492,7 +503,7 @@ Utilization of max. archive size: {csize_max:.0%}
         del self.manifest.archives[self.checkpoint_name]
         self.cache.chunk_decref(self.id, self.stats)
 
-    def save(self, name=None, comment=None, timestamp=None, additional_metadata=None):
+    def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
         if name in self.manifest.archives:
             raise self.AlreadyExists(name)
@@ -518,6 +529,14 @@ Utilization of max. archive size: {csize_max:.0%}
             'time_end': end.strftime(ISO_FORMAT),
             'chunker_params': self.chunker_params,
         }
+        if stats is not None:
+            metadata.update({
+                'size': stats.osize,
+                'csize': stats.csize,
+                'nfiles': stats.nfiles,
+                'size_parts': stats.osize_parts,
+                'csize_parts': stats.csize_parts,
+                'nfiles_parts': stats.nfiles_parts})
         metadata.update(additional_metadata or {})
         metadata = ArchiveItem(metadata)
         data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b'archive')
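
Storing these counters at save() time is the point of the PR: later commands can report archive sizes without decrypting and walking every item chunk (the slow recalculation that #3241 is about). A sketch of the keys written, with illustrative values:

stats_keys = {             # sketch only -- mirrors the hunk above
    'size': 1048576,       # stats.osize: original size of regular file content
    'csize': 524288,       # stats.csize: compressed/encrypted size
    'nfiles': 42,          # stats.nfiles: number of regular files
    'size_parts': 0,       # the same three counters, for checkpoint
    'csize_parts': 0,      # part files created by interrupted backups
    'nfiles_parts': 0,
}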
@@ -530,30 +549,48 @@ Utilization of max. archive size: {csize_max:.0%}
         self.repository.commit(compact=False)
         self.cache.commit()
 
-    def calc_stats(self, cache):
-        def add(id):
-            entry = cache.chunks[id]
-            archive_index.add(id, 1, entry.size, entry.csize)
+    def calc_stats(self, cache, want_unique=True):
+        have_borg12_meta = self.metadata.get('nfiles') is not None
 
-        archive_index = ChunkIndex()
-        sync = CacheSynchronizer(archive_index)
-        add(self.id)
-        pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%')
-        for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
-            pi.show(increase=1)
-            add(id)
-            data = self.key.decrypt(id, chunk)
-            sync.feed(data)
-        unique_csize = archive_index.stats_against(cache.chunks)[3]
-        pi.finish()
+        if have_borg12_meta and not want_unique:
+            unique_csize = 0
+        else:
+            def add(id):
+                entry = cache.chunks[id]
+                archive_index.add(id, 1, entry.size, entry.csize)
+
+            archive_index = ChunkIndex()
+            sync = CacheSynchronizer(archive_index)
+            add(self.id)
+            pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%')
+            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
+                pi.show(increase=1)
+                add(id)
+                data = self.key.decrypt(id, chunk)
+                sync.feed(data)
+            unique_csize = archive_index.stats_against(cache.chunks)[3]
+            pi.finish()
+
         stats = Statistics()
-        stats.nfiles = sync.num_files_totals if self.consider_part_files \
-                       else sync.num_files_totals - sync.num_files_parts
-        stats.osize = sync.size_totals if self.consider_part_files \
-                      else sync.size_totals - sync.size_parts
-        stats.csize = sync.csize_totals if self.consider_part_files \
-                      else sync.csize_totals - sync.csize_parts
         stats.usize = unique_csize  # the part files use same chunks as the full file
+        if not have_borg12_meta:
+            if self.consider_part_files:
+                stats.nfiles = sync.num_files_totals
+                stats.osize = sync.size_totals
+                stats.csize = sync.csize_totals
+            else:
+                stats.nfiles = sync.num_files_totals - sync.num_files_parts
+                stats.osize = sync.size_totals - sync.size_parts
+                stats.csize = sync.csize_totals - sync.csize_parts
+        else:
+            if self.consider_part_files:
+                stats.nfiles = self.metadata.nfiles_parts + self.metadata.nfiles
+                stats.osize = self.metadata.size_parts + self.metadata.size
+                stats.csize = self.metadata.csize_parts + self.metadata.csize
+            else:
+                stats.nfiles = self.metadata.nfiles
+                stats.osize = self.metadata.size
+                stats.csize = self.metadata.csize
         return stats
 
     @contextmanager
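
calc_stats() now prefers the counters stored in the archive metadata (have_borg12_meta) and only falls back to the full CacheSynchronizer sweep for archives written by older borg. The chunk-index walk remains the only way to get the deduplicated size, so callers that do not need usize can pass want_unique=False and get a cheap call. A hedged usage sketch (archive and cache assumed to be an open Archive and its chunk cache):

# Fast path: stats come from stored metadata; usize stays 0 (not computed).
stats = archive.calc_stats(cache, want_unique=False)
print(stats.nfiles, stats.osize, stats.csize)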
@@ -1057,9 +1094,9 @@ class ChunksProcessor:
             # if we created part files, we have referenced all chunks from the part files,
             # but we also will reference the same chunks also from the final, complete file:
-            dummy_stats = Statistics()  # do not count this data volume twice
             for chunk in item.chunks:
-                cache.chunk_incref(chunk.id, dummy_stats, size=chunk.size)
+                cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
+            stats.nfiles_parts += part_number - 1
 
 
 class FilesystemObjectProcessors:
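
Previously the re-references for the final, complete file went into a throwaway dummy_stats so that volume was not counted twice; now the same volume is kept, but in the *_parts counters, and the number of part files written is recorded in nfiles_parts. Continuing the StatsSketch toy from earlier (numbers illustrative):

stats = StatsSketch()  # toy class from the sketch after the Statistics hunks
stats.update(4096, 2048, unique=True)              # chunk counted while chunking the file
stats.update(4096, 2048, unique=False, part=True)  # complete file re-references it: *_parts only
stats.nfiles_parts += 2 - 1                        # part_number - 1 part files were written
assert (stats.osize, stats.osize_parts, stats.nfiles_parts) == (4096, 4096, 1)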
@@ -1882,7 +1919,7 @@ class ArchiveRecreater:
             return
         if comment is None:
             comment = archive.metadata.get('comment', '')
-        target.save(comment=comment, additional_metadata={
+        target.save(comment=comment, stats=target.stats, additional_metadata={
             # keep some metadata as in original archive:
             'time': archive.metadata.time,
             'time_end': archive.metadata.get('time_end') or archive.metadata.time,

src/borg/archiver.py

@@ -502,11 +502,11 @@ class Archiver:
                               keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
                               restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
-                archive.save(comment=args.comment, timestamp=args.timestamp)
                 if args.progress:
                     archive.stats.show_progress(final=True)
-                args.stats |= args.json
                 archive.stats += fso.stats
+                archive.save(comment=args.comment, timestamp=args.timestamp, stats=archive.stats)
+                args.stats |= args.json
                 if args.stats:
                     if args.json:
                         json_print(basic_json_data(manifest, cache=cache, extra={
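
The reordering is what makes the new metadata complete: fso.stats (counters collected while walking the filesystem objects) must be merged into archive.stats before save() runs, since save() now embeds those numbers into the archive. In sketch form (names as in the hunk above):

archive.stats += fso.stats                    # 1. finish the counters
archive.save(comment=args.comment, timestamp=args.timestamp,
             stats=archive.stats)             # 2. then persist them in the archive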

src/borg/cache.py

@@ -903,11 +903,11 @@ class LocalCache(CacheStatsMixin):
                                    id, stored_size, size))
         return refcount
 
-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self.txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
-        stats.update(_size, csize, False)
+        stats.update(_size, csize, False, part=part)
         return ChunkListEntry(id, _size, csize)
 
     def chunk_decref(self, id, stats, wait=True):
@@ -1047,7 +1047,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
             self.chunks[id] = entry._replace(size=size)
         return entry.refcount
 
-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self._txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
@@ -1055,7 +1055,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
         # size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
         size = _size or size
         assert size
-        stats.update(size, csize, False)
+        stats.update(size, csize, False, part=part)
         return ChunkListEntry(id, size, csize)
 
     def chunk_decref(self, id, stats, wait=True):
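
Both cache implementations (LocalCache and the second, ad-hoc cache below it) only forward the new flag into Statistics.update(), so any caller can attribute a reference to part files at incref time. Hedged call sketch (chunk_id, chunk_size and stats assumed from surrounding code):

entry = cache.chunk_incref(chunk_id, stats, size=chunk_size, part=True)
# stats.update(..., part=True) ran: the regular osize/csize stay untouched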

src/borg/constants.py

@@ -12,7 +12,7 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us
                           'comment', 'chunker_params',
                           'recreate_cmdline',
                           'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',  # used in 1.1.0b1 .. b2
-                          ])
+                          'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts', ])
 
 # this is the set of keys that are always present in archives:
 REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
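
ARCHIVE_KEYS is the whitelist of keys that may appear in archive metadata; among other things, borg check consults it when scanning the repository for valid archive items, so the six new keys must be registered here. A toy sketch of the whitelist idea (valid_archive here is hypothetical, not borg's actual validator):

REQUIRED = frozenset(['version', 'name', 'items', 'cmdline', 'time'])
KNOWN = REQUIRED | {'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts'}

def valid_archive(obj):
    # all required keys present, no unknown keys
    return isinstance(obj, dict) and REQUIRED <= obj.keys() <= KNOWN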

src/borg/helpers/checks.py

@@ -35,5 +35,5 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.2_03':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_05':
+    if item.API_VERSION != '1.1_06':
         raise ExtensionModuleError

src/borg/item.pyx

@@ -12,7 +12,7 @@ cdef extern from "_item.c":
     object _optr_to_object(object bytes)
 
-API_VERSION = '1.1_05'
+API_VERSION = '1.1_06'
 
 
 class PropDict:
@@ -368,6 +368,12 @@ class ArchiveItem(PropDict):
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
+    size = PropDict._make_property('size', int)
+    csize = PropDict._make_property('csize', int)
+    nfiles = PropDict._make_property('nfiles', int)
+    size_parts = PropDict._make_property('size_parts', int)
+    csize_parts = PropDict._make_property('csize_parts', int)
+    nfiles_parts = PropDict._make_property('nfiles_parts', int)
 
 
 class ManifestItem(PropDict):
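
The six new PropDict properties are what bumped the item module's API_VERSION from 1.1_05 to 1.1_06 (and why check_extension_modules above now tests for the new value): the compiled extension and the Python code must agree on the item schema. Hedged access sketch (values illustrative; assumes a borg tree with this change built):

from borg.item import ArchiveItem

ai = ArchiveItem(internal_dict={'size': 1048576, 'csize': 524288, 'nfiles': 42})
print(ai.size, ai.csize, ai.nfiles)  # -> 1048576 524288 42
print(ai.get('size_parts', 0))       # dict-style access works too -> 0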