Merge pull request #4391 from ThomasWaldmann/archive-stat-meta

include size/csize/nfiles[_parts] stats into archive, fixes #3241
ThomasWaldmann, 2019-02-24 14:45:27 +01:00 (committed by GitHub)
commit dd2a3d42fb
6 changed files with 82 additions and 39 deletions
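
In short: while an archive is created, borg already tallies the original size, the compressed size and the number of files, plus the same totals for checkpoint part files. This commit stores those totals in the archive metadata at save time, so later stats queries can read them back instead of re-reading every item of the archive, which is what made stats on large archives slow (#3241). A minimal sketch of the idea, in plain Python and deliberately not borg's actual API:

    import json

    def save_archive(path, item_ids, totals):
        # persist the totals collected at backup time next to the item list
        with open(path, 'w') as f:
            json.dump({'items': item_ids,
                       'size': totals['size'],      # original bytes
                       'csize': totals['csize'],    # compressed bytes
                       'nfiles': totals['nfiles']}, f)

    def quick_info(path):
        # the totals are read back directly; no need to walk all items
        # and add their sizes up again
        with open(path) as f:
            meta = json.load(f)
        return meta['size'], meta['csize'], meta['nfiles']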

src/borg/archive.py

@@ -54,13 +54,20 @@ class Statistics:
     def __init__(self, output_json=False):
         self.output_json = output_json
         self.osize = self.csize = self.usize = self.nfiles = 0
+        self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
         self.last_progress = 0  # timestamp when last progress was shown

-    def update(self, size, csize, unique):
-        self.osize += size
-        self.csize += csize
-        if unique:
-            self.usize += csize
+    def update(self, size, csize, unique, part=False):
+        if not part:
+            self.osize += size
+            self.csize += csize
+            if unique:
+                self.usize += csize
+        else:
+            self.osize_parts += size
+            self.csize_parts += csize
+            if unique:
+                self.usize_parts += csize

     def __add__(self, other):
         if not isinstance(other, Statistics):
@@ -70,6 +77,10 @@ class Statistics:
         stats.csize = self.csize + other.csize
         stats.usize = self.usize + other.usize
         stats.nfiles = self.nfiles + other.nfiles
+        stats.osize_parts = self.osize_parts + other.osize_parts
+        stats.csize_parts = self.csize_parts + other.csize_parts
+        stats.usize_parts = self.usize_parts + other.usize_parts
+        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
         return stats

     summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"
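
A usage sketch of the new part-aware accounting (made-up sizes): volume booked with part=True goes to the *_parts counters and leaves the main totals untouched:

    stats = Statistics()
    stats.update(size=1000, csize=400, unique=True)              # chunk of a regular file
    stats.update(size=1000, csize=400, unique=False, part=True)  # chunk referenced from a part file
    assert (stats.osize, stats.csize, stats.usize) == (1000, 400, 400)
    assert (stats.osize_parts, stats.csize_parts, stats.usize_parts) == (1000, 400, 0)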
@@ -492,7 +503,7 @@ Utilization of max. archive size: {csize_max:.0%}
         del self.manifest.archives[self.checkpoint_name]
         self.cache.chunk_decref(self.id, self.stats)

-    def save(self, name=None, comment=None, timestamp=None, additional_metadata=None):
+    def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
         if name in self.manifest.archives:
             raise self.AlreadyExists(name)
@@ -518,6 +529,14 @@ Utilization of max. archive size: {csize_max:.0%}
             'time_end': end.strftime(ISO_FORMAT),
             'chunker_params': self.chunker_params,
         }
+        if stats is not None:
+            metadata.update({
+                'size': stats.osize,
+                'csize': stats.csize,
+                'nfiles': stats.nfiles,
+                'size_parts': stats.osize_parts,
+                'csize_parts': stats.csize_parts,
+                'nfiles_parts': stats.nfiles_parts})
         metadata.update(additional_metadata or {})
         metadata = ArchiveItem(metadata)
         data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b'archive')
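
With stats given, six new keys end up in the archive metadata. Archives written by older borg do not carry them, which is why readers probe with .get() before trusting the fast path. A sketch of the consumer side; the helper name is ours, and metadata is assumed to be an ArchiveItem:

    def precomputed_totals(metadata, consider_part_files=False):
        if metadata.get('nfiles') is None:
            return None  # pre-1.2 archive: totals must still be computed from the items
        nfiles, size, csize = metadata.nfiles, metadata.size, metadata.csize
        if consider_part_files:
            nfiles += metadata.nfiles_parts
            size += metadata.size_parts
            csize += metadata.csize_parts
        return nfiles, size, csize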
@@ -530,7 +549,12 @@ Utilization of max. archive size: {csize_max:.0%}
         self.repository.commit(compact=False)
         self.cache.commit()

-    def calc_stats(self, cache):
-        def add(id):
-            entry = cache.chunks[id]
-            archive_index.add(id, 1, entry.size, entry.csize)
+    def calc_stats(self, cache, want_unique=True):
+        have_borg12_meta = self.metadata.get('nfiles') is not None
+
+        if have_borg12_meta and not want_unique:
+            unique_csize = 0
+        else:
+            def add(id):
+                entry = cache.chunks[id]
+                archive_index.add(id, 1, entry.size, entry.csize)
@@ -546,14 +570,27 @@ Utilization of max. archive size: {csize_max:.0%}
-            sync.feed(data)
-        unique_csize = archive_index.stats_against(cache.chunks)[3]
-        pi.finish()
+                sync.feed(data)
+            unique_csize = archive_index.stats_against(cache.chunks)[3]
+            pi.finish()
+
         stats = Statistics()
-        stats.nfiles = sync.num_files_totals if self.consider_part_files \
-            else sync.num_files_totals - sync.num_files_parts
-        stats.osize = sync.size_totals if self.consider_part_files \
-            else sync.size_totals - sync.size_parts
-        stats.csize = sync.csize_totals if self.consider_part_files \
-            else sync.csize_totals - sync.csize_parts
         stats.usize = unique_csize  # the part files use same chunks as the full file
+        if not have_borg12_meta:
+            if self.consider_part_files:
+                stats.nfiles = sync.num_files_totals
+                stats.osize = sync.size_totals
+                stats.csize = sync.csize_totals
+            else:
+                stats.nfiles = sync.num_files_totals - sync.num_files_parts
+                stats.osize = sync.size_totals - sync.size_parts
+                stats.csize = sync.csize_totals - sync.csize_parts
+        else:
+            if self.consider_part_files:
+                stats.nfiles = self.metadata.nfiles_parts + self.metadata.nfiles
+                stats.osize = self.metadata.size_parts + self.metadata.size
+                stats.csize = self.metadata.csize_parts + self.metadata.csize
+            else:
+                stats.nfiles = self.metadata.nfiles
+                stats.osize = self.metadata.size
+                stats.csize = self.metadata.csize
         return stats

     @contextmanager
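
The expensive part of calc_stats() is building an ad-hoc chunk index to derive the deduplicated size (usize); nfiles/osize/csize now come straight from the archive metadata when present, and callers that do not need usize can skip the index sync entirely via want_unique=False. A hedged sketch of the caller's view, with archive and cache objects assumed to exist:

    stats = archive.calc_stats(cache, want_unique=False)
    print(stats.nfiles, stats.osize, stats.csize)  # cheap for archives carrying the new metadata
    print(stats.usize)                             # 0 here: the deduplicated size was not computed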
@@ -1057,9 +1094,9 @@ class ChunksProcessor:
             # if we created part files, we have referenced all chunks from the part files,
             # but we also will reference the same chunks also from the final, complete file:
-            dummy_stats = Statistics()  # do not count this data volume twice
             for chunk in item.chunks:
-                cache.chunk_incref(chunk.id, dummy_stats, size=chunk.size)
+                cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
+            stats.nfiles_parts += part_number - 1


 class FilesystemObjectProcessors:
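
Chunks referenced from both a part file and the final, complete file used to be incref'd against a throwaway dummy_stats object so their volume would not be counted twice; now the same incref books that volume under the *_parts counters instead of discarding it. Illustrated with the Statistics class from this diff:

    stats = Statistics()
    stats.update(1000, 400, unique=False, part=True)  # incref from the part file
    stats.update(1000, 400, unique=False)             # incref from the final file
    assert stats.osize == 1000 and stats.osize_parts == 1000  # booked separately, not dropped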
@@ -1882,7 +1919,7 @@ class ArchiveRecreater:
             return
         if comment is None:
             comment = archive.metadata.get('comment', '')
-        target.save(comment=comment, additional_metadata={
+        target.save(comment=comment, stats=target.stats, additional_metadata={
             # keep some metadata as in original archive:
             'time': archive.metadata.time,
             'time_end': archive.metadata.get('time_end') or archive.metadata.time,

src/borg/archiver.py

@@ -502,11 +502,11 @@ class Archiver:
                               keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
                               restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
-                archive.save(comment=args.comment, timestamp=args.timestamp)
                 if args.progress:
                     archive.stats.show_progress(final=True)
-                args.stats |= args.json
+                archive.stats += fso.stats
+                archive.save(comment=args.comment, timestamp=args.timestamp, stats=archive.stats)
+                args.stats |= args.json
                 if args.stats:
                     if args.json:
                         json_print(basic_json_data(manifest, cache=cache, extra={

src/borg/cache.py

@@ -903,11 +903,11 @@ class LocalCache(CacheStatsMixin):
                                  id, stored_size, size))
         return refcount

-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self.txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
-        stats.update(_size, csize, False)
+        stats.update(_size, csize, False, part=part)
         return ChunkListEntry(id, _size, csize)

     def chunk_decref(self, id, stats, wait=True):
def chunk_decref(self, id, stats, wait=True):
@@ -1047,7 +1047,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
             self.chunks[id] = entry._replace(size=size)
         return entry.refcount

-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self._txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
@@ -1055,7 +1055,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
         # size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
         size = _size or size
         assert size
-        stats.update(size, csize, False)
+        stats.update(size, csize, False, part=part)
         return ChunkListEntry(id, size, csize)

     def chunk_decref(self, id, stats, wait=True):

src/borg/constants.py

@@ -12,7 +12,7 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us
                           'comment', 'chunker_params',
                           'recreate_cmdline',
                           'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',  # used in 1.1.0b1 .. b2
-                          ])
+                          'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts', ])

 # this is the set of keys that are always present in archives:
 REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
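
ARCHIVE_KEYS enumerates every key that may appear in archive metadata (REQUIRED_ARCHIVE_KEYS those that must), so the six new fields have to be registered here. A simplified sketch of how such a whitelist is typically checked; the function name is ours, not necessarily how borg applies it:

    def looks_like_valid_archive(meta_dict):
        keys = set(meta_dict)
        return REQUIRED_ARCHIVE_KEYS <= keys <= ARCHIVE_KEYS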

src/borg/helpers.py

@@ -35,5 +35,5 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.2_03':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_05':
+    if item.API_VERSION != '1.1_06':
         raise ExtensionModuleError

src/borg/item.pyx

@@ -12,7 +12,7 @@ cdef extern from "_item.c":
     object _optr_to_object(object bytes)


-API_VERSION = '1.1_05'
+API_VERSION = '1.1_06'


 class PropDict:
@@ -368,6 +368,12 @@ class ArchiveItem(PropDict):
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
+    size = PropDict._make_property('size', int)
+    csize = PropDict._make_property('csize', int)
+    nfiles = PropDict._make_property('nfiles', int)
+    size_parts = PropDict._make_property('size_parts', int)
+    csize_parts = PropDict._make_property('csize_parts', int)
+    nfiles_parts = PropDict._make_property('nfiles_parts', int)


 class ManifestItem(PropDict):
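
The new ArchiveItem attributes are typed like the existing ones: PropDict._make_property generates properties that reject values of the wrong type on assignment. A small sketch with made-up values:

    item = ArchiveItem()
    item.size = 12345       # accepted, declared as int
    item.nfiles_parts = 2   # accepted
    # item.csize = '678'    # would raise TypeError, since csize is declared as int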