info / create --stats: add --iec option

If --iec is passed, then sizes are expressed in IEC binary units (powers
of 1024, e.g. KiB/MiB) instead of decimal units (powers of 1000, e.g. kB/MB).
This commit is contained in:
Romain Vimont 2021-03-21 00:33:31 +01:00 committed by Thomas Waldmann
parent 57abf9a3ad
commit 9ddcfaf4f7
5 changed files with 71 additions and 31 deletions

View File

@ -54,8 +54,9 @@ has_link = hasattr(os, 'link')
class Statistics:
def __init__(self, output_json=False):
def __init__(self, output_json=False, iec=False):
self.output_json = output_json
self.iec = iec
self.osize = self.csize = self.usize = self.nfiles = 0
self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
self.last_progress = 0 # timestamp when last progress was shown
@ -75,7 +76,7 @@ class Statistics:
def __add__(self, other):
if not isinstance(other, Statistics):
raise TypeError('can only add Statistics objects')
stats = Statistics(self.output_json)
stats = Statistics(self.output_json, self.iec)
stats.osize = self.osize + other.osize
stats.csize = self.csize + other.csize
stats.usize = self.usize + other.usize
@ -97,23 +98,23 @@ class Statistics:
def as_dict(self):
return {
'original_size': FileSize(self.osize),
'compressed_size': FileSize(self.csize),
'deduplicated_size': FileSize(self.usize),
'original_size': FileSize(self.osize, iec=self.iec),
'compressed_size': FileSize(self.csize, iec=self.iec),
'deduplicated_size': FileSize(self.usize, iec=self.iec),
'nfiles': self.nfiles,
}
@property
def osize_fmt(self):
return format_file_size(self.osize)
return format_file_size(self.osize, iec=self.iec)
@property
def usize_fmt(self):
return format_file_size(self.usize)
return format_file_size(self.usize, iec=self.iec)
@property
def csize_fmt(self):
return format_file_size(self.csize)
return format_file_size(self.csize, iec=self.iec)
def show_progress(self, item=None, final=False, stream=None, dt=None):
now = time.monotonic()
@ -400,14 +401,15 @@ class Archive:
checkpoint_interval=1800, numeric_ids=False, noatime=False, noctime=False,
noflags=False, noacls=False, noxattrs=False,
progress=False, chunker_params=CHUNKER_PARAMS, start=None, start_monotonic=None, end=None,
consider_part_files=False, log_json=False):
consider_part_files=False, log_json=False, iec=False):
self.cwd = os.getcwd()
self.key = key
self.repository = repository
self.cache = cache
self.manifest = manifest
self.hard_links = {}
self.stats = Statistics(output_json=log_json)
self.stats = Statistics(output_json=log_json, iec=iec)
self.iec = iec
self.show_progress = progress
self.name = name # overwritten later with name from archive metadata
self.name_in_manifest = name # can differ from .name later (if borg check fixed duplicate archive names)
@ -644,7 +646,7 @@ Utilization of max. archive size: {csize_max:.0%}
unique_csize = archive_index.stats_against(cache.chunks)[3]
pi.finish()
stats = Statistics()
stats = Statistics(iec=self.iec)
stats.usize = unique_csize # the part files use same chunks as the full file
if not have_borg12_meta:
if self.consider_part_files:
@ -1220,7 +1222,7 @@ class FilesystemObjectProcessors:
def __init__(self, *, metadata_collector, cache, key,
add_item, process_file_chunks,
chunker_params, show_progress, sparse,
log_json):
log_json, iec):
self.metadata_collector = metadata_collector
self.cache = cache
self.key = key
@ -1229,7 +1231,7 @@ class FilesystemObjectProcessors:
self.show_progress = show_progress
self.hard_links = {}
self.stats = Statistics(output_json=log_json) # threading: done by cache (including progress)
self.stats = Statistics(output_json=log_json, iec=iec) # threading: done by cache (including progress)
self.cwd = os.getcwd()
self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)

View File

@ -175,7 +175,8 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True,
with Cache(repository, kwargs['key'], kwargs['manifest'],
progress=getattr(args, 'progress', False), lock_wait=self.lock_wait,
cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE),
consider_part_files=getattr(args, 'consider_part_files', False)) as cache_:
consider_part_files=getattr(args, 'consider_part_files', False),
iec=getattr(args, 'iec', False)) as cache_:
return method(self, args, repository=repository, cache=cache_, **kwargs)
else:
return method(self, args, repository=repository, **kwargs)
@ -192,7 +193,7 @@ def with_archive(method):
noacls=getattr(args, 'noacls', False),
noxattrs=getattr(args, 'noxattrs', False),
cache=kwargs.get('cache'),
consider_part_files=args.consider_part_files, log_json=args.log_json)
consider_part_files=args.consider_part_files, log_json=args.log_json, iec=args.iec)
return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs)
return wrapper
@ -647,13 +648,13 @@ class Archiver:
if not dry_run:
with Cache(repository, key, manifest, progress=args.progress,
lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync,
cache_mode=args.files_cache_mode) as cache:
cache_mode=args.files_cache_mode, iec=args.iec) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
numeric_ids=args.numeric_ids, noatime=not args.atime, noctime=args.noctime,
progress=args.progress,
chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic,
log_json=args.log_json)
log_json=args.log_json, iec=args.iec)
metadata_collector = MetadataCollector(noatime=not args.atime, noctime=args.noctime,
noflags=args.nobsdflags or args.noflags, noacls=args.noacls, noxattrs=args.noxattrs,
numeric_ids=args.numeric_ids, nobirthtime=args.nobirthtime)
@ -663,7 +664,7 @@ class Archiver:
fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key,
process_file_chunks=cp.process_file_chunks, add_item=archive.add_item,
chunker_params=args.chunker_params, show_progress=args.progress, sparse=args.sparse,
log_json=args.log_json)
log_json=args.log_json, iec=args.iec)
create_inner(archive, cache, fso)
else:
create_inner(None, None, None)
@ -1408,7 +1409,7 @@ class Archiver:
format = "{archive}{NL}"
else:
format = "{archive:<36} {time} [{id}]{NL}"
formatter = ArchiveFormatter(format, repository, manifest, key, json=args.json)
formatter = ArchiveFormatter(format, repository, manifest, key, json=args.json, iec=args.iec)
output_data = []
@ -1449,7 +1450,7 @@ class Archiver:
for i, archive_name in enumerate(archive_names, 1):
archive = Archive(repository, key, manifest, archive_name, cache=cache,
consider_part_files=args.consider_part_files)
consider_part_files=args.consider_part_files, iec=args.iec)
info = archive.info()
if args.json:
output_data.append(info)
@ -2730,6 +2731,8 @@ class Archiver:
'The logger path is borg.debug.<TOPIC> if TOPIC is not fully qualified.')
add_common_option('-p', '--progress', dest='progress', action='store_true',
help='show progress information')
add_common_option('--iec', action='store_true',
help='format using IEC units (1KiB = 1024B)')
add_common_option('--log-json', dest='log_json', action='store_true',
help='Output one JSON object per log line instead of formatted text.')
add_common_option('--lock-wait', metavar='SECONDS', dest='lock_wait', type=int, default=1,

View File

@ -367,15 +367,15 @@ class Cache:
def __new__(cls, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True,
progress=False, lock_wait=None, permit_adhoc_cache=False, cache_mode=DEFAULT_FILES_CACHE_MODE,
consider_part_files=False):
consider_part_files=False, iec=False):
def local():
return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync,
warn_if_unencrypted=warn_if_unencrypted, progress=progress,
warn_if_unencrypted=warn_if_unencrypted, progress=progress, iec=iec,
lock_wait=lock_wait, cache_mode=cache_mode, consider_part_files=consider_part_files)
def adhoc():
return AdHocCache(repository=repository, key=key, manifest=manifest, lock_wait=lock_wait,
return AdHocCache(repository=repository, key=key, manifest=manifest, lock_wait=lock_wait, iec=iec,
consider_part_files=consider_part_files)
if not permit_adhoc_cache:
@ -405,6 +405,9 @@ All archives: {0.total_size:>20s} {0.total_csize:>20s} {0.unique_csize:>20s}
Unique chunks Total chunks
Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
def __init__(self, iec=False):
self.iec = iec
def __str__(self):
return self.str_format.format(self.format_tuple())
@ -435,7 +438,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
def format_tuple(self):
stats = self.stats()
for field in ['total_size', 'total_csize', 'unique_csize']:
stats[field] = format_file_size(stats[field])
stats[field] = format_file_size(stats[field], iec=self.iec)
return self.Summary(**stats)
def chunks_stored_size(self):
@ -448,13 +451,15 @@ class LocalCache(CacheStatsMixin):
"""
def __init__(self, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True,
progress=False, lock_wait=None, cache_mode=DEFAULT_FILES_CACHE_MODE, consider_part_files=False):
progress=False, lock_wait=None, cache_mode=DEFAULT_FILES_CACHE_MODE, consider_part_files=False,
iec=False):
"""
:param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
:param lock_wait: timeout for lock acquisition (int [s] or None [wait forever])
:param sync: do :meth:`.sync`
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
"""
CacheStatsMixin.__init__(self, iec=iec)
self.repository = repository
self.key = key
self.manifest = manifest
@ -1049,7 +1054,9 @@ All archives: unknown unknown unknown
Unique chunks Total chunks
Chunk index: {0.total_unique_chunks:20d} unknown"""
def __init__(self, repository, key, manifest, warn_if_unencrypted=True, lock_wait=None, consider_part_files=False):
def __init__(self, repository, key, manifest, warn_if_unencrypted=True, lock_wait=None, consider_part_files=False,
iec=False):
CacheStatsMixin.__init__(self, iec=iec)
self.repository = repository
self.key = key
self.manifest = manifest

View File

@ -222,15 +222,21 @@ def SortBySpec(text):
return text.replace('timestamp', 'ts')
def format_file_size(v, precision=2, sign=False):
def format_file_size(v, precision=2, sign=False, iec=False):
"""Format file size into a human friendly format
"""
return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)
fn = sizeof_fmt_iec if iec else sizeof_fmt_decimal
return fn(v, suffix='B', sep=' ', precision=precision, sign=sign)
class FileSize(int):
def __new__(cls, value, iec=False):
obj = int.__new__(cls, value)
obj.iec = iec
return obj
def __format__(self, format_spec):
return format_file_size(int(self)).__format__(format_spec)
return format_file_size(int(self), iec=self.iec).__format__(format_spec)
def parse_file_size(s):
@ -593,7 +599,7 @@ class ArchiveFormatter(BaseFormatter):
assert not keys, str(keys)
return "\n".join(help)
def __init__(self, format, repository, manifest, key, *, json=False):
def __init__(self, format, repository, manifest, key, *, json=False, iec=False):
self.repository = repository
self.manifest = manifest
self.key = key
@ -601,6 +607,7 @@ class ArchiveFormatter(BaseFormatter):
self.id = None
self._archive = None
self.json = json
self.iec = iec
static_keys = {} # here could be stuff on repo level, above archive level
static_keys.update(self.FIXED_KEYS)
self.format = partial_format(format, static_keys)
@ -644,7 +651,7 @@ class ArchiveFormatter(BaseFormatter):
"""lazy load / update loaded archive"""
if self._archive is None or self._archive.id != self.id:
from ..archive import Archive
self._archive = Archive(self.repository, self.key, self.manifest, self.name)
self._archive = Archive(self.repository, self.key, self.manifest, self.name, iec=self.iec)
return self._archive
def get_meta(self, key, rs):

View File

@ -610,6 +610,27 @@ def test_file_size():
assert format_file_size(size) == fmt
def test_file_size_iec():
"""test the size formatting routines"""
iec_size_map = {
0: '0 B',
2**0: '1 B',
2**10: '1.00 KiB',
2**20: '1.00 MiB',
2**30: '1.00 GiB',
2**40: '1.00 TiB',
2**50: '1.00 PiB',
2**60: '1.00 EiB',
2**70: '1.00 ZiB',
2**80: '1.00 YiB',
-2**0: '-1 B',
-2**10: '-1.00 KiB',
-2**20: '-1.00 MiB',
}
for size, fmt in iec_size_map.items():
assert format_file_size(size, iec=True) == fmt
def test_file_size_precision():
assert format_file_size(1234, precision=1) == '1.2 kB' # rounded down
assert format_file_size(1254, precision=1) == '1.3 kB' # rounded up