mirror of https://github.com/borgbackup/borg.git
compute the deduplicated size before compression
so we do not need csize for it.
This commit is contained in:
parent
1fd571a4d0
commit
19dfbe5c5c
|
@ -58,38 +58,45 @@ class Statistics:
|
|||
def __init__(self, output_json=False, iec=False):
|
||||
self.output_json = output_json
|
||||
self.iec = iec
|
||||
self.osize = self.nfiles = 0
|
||||
self.osize_parts = self.nfiles_parts = 0
|
||||
self.osize = self.usize = self.nfiles = 0
|
||||
self.osize_parts = self.usize_parts = self.nfiles_parts = 0
|
||||
self.last_progress = 0 # timestamp when last progress was shown
|
||||
|
||||
def update(self, size, part=False):
|
||||
def update(self, size, unique, part=False):
|
||||
if not part:
|
||||
self.osize += size
|
||||
if unique:
|
||||
self.usize += size
|
||||
else:
|
||||
self.osize_parts += size
|
||||
if unique:
|
||||
self.usize_parts += size
|
||||
|
||||
def __add__(self, other):
|
||||
if not isinstance(other, Statistics):
|
||||
raise TypeError('can only add Statistics objects')
|
||||
stats = Statistics(self.output_json, self.iec)
|
||||
stats.osize = self.osize + other.osize
|
||||
stats.usize = self.usize + other.usize
|
||||
stats.nfiles = self.nfiles + other.nfiles
|
||||
stats.osize_parts = self.osize_parts + other.osize_parts
|
||||
stats.usize_parts = self.usize_parts + other.usize_parts
|
||||
stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
|
||||
return stats
|
||||
|
||||
summary = "{label:15} {stats.osize_fmt:>20s}"
|
||||
summary = "{label:15} {stats.osize_fmt:>20s} {stats.usize_fmt:>20s}"
|
||||
|
||||
def __str__(self):
|
||||
return self.summary.format(stats=self, label='This archive:')
|
||||
|
||||
def __repr__(self):
|
||||
return "<{cls} object at {hash:#x} ({self.osize})>".format(
|
||||
return "<{cls} object at {hash:#x} ({self.osize}, {self.usize})>".format(
|
||||
cls=type(self).__name__, hash=id(self), self=self)
|
||||
|
||||
def as_dict(self):
|
||||
return {
|
||||
'original_size': FileSize(self.osize, iec=self.iec),
|
||||
'deduplicated_size': FileSize(self.usize, iec=self.iec),
|
||||
'nfiles': self.nfiles,
|
||||
}
|
||||
|
||||
|
@ -114,6 +121,10 @@ class Statistics:
|
|||
def osize_fmt(self):
|
||||
return format_file_size(self.osize, iec=self.iec)
|
||||
|
||||
@property
|
||||
def usize_fmt(self):
|
||||
return format_file_size(self.usize, iec=self.iec)
|
||||
|
||||
def show_progress(self, item=None, final=False, stream=None, dt=None):
|
||||
now = time.monotonic()
|
||||
if dt is None or now - self.last_progress > dt:
|
||||
|
@ -134,7 +145,7 @@ class Statistics:
|
|||
else:
|
||||
columns, lines = get_terminal_size()
|
||||
if not final:
|
||||
msg = '{0.osize_fmt} O {0.nfiles} N '.format(self)
|
||||
msg = '{0.osize_fmt} O {0.usize_fmt} U {0.nfiles} N '.format(self)
|
||||
path = remove_surrogates(item.path) if item else ''
|
||||
space = columns - swidth(msg)
|
||||
if space < 12:
|
||||
|
|
|
@ -99,7 +99,7 @@ except BaseException:
|
|||
assert EXIT_ERROR == 2, "EXIT_ERROR is not 2, as expected - fix assert AND exception handler right above this line."
|
||||
|
||||
|
||||
STATS_HEADER = " Original size"
|
||||
STATS_HEADER = " Original size Deduplicated size"
|
||||
|
||||
PURE_PYTHON_MSGPACK_WARNING = "Using a pure-python msgpack! This will result in lower performance."
|
||||
|
||||
|
@ -1797,8 +1797,8 @@ class Archiver:
|
|||
Command line: {command_line}
|
||||
Utilization of maximum supported archive size: {limits[max_archive_size]:.0%}
|
||||
------------------------------------------------------------------------------
|
||||
Original size
|
||||
This archive: {stats[original_size]:>20s}
|
||||
Original size Deduplicated size
|
||||
This archive: {stats[original_size]:>20s} {stats[deduplicated_size]:>20s}
|
||||
{cache}
|
||||
""").strip().format(cache=cache, **info))
|
||||
if self.exit_code:
|
||||
|
|
|
@ -406,7 +406,7 @@ class Cache:
|
|||
|
||||
class CacheStatsMixin:
|
||||
str_format = """\
|
||||
All archives: {0.total_size:>20s}
|
||||
All archives: {0.total_size:>20s} {0.unique_size:>20s}
|
||||
|
||||
Unique chunks Total chunks
|
||||
Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
||||
|
@ -440,7 +440,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
|
||||
def format_tuple(self):
|
||||
stats = self.stats()
|
||||
for field in ['total_size', ]:
|
||||
for field in ['total_size', 'unique_size']:
|
||||
stats[field] = format_file_size(stats[field], iec=self.iec)
|
||||
return self.Summary(**stats)
|
||||
|
||||
|
@ -905,7 +905,7 @@ class LocalCache(CacheStatsMixin):
|
|||
data = self.key.encrypt(id, chunk, compress=compress)
|
||||
self.repository.put(id, data, wait=wait)
|
||||
self.chunks.add(id, 1, size)
|
||||
stats.update(size)
|
||||
stats.update(size, not refcount)
|
||||
return ChunkListEntry(id, size)
|
||||
|
||||
def seen_chunk(self, id, size=None):
|
||||
|
@ -921,7 +921,7 @@ class LocalCache(CacheStatsMixin):
|
|||
if not self.txn_active:
|
||||
self.begin_txn()
|
||||
count, _size = self.chunks.incref(id)
|
||||
stats.update(_size, part=part)
|
||||
stats.update(_size, False, part=part)
|
||||
return ChunkListEntry(id, _size)
|
||||
|
||||
def chunk_decref(self, id, stats, wait=True, part=False):
|
||||
|
@ -931,9 +931,9 @@ class LocalCache(CacheStatsMixin):
|
|||
if count == 0:
|
||||
del self.chunks[id]
|
||||
self.repository.delete(id, wait=wait)
|
||||
stats.update(-size, part=part)
|
||||
stats.update(-size, True, part=part)
|
||||
else:
|
||||
stats.update(-size, part=part)
|
||||
stats.update(-size, False, part=part)
|
||||
|
||||
def file_known_and_unchanged(self, hashed_path, path_hash, st):
|
||||
"""
|
||||
|
@ -1072,7 +1072,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
|
|||
data = self.key.encrypt(id, chunk, compress=compress)
|
||||
self.repository.put(id, data, wait=wait)
|
||||
self.chunks.add(id, 1, size)
|
||||
stats.update(size)
|
||||
stats.update(size, not refcount)
|
||||
return ChunkListEntry(id, size)
|
||||
|
||||
def seen_chunk(self, id, size=None):
|
||||
|
@ -1094,7 +1094,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
|
|||
# size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
|
||||
size = _size or size
|
||||
assert size
|
||||
stats.update(size, part=part)
|
||||
stats.update(size, False, part=part)
|
||||
return ChunkListEntry(id, size)
|
||||
|
||||
def chunk_decref(self, id, stats, wait=True, part=False):
|
||||
|
@ -1104,9 +1104,9 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
|
|||
if count == 0:
|
||||
del self.chunks[id]
|
||||
self.repository.delete(id, wait=wait)
|
||||
stats.update(-size, part=part)
|
||||
stats.update(-size, True, part=part)
|
||||
else:
|
||||
stats.update(-size, part=part)
|
||||
stats.update(-size, False, part=part)
|
||||
|
||||
def commit(self):
|
||||
if not self._txn_active:
|
||||
|
|
|
@ -19,44 +19,46 @@ from ..platform import uid2user, gid2group
|
|||
@pytest.fixture()
|
||||
def stats():
|
||||
stats = Statistics()
|
||||
stats.update(20)
|
||||
stats.update(20, unique=True)
|
||||
return stats
|
||||
|
||||
|
||||
def test_stats_basic(stats):
|
||||
assert stats.osize == 20
|
||||
stats.update(20)
|
||||
assert stats.usize == 20
|
||||
stats.update(20, unique=False)
|
||||
assert stats.osize == 40
|
||||
assert stats.usize == 20
|
||||
|
||||
|
||||
def tests_stats_progress(stats, monkeypatch, columns=80):
|
||||
monkeypatch.setenv('COLUMNS', str(columns))
|
||||
out = StringIO()
|
||||
stats.show_progress(stream=out)
|
||||
s = '20 B O 0 N '
|
||||
s = '20 B O 20 B U 0 N '
|
||||
buf = ' ' * (columns - len(s))
|
||||
assert out.getvalue() == s + buf + "\r"
|
||||
|
||||
out = StringIO()
|
||||
stats.update(10 ** 3)
|
||||
stats.update(10 ** 3, unique=False)
|
||||
stats.show_progress(item=Item(path='foo'), final=False, stream=out)
|
||||
s = '1.02 kB O 0 N foo'
|
||||
s = '1.02 kB O 20 B U 0 N foo'
|
||||
buf = ' ' * (columns - len(s))
|
||||
assert out.getvalue() == s + buf + "\r"
|
||||
out = StringIO()
|
||||
stats.show_progress(item=Item(path='foo'*40), final=False, stream=out)
|
||||
s = '1.02 kB O 0 N foofoofoofoofoofoofoofoofoofoo...foofoofoofoofoofoofoofoofoofoofoo'
|
||||
s = '1.02 kB O 20 B U 0 N foofoofoofoofoofoofoofoofo...foofoofoofoofoofoofoofoofoofoo'
|
||||
buf = ' ' * (columns - len(s))
|
||||
assert out.getvalue() == s + buf + "\r"
|
||||
|
||||
|
||||
def test_stats_format(stats):
|
||||
assert str(stats) == """\
|
||||
This archive: 20 B"""
|
||||
This archive: 20 B 20 B"""
|
||||
s = f"{stats.osize_fmt}"
|
||||
assert s == "20 B"
|
||||
# kind of redundant, but id is variable so we can't match reliably
|
||||
assert repr(stats) == f'<Statistics object at {id(stats):#x} (20)>'
|
||||
assert repr(stats) == f'<Statistics object at {id(stats):#x} (20, 20)>'
|
||||
|
||||
|
||||
def test_stats_progress_json(stats):
|
||||
|
|
Loading…
Reference in New Issue