diff --git a/src/borg/archive.py b/src/borg/archive.py
index 4e70c8c41..1df1401de 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -519,13 +519,20 @@ Utilization of max. archive size: {csize_max:.0%}
         has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
             if 'chunks' in item:
+                item_chunks_size = 0
                 for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if pi:
                         pi.show(increase=len(data), info=[remove_surrogates(item.path)])
                     if stdout:
                         sys.stdout.buffer.write(data)
+                    item_chunks_size += len(data)
                 if stdout:
                     sys.stdout.buffer.flush()
+                if 'size' in item:
+                    item_size = item.size
+                    if item_size != item_chunks_size:
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
             if has_damaged_chunks:
                 logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                remove_surrogates(item.path))
@@ -582,10 +589,15 @@ Utilization of max. archive size: {csize_max:.0%}
                     else:
                         fd.write(data)
                 with backup_io('truncate'):
-                    pos = fd.tell()
+                    pos = item_chunks_size = fd.tell()
                     fd.truncate(pos)
                     fd.flush()
                     self.restore_attrs(path, item, fd=fd.fileno())
+                if 'size' in item:
+                    item_size = item.size
+                    if item_size != item_chunks_size:
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
                 if has_damaged_chunks:
                     logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                    remove_surrogates(item.path))
@@ -829,6 +841,7 @@ Utilization of max. archive size: {csize_max:.0%}
         length = len(item.chunks)
         # the item should only have the *additional* chunks we processed after the last partial item:
         item.chunks = item.chunks[from_chunk:]
+        item.get_size(memorize=True)
         item.path += '.borg_part_%d' % number
         item.part = number
         number += 1
@@ -877,6 +890,7 @@ Utilization of max. archive size: {csize_max:.0%}
         )
         fd = sys.stdin.buffer  # binary
         self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
+        item.get_size(memorize=True)
         self.stats.nfiles += 1
         self.add_item(item)
         return 'i'  # stdin
@@ -937,6 +951,7 @@ Utilization of max. archive size: {csize_max:.0%}
                     cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
                 status = status or 'M'  # regular file, modified (if not 'A' already)
         item.update(self.stat_attrs(st, path))
+        item.get_size(memorize=True)
         if is_special_file:
             # we processed a special file like a regular file. reflect that in mode,
             # so it can be extracted / accessed in FUSE mount like a regular file:
@@ -1355,6 +1370,13 @@ class ArchiveChecker:
                     logger.info('{}: Completely healed previously damaged file!'.format(item.path))
                     del item.chunks_healthy
                 item.chunks = chunk_list
+                if 'size' in item:
+                    item_size = item.size
+                    item_chunks_size = item.get_size(compressed=False, from_chunks=True)
+                    if item_size != item_chunks_size:
+                        # just warn, but keep the inconsistency, so that borg extract can warn about it.
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
 
         def robust_iterator(archive):
             """Iterates through all archive items
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 8a20ef21c..243e91d2d 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -557,7 +557,7 @@ class Archiver:
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1)
             pi.output('Calculating size')
-            extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
+            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
             pi.total = extracted_size
         else:
             pi = None
@@ -616,10 +616,13 @@
 
         def sum_chunk_size(item, consider_ids=None):
             if item.get('deleted'):
-                return None
+                size = None
             else:
-                return sum(c.size for c in item.chunks
-                           if consider_ids is None or c.id in consider_ids)
+                if consider_ids is not None:  # consider only specific chunks
+                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
+                else:  # consider all chunks
+                    size = item.get_size()
+            return size
 
         def get_owner(item):
             if args.numeric_owner:
diff --git a/src/borg/cache.py b/src/borg/cache.py
index a17a5343e..504001e7a 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -20,13 +20,12 @@ from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
-from .item import Item, ArchiveItem
+from .item import Item, ArchiveItem, ChunkListEntry
 from .key import PlaintextKey
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote
 
-ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
 FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')
 
diff --git a/src/borg/constants.py b/src/borg/constants.py
index 610486d08..f7cb11c92 100644
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -1,6 +1,6 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
-                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
                        'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
                        'part'])
 
diff --git a/src/borg/fuse.py b/src/borg/fuse.py
index dbf34e1a6..33c6b3897 100644
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@@ -72,7 +72,6 @@ class FuseOperations(llfuse.Operations):
         self.contents = defaultdict(dict)
         self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
-        self.accounted_chunks = {}
         self.cache = ItemCache()
         data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
         logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
@@ -257,14 +256,6 @@
 
     def getattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        size = 0
-        dsize = 0
-        if 'chunks' in item:
-            for key, chunksize, _ in item.chunks:
-                size += chunksize
-                if self.accounted_chunks.get(key, inode) == inode:
-                    self.accounted_chunks[key] = inode
-                    dsize += chunksize
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
@@ -275,9 +266,9 @@
         entry.st_uid = item.uid
         entry.st_gid = item.gid
         entry.st_rdev = item.get('rdev', 0)
-        entry.st_size = size
+        entry.st_size = item.get_size()
         entry.st_blksize = 512
-        entry.st_blocks = dsize / 512
+        entry.st_blocks = (entry.st_size + entry.st_blksize - 1) // entry.st_blksize
         # note: older archives only have mtime (not atime nor ctime)
         mtime_ns = item.mtime
         if have_fuse_xtime_ns:
diff --git a/src/borg/helpers.py b/src/borg/helpers.py
index 0e6ecbb47..8f1e9cbca 100644
--- a/src/borg/helpers.py
+++ b/src/borg/helpers.py
@@ -105,7 +105,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_01':
+    if item.API_VERSION != '1.1_02':
         raise ExtensionModuleError
 
 
@@ -1759,10 +1759,12 @@ class ItemFormatter(BaseFormatter):
         return len(item.get('chunks', []))
 
     def calculate_size(self, item):
-        return sum(c.size for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be size 0
+        return item.get_size(compressed=False)
 
     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be csize 0
+        return item.get_size(compressed=True)
 
     def hash_item(self, hash_function, item):
         if 'chunks' not in item:
diff --git a/src/borg/item.pyx b/src/borg/item.pyx
index 4ac960a63..627ffd1fb 100644
--- a/src/borg/item.pyx
+++ b/src/borg/item.pyx
@@ -1,8 +1,10 @@
+from collections import namedtuple
+
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import StableDict
 
-API_VERSION = '1.1_01'
+API_VERSION = '1.1_02'
 
 
 class PropDict:
@@ -113,6 +115,8 @@ class PropDict:
         return property(_get, _set, _del, doc=doc)
 
 
+ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
+
 
 class Item(PropDict):
     """
     Item abstraction that deals with validation and the low-level details internally:
@@ -156,6 +160,10 @@ class Item(PropDict):
     ctime = PropDict._make_property('ctime', int)
     mtime = PropDict._make_property('mtime', int)
 
+    # size is only present for items with a chunk list and then it is sum(chunk_sizes)
+    # compatibility note: this is a new feature, in old archives size will be missing.
+    size = PropDict._make_property('size', int)
+
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -168,13 +176,48 @@ class Item(PropDict):
 
     part = PropDict._make_property('part', int)
 
-    def file_size(self, hardlink_masters=None):
-        hardlink_masters = hardlink_masters or {}
-        chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
-        chunks = self.get('chunks', chunks)
-        if chunks is None:
-            return 0
-        return sum(chunk.size for chunk in chunks)
+    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False):
+        """
+        Determine the (uncompressed or compressed) size of this item.
+
+        For hardlink slaves, the size is computed via the hardlink master's
+        chunk list, if available (otherwise size will be returned as 0).
+
+        If memorize is True, the computed size value will be stored into the item.
+ """ + attr = 'csize' if compressed else 'size' + try: + if from_chunks: + raise AttributeError + size = getattr(self, attr) + except AttributeError: + # no precomputed (c)size value available, compute it: + try: + chunks = getattr(self, 'chunks') + having_chunks = True + except AttributeError: + having_chunks = False + # this item has no (own) chunks list, but if this is a hardlink slave + # and we know the master, we can still compute the size. + if hardlink_masters is None: + chunks = None + else: + try: + master = getattr(self, 'source') + except AttributeError: + # not a hardlink slave, likely a directory or special file w/o chunks + chunks = None + else: + # hardlink slave, try to fetch hardlink master's chunks list + # todo: put precomputed size into hardlink_masters' values and use it, if present + chunks, _ = hardlink_masters.get(master, (None, None)) + if chunks is None: + return 0 + size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks) + # if requested, memorize the precomputed (c)size for items that have an own chunks list: + if memorize and having_chunks: + setattr(self, attr, size) + return size class EncryptedKey(PropDict): diff --git a/src/borg/testsuite/item.py b/src/borg/testsuite/item.py index 35934f3ba..9c66b6a67 100644 --- a/src/borg/testsuite/item.py +++ b/src/borg/testsuite/item.py @@ -142,9 +142,9 @@ def test_item_file_size(): ChunkListEntry(csize=1, size=1000, id=None), ChunkListEntry(csize=1, size=2000, id=None), ]) - assert item.file_size() == 3000 + assert item.get_size() == 3000 def test_item_file_size_no_chunks(): item = Item() - assert item.file_size() == 0 + assert item.get_size() == 0