mirror of https://github.com/borgbackup/borg.git
Merge pull request #2157 from ThomasWaldmann/add-filesize
archived file items: add size metadata
This commit is contained in:
commit
7c9c4b61d7
|
@ -519,13 +519,20 @@ Utilization of max. archive size: {csize_max:.0%}
|
|||
has_damaged_chunks = 'chunks_healthy' in item
|
||||
if dry_run or stdout:
|
||||
if 'chunks' in item:
|
||||
item_chunks_size = 0
|
||||
for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
|
||||
if pi:
|
||||
pi.show(increase=len(data), info=[remove_surrogates(item.path)])
|
||||
if stdout:
|
||||
sys.stdout.buffer.write(data)
|
||||
item_chunks_size += len(data)
|
||||
if stdout:
|
||||
sys.stdout.buffer.flush()
|
||||
if 'size' in item:
|
||||
item_size = item.size
|
||||
if item_size != item_chunks_size:
|
||||
logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
|
||||
item.path, item_size, item_chunks_size))
|
||||
if has_damaged_chunks:
|
||||
logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
|
||||
remove_surrogates(item.path))
|
||||
|
@ -582,10 +589,15 @@ Utilization of max. archive size: {csize_max:.0%}
|
|||
else:
|
||||
fd.write(data)
|
||||
with backup_io('truncate'):
|
||||
pos = fd.tell()
|
||||
pos = item_chunks_size = fd.tell()
|
||||
fd.truncate(pos)
|
||||
fd.flush()
|
||||
self.restore_attrs(path, item, fd=fd.fileno())
|
||||
if 'size' in item:
|
||||
item_size = item.size
|
||||
if item_size != item_chunks_size:
|
||||
logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
|
||||
item.path, item_size, item_chunks_size))
|
||||
if has_damaged_chunks:
|
||||
logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
|
||||
remove_surrogates(item.path))
|
||||
|
@ -829,6 +841,7 @@ Utilization of max. archive size: {csize_max:.0%}
|
|||
length = len(item.chunks)
|
||||
# the item should only have the *additional* chunks we processed after the last partial item:
|
||||
item.chunks = item.chunks[from_chunk:]
|
||||
item.get_size(memorize=True)
|
||||
item.path += '.borg_part_%d' % number
|
||||
item.part = number
|
||||
number += 1
|
||||
|
@ -877,6 +890,7 @@ Utilization of max. archive size: {csize_max:.0%}
|
|||
)
|
||||
fd = sys.stdin.buffer # binary
|
||||
self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
|
||||
item.get_size(memorize=True)
|
||||
self.stats.nfiles += 1
|
||||
self.add_item(item)
|
||||
return 'i' # stdin
|
||||
|
@ -937,6 +951,7 @@ Utilization of max. archive size: {csize_max:.0%}
|
|||
cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
|
||||
status = status or 'M' # regular file, modified (if not 'A' already)
|
||||
item.update(self.stat_attrs(st, path))
|
||||
item.get_size(memorize=True)
|
||||
if is_special_file:
|
||||
# we processed a special file like a regular file. reflect that in mode,
|
||||
# so it can be extracted / accessed in FUSE mount like a regular file:
|
||||
|
@ -1355,6 +1370,13 @@ class ArchiveChecker:
|
|||
logger.info('{}: Completely healed previously damaged file!'.format(item.path))
|
||||
del item.chunks_healthy
|
||||
item.chunks = chunk_list
|
||||
if 'size' in item:
|
||||
item_size = item.size
|
||||
item_chunks_size = item.get_size(compressed=False, from_chunks=True)
|
||||
if item_size != item_chunks_size:
|
||||
# just warn, but keep the inconsistency, so that borg extract can warn about it.
|
||||
logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
|
||||
item.path, item_size, item_chunks_size))
|
||||
|
||||
def robust_iterator(archive):
|
||||
"""Iterates through all archive items
|
||||
|
|
|
@ -557,7 +557,7 @@ class Archiver:
|
|||
if progress:
|
||||
pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1)
|
||||
pi.output('Calculating size')
|
||||
extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
|
||||
extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
|
||||
pi.total = extracted_size
|
||||
else:
|
||||
pi = None
|
||||
|
@ -616,10 +616,13 @@ class Archiver:
|
|||
|
||||
def sum_chunk_size(item, consider_ids=None):
|
||||
if item.get('deleted'):
|
||||
return None
|
||||
size = None
|
||||
else:
|
||||
return sum(c.size for c in item.chunks
|
||||
if consider_ids is None or c.id in consider_ids)
|
||||
if consider_ids is not None: # consider only specific chunks
|
||||
size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
|
||||
else: # consider all chunks
|
||||
size = item.get_size()
|
||||
return size
|
||||
|
||||
def get_owner(item):
|
||||
if args.numeric_owner:
|
||||
|
|
|
@ -20,13 +20,12 @@ from .helpers import format_file_size
|
|||
from .helpers import yes
|
||||
from .helpers import remove_surrogates
|
||||
from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
|
||||
from .item import Item, ArchiveItem
|
||||
from .item import Item, ArchiveItem, ChunkListEntry
|
||||
from .key import PlaintextKey
|
||||
from .locking import Lock
|
||||
from .platform import SaveFile
|
||||
from .remote import cache_if_remote
|
||||
|
||||
ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
|
||||
FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
|
||||
ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
|
||||
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
|
||||
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
|
||||
'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
|
||||
'part'])
|
||||
|
||||
|
|
|
@ -72,7 +72,6 @@ class FuseOperations(llfuse.Operations):
|
|||
self.contents = defaultdict(dict)
|
||||
self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
|
||||
self.pending_archives = {}
|
||||
self.accounted_chunks = {}
|
||||
self.cache = ItemCache()
|
||||
data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
|
||||
logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
|
||||
|
@ -257,14 +256,6 @@ class FuseOperations(llfuse.Operations):
|
|||
|
||||
def getattr(self, inode, ctx=None):
|
||||
item = self.get_item(inode)
|
||||
size = 0
|
||||
dsize = 0
|
||||
if 'chunks' in item:
|
||||
for key, chunksize, _ in item.chunks:
|
||||
size += chunksize
|
||||
if self.accounted_chunks.get(key, inode) == inode:
|
||||
self.accounted_chunks[key] = inode
|
||||
dsize += chunksize
|
||||
entry = llfuse.EntryAttributes()
|
||||
entry.st_ino = inode
|
||||
entry.generation = 0
|
||||
|
@ -275,9 +266,9 @@ class FuseOperations(llfuse.Operations):
|
|||
entry.st_uid = item.uid
|
||||
entry.st_gid = item.gid
|
||||
entry.st_rdev = item.get('rdev', 0)
|
||||
entry.st_size = size
|
||||
entry.st_size = item.get_size()
|
||||
entry.st_blksize = 512
|
||||
entry.st_blocks = dsize / 512
|
||||
entry.st_blocks = (entry.st_size + entry.st_blksize - 1) // entry.st_blksize
|
||||
# note: older archives only have mtime (not atime nor ctime)
|
||||
mtime_ns = item.mtime
|
||||
if have_fuse_xtime_ns:
|
||||
|
|
|
@ -105,7 +105,7 @@ def check_extension_modules():
|
|||
raise ExtensionModuleError
|
||||
if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
|
||||
raise ExtensionModuleError
|
||||
if item.API_VERSION != '1.1_01':
|
||||
if item.API_VERSION != '1.1_02':
|
||||
raise ExtensionModuleError
|
||||
|
||||
|
||||
|
@ -1759,10 +1759,12 @@ class ItemFormatter(BaseFormatter):
|
|||
return len(item.get('chunks', []))
|
||||
|
||||
def calculate_size(self, item):
|
||||
return sum(c.size for c in item.get('chunks', []))
|
||||
# note: does not support hardlink slaves, they will be size 0
|
||||
return item.get_size(compressed=False)
|
||||
|
||||
def calculate_csize(self, item):
|
||||
return sum(c.csize for c in item.get('chunks', []))
|
||||
# note: does not support hardlink slaves, they will be csize 0
|
||||
return item.get_size(compressed=True)
|
||||
|
||||
def hash_item(self, hash_function, item):
|
||||
if 'chunks' not in item:
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
from collections import namedtuple
|
||||
|
||||
from .constants import ITEM_KEYS
|
||||
from .helpers import safe_encode, safe_decode
|
||||
from .helpers import StableDict
|
||||
|
||||
API_VERSION = '1.1_01'
|
||||
API_VERSION = '1.1_02'
|
||||
|
||||
|
||||
class PropDict:
|
||||
|
@ -113,6 +115,8 @@ class PropDict:
|
|||
return property(_get, _set, _del, doc=doc)
|
||||
|
||||
|
||||
ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
|
||||
|
||||
class Item(PropDict):
|
||||
"""
|
||||
Item abstraction that deals with validation and the low-level details internally:
|
||||
|
@ -156,6 +160,10 @@ class Item(PropDict):
|
|||
ctime = PropDict._make_property('ctime', int)
|
||||
mtime = PropDict._make_property('mtime', int)
|
||||
|
||||
# size is only present for items with a chunk list and then it is sum(chunk_sizes)
|
||||
# compatibility note: this is a new feature, in old archives size will be missing.
|
||||
size = PropDict._make_property('size', int)
|
||||
|
||||
hardlink_master = PropDict._make_property('hardlink_master', bool)
|
||||
|
||||
chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
|
||||
|
@ -168,13 +176,48 @@ class Item(PropDict):
|
|||
|
||||
part = PropDict._make_property('part', int)
|
||||
|
||||
def file_size(self, hardlink_masters=None):
|
||||
hardlink_masters = hardlink_masters or {}
|
||||
chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
|
||||
chunks = self.get('chunks', chunks)
|
||||
if chunks is None:
|
||||
return 0
|
||||
return sum(chunk.size for chunk in chunks)
|
||||
def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False):
    """
    Determine the (uncompressed or compressed) size of this item.

    For hardlink slaves, the size is computed via the hardlink master's
    chunk list, if available (otherwise size will be returned as 0).

    :param hardlink_masters: optional mapping from a hardlink master's path to a
        2-tuple whose first element is the master's chunk list. It is only
        consulted when this item has no chunk list of its own.
    :param memorize: if True, the computed value is stored into the item, but
        only when it was computed from the item's *own* chunk list.
    :param compressed: if True, sum up ``csize`` instead of ``size``.
    :param from_chunks: if True, ignore a stored value and always compute the
        result from the chunk list.
    :return: the size in bytes, or 0 if no chunk list could be determined.
    """
    attr = 'csize' if compressed else 'size'
    # NOTE(review): only a `size` property is visible in this view; confirm that
    # memorize=True together with compressed=True is a supported combination.

    if not from_chunks:
        # fast path: use the precomputed (c)size value, if the item has one.
        try:
            return getattr(self, attr)
        except AttributeError:
            pass  # no precomputed (c)size value available, compute it below

    having_chunks = True
    try:
        chunks = self.chunks
    except AttributeError:
        # this item has no (own) chunks list, but if this is a hardlink slave
        # and we know the master, we can still compute the size.
        having_chunks = False
        chunks = None
        if hardlink_masters is not None:
            try:
                master = self.source
            except AttributeError:
                # not a hardlink slave, likely a directory or special file w/o chunks
                pass
            else:
                # hardlink slave, try to fetch hardlink master's chunks list
                # todo: put precomputed size into hardlink_masters' values and use it, if present
                chunks, _ = hardlink_masters.get(master, (None, None))

    if chunks is None:
        return 0

    # chunk list entries may be plain sequences, so wrap them to get named access.
    size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
    # if requested, memorize the precomputed (c)size for items that have an own chunks list:
    if memorize and having_chunks:
        setattr(self, attr, size)
    return size
|
||||
|
||||
|
||||
class EncryptedKey(PropDict):
|
||||
|
|
|
@ -142,9 +142,9 @@ def test_item_file_size():
|
|||
ChunkListEntry(csize=1, size=1000, id=None),
|
||||
ChunkListEntry(csize=1, size=2000, id=None),
|
||||
])
|
||||
assert item.file_size() == 3000
|
||||
assert item.get_size() == 3000
|
||||
|
||||
|
||||
def test_item_file_size_no_chunks():
|
||||
item = Item()
|
||||
assert item.file_size() == 0
|
||||
assert item.get_size() == 0
|
||||
|
|
Loading…
Reference in New Issue