Merge pull request #2157 from ThomasWaldmann/add-filesize

archived file items: add size metadata
enkore 2017-02-27 18:05:43 +01:00 committed by GitHub
commit 7c9c4b61d7
8 changed files with 92 additions and 32 deletions

src/borg/archive.py

@@ -519,13 +519,20 @@ Utilization of max. archive size: {csize_max:.0%}
         has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
             if 'chunks' in item:
+                item_chunks_size = 0
                 for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if pi:
                         pi.show(increase=len(data), info=[remove_surrogates(item.path)])
                     if stdout:
                         sys.stdout.buffer.write(data)
+                    item_chunks_size += len(data)
                 if stdout:
                     sys.stdout.buffer.flush()
+                if 'size' in item:
+                    item_size = item.size
+                    if item_size != item_chunks_size:
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
             if has_damaged_chunks:
                 logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                remove_surrogates(item.path))
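
Both extraction paths feed the same check: the stdout path above counts the bytes as the chunks are fetched, while the regular-file path below reads the final file position via fd.tell(). A minimal standalone sketch of the shared pattern, with hypothetical names (not borg code):

    def warn_on_size_mismatch(logger, path, stored_size, chunks_size):
        # stored_size: the 'size' value written into the item at backup time
        # chunks_size: bytes actually reconstructed from the chunk list
        if stored_size != chunks_size:
            logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
                path, stored_size, chunks_size))
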
@@ -582,10 +589,15 @@ Utilization of max. archive size: {csize_max:.0%}
                         else:
                             fd.write(data)
                 with backup_io('truncate'):
-                    pos = fd.tell()
+                    pos = item_chunks_size = fd.tell()
                     fd.truncate(pos)
                     fd.flush()
                     self.restore_attrs(path, item, fd=fd.fileno())
+                if 'size' in item:
+                    item_size = item.size
+                    if item_size != item_chunks_size:
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
                 if has_damaged_chunks:
                     logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                    remove_surrogates(item.path))
@@ -829,6 +841,7 @@ Utilization of max. archive size: {csize_max:.0%}
         length = len(item.chunks)
         # the item should only have the *additional* chunks we processed after the last partial item:
         item.chunks = item.chunks[from_chunk:]
+        item.get_size(memorize=True)
         item.path += '.borg_part_%d' % number
         item.part = number
         number += 1
@@ -877,6 +890,7 @@ Utilization of max. archive size: {csize_max:.0%}
         )
         fd = sys.stdin.buffer  # binary
         self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
+        item.get_size(memorize=True)
         self.stats.nfiles += 1
         self.add_item(item)
         return 'i'  # stdin
@@ -937,6 +951,7 @@ Utilization of max. archive size: {csize_max:.0%}
                 cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item.update(self.stat_attrs(st, path))
+        item.get_size(memorize=True)
         if is_special_file:
             # we processed a special file like a regular file. reflect that in mode,
             # so it can be extracted / accessed in FUSE mount like a regular file:
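
All three create-side paths (part files, stdin, regular files) call item.get_size(memorize=True) once the chunk list is final, so sum(chunk sizes) is computed once and stored into the item as size metadata instead of being recomputed by every later consumer. Roughly, as a simplified dict-based sketch (the real implementation is Item.get_size in item.py, shown further below):

    def memorize_size(item):
        # cache sum(chunk sizes) into the item once the chunk list is complete;
        # item['chunks'] is assumed to be a list of (id, size, csize) entries
        if 'chunks' in item and 'size' not in item:
            item['size'] = sum(size for _id, size, _csize in item['chunks'])
        return item.get('size', 0)
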
@@ -1355,6 +1370,13 @@ class ArchiveChecker:
                     logger.info('{}: Completely healed previously damaged file!'.format(item.path))
                     del item.chunks_healthy
                 item.chunks = chunk_list
+                if 'size' in item:
+                    item_size = item.size
+                    item_chunks_size = item.get_size(compressed=False, from_chunks=True)
+                    if item_size != item_chunks_size:
+                        # just warn, but keep the inconsistency, so that borg extract can warn about it.
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))

         def robust_iterator(archive):
             """Iterates through all archive items

src/borg/archiver.py

@@ -557,7 +557,7 @@ class Archiver:
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1)
             pi.output('Calculating size')
-            extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
+            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
             pi.total = extracted_size
         else:
             pi = None
@@ -616,10 +616,13 @@
         def sum_chunk_size(item, consider_ids=None):
             if item.get('deleted'):
-                return None
+                size = None
             else:
-                return sum(c.size for c in item.chunks
-                           if consider_ids is None or c.id in consider_ids)
+                if consider_ids is not None:  # consider only specific chunks
+                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
+                else:  # consider all chunks
+                    size = item.get_size()
+            return size

         def get_owner(item):
             if args.numeric_owner:
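
The rewritten sum_chunk_size keeps two cases apart: with consider_ids it sums only the listed chunks (borg diff uses this for the chunks unique to one version of a file), and without it, it defers to item.get_size(), which can now use the precomputed size. An illustrative snippet with made-up chunk data:

    from collections import namedtuple
    ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')

    chunks = [ChunkListEntry(id=b'a', size=100, csize=10),
              ChunkListEntry(id=b'b', size=200, csize=20)]
    assert sum(c.size for c in chunks if c.id in {b'b'}) == 200  # consider_ids={b'b'}
    assert sum(c.size for c in chunks) == 300                    # whole item, cf. item.get_size()
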

src/borg/cache.py

@@ -20,13 +20,12 @@ from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
-from .item import Item, ArchiveItem
+from .item import Item, ArchiveItem, ChunkListEntry
 from .key import PlaintextKey
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote

-ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
 FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')

src/borg/constants.py

@@ -1,6 +1,6 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
-                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
                        'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
                        'part'])

src/borg/fuse.py

@@ -72,7 +72,6 @@ class FuseOperations(llfuse.Operations):
         self.contents = defaultdict(dict)
         self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
-        self.accounted_chunks = {}
         self.cache = ItemCache()
         data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
         logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
@@ -257,14 +256,6 @@
     def getattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        size = 0
-        dsize = 0
-        if 'chunks' in item:
-            for key, chunksize, _ in item.chunks:
-                size += chunksize
-                if self.accounted_chunks.get(key, inode) == inode:
-                    self.accounted_chunks[key] = inode
-                    dsize += chunksize
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
@@ -275,9 +266,9 @@
         entry.st_uid = item.uid
         entry.st_gid = item.gid
         entry.st_rdev = item.get('rdev', 0)
-        entry.st_size = size
+        entry.st_size = item.get_size()
         entry.st_blksize = 512
-        entry.st_blocks = dsize / 512
+        entry.st_blocks = (entry.st_size + entry.st_blksize - 1) // entry.st_blksize
         # note: older archives only have mtime (not atime nor ctime)
         mtime_ns = item.mtime
         if have_fuse_xtime_ns:
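
st_size now comes from item.get_size(), and st_blocks becomes a ceiling division of that size by st_blksize. This drops the accounted_chunks bookkeeping (which charged each deduplicated chunk to only the first inode that claimed it) and also replaces the old float division dsize / 512 with integer arithmetic. With st_blksize at 512, the result matches the stat(2) convention of 512-byte blocks:

    def st_blocks(st_size, st_blksize=512):
        # ceiling division: how many whole blocks are needed for st_size bytes
        return (st_size + st_blksize - 1) // st_blksize

    assert st_blocks(0) == 0
    assert st_blocks(1) == 1      # even a single byte occupies one block
    assert st_blocks(1024) == 2
    assert st_blocks(1025) == 3   # 1025 = 2 * 512 + 1, rounded up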

src/borg/helpers.py

@@ -105,7 +105,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_01':
+    if item.API_VERSION != '1.1_02':
         raise ExtensionModuleError
@@ -1759,10 +1759,12 @@ class ItemFormatter(BaseFormatter):
         return len(item.get('chunks', []))

     def calculate_size(self, item):
-        return sum(c.size for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be size 0
+        return item.get_size(compressed=False)

     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be csize 0
+        return item.get_size(compressed=True)

     def hash_item(self, hash_function, item):
         if 'chunks' not in item:

src/borg/item.py

@@ -1,8 +1,10 @@
+from collections import namedtuple
+
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import StableDict

-API_VERSION = '1.1_01'
+API_VERSION = '1.1_02'


 class PropDict:
@@ -113,6 +115,8 @@ class PropDict:
         return property(_get, _set, _del, doc=doc)


+ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
+
 class Item(PropDict):
     """
     Item abstraction that deals with validation and the low-level details internally:
@@ -156,6 +160,10 @@
     ctime = PropDict._make_property('ctime', int)
     mtime = PropDict._make_property('mtime', int)

+    # size is only present for items with a chunk list and then it is sum(chunk_sizes)
+    # compatibility note: this is a new feature, in old archives size will be missing.
+    size = PropDict._make_property('size', int)
+
     hardlink_master = PropDict._make_property('hardlink_master', bool)

     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -168,13 +176,48 @@
     part = PropDict._make_property('part', int)

-    def file_size(self, hardlink_masters=None):
-        hardlink_masters = hardlink_masters or {}
-        chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
-        chunks = self.get('chunks', chunks)
-        if chunks is None:
-            return 0
-        return sum(chunk.size for chunk in chunks)
+    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False):
+        """
+        Determine the (uncompressed or compressed) size of this item.
+
+        For hardlink slaves, the size is computed via the hardlink master's
+        chunk list, if available (otherwise size will be returned as 0).
+
+        If memorize is True, the computed size value will be stored into the item.
+        """
+        attr = 'csize' if compressed else 'size'
+        try:
+            if from_chunks:
+                raise AttributeError
+            size = getattr(self, attr)
+        except AttributeError:
+            # no precomputed (c)size value available, compute it:
+            try:
+                chunks = getattr(self, 'chunks')
+                having_chunks = True
+            except AttributeError:
+                having_chunks = False
+                # this item has no (own) chunks list, but if this is a hardlink slave
+                # and we know the master, we can still compute the size.
+                if hardlink_masters is None:
+                    chunks = None
+                else:
+                    try:
+                        master = getattr(self, 'source')
+                    except AttributeError:
+                        # not a hardlink slave, likely a directory or special file w/o chunks
+                        chunks = None
+                    else:
+                        # hardlink slave, try to fetch hardlink master's chunks list
+                        # todo: put precomputed size into hardlink_masters' values and use it, if present
+                        chunks, _ = hardlink_masters.get(master, (None, None))
+            if chunks is None:
+                return 0
+            size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
+            # if requested, memorize the precomputed (c)size for items that have an own chunks list:
+            if memorize and having_chunks:
+                setattr(self, attr, size)
+        return size


 class EncryptedKey(PropDict):
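
A hedged usage sketch of the new Item.get_size API (illustrative values, assuming the borg.item module from this commit):

    from borg.item import Item, ChunkListEntry

    item = Item(path='data.bin', chunks=[
        ChunkListEntry(id=None, size=1000, csize=10),
        ChunkListEntry(id=None, size=2000, csize=20),
    ])
    assert item.get_size() == 3000                  # summed from the chunk list
    assert item.get_size(compressed=True) == 30     # sum of csize values
    assert item.get_size(memorize=True) == 3000     # also stores item.size = 3000
    assert item.get_size(from_chunks=True) == 3000  # ignores stored size, recomputes

    # Hardlink slave: no own chunk list, size is resolved via the master's chunks.
    slave = Item(path='link.bin', source='data.bin')
    masters = {'data.bin': (item.chunks, None)}
    assert slave.get_size(hardlink_masters=masters) == 3000
    assert slave.get_size() == 0                    # without the mapping, slaves report 0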

src/borg/testsuite/item.py

@@ -142,9 +142,9 @@ def test_item_file_size():
        ChunkListEntry(csize=1, size=1000, id=None),
        ChunkListEntry(csize=1, size=2000, id=None),
     ])
-    assert item.file_size() == 3000
+    assert item.get_size() == 3000


 def test_item_file_size_no_chunks():
     item = Item()
-    assert item.file_size() == 0
+    assert item.get_size() == 0
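
A possible additional test, not part of this commit, covering the hardlink-slave path in the same style (hypothetical):

    def test_item_file_size_hardlink_slave():
        master_chunks = [ChunkListEntry(csize=1, size=1000, id=None)]
        item = Item(source='master/path')
        hardlink_masters = {'master/path': (master_chunks, None)}
        assert item.get_size(hardlink_masters=hardlink_masters) == 1000
        assert item.get_size() == 0  # without the mapping, a slave reports size 0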