archived file items: add size metadata

If an item has a chunk list, pre-compute the total size and store it in the "size" metadata entry.

This speeds up access to the item size (e.g. for regular files) and could also be used to verify the validity of the chunk list.

Note about hardlinks: size is only stored for hardlink masters (only they have their own chunk list).
This commit is contained in:
Thomas Waldmann 2017-02-14 06:35:54 +01:00
parent 4862efe718
commit a52b54dc3c
6 changed files with 25 additions and 6 deletions

View File

@ -777,6 +777,7 @@ Utilization of max. archive size: {csize_max:.0%}
length = len(item.chunks)
# the item should only have the *additional* chunks we processed after the last partial item:
item.chunks = item.chunks[from_chunk:]
item.size = sum(chunk.size for chunk in item.chunks)
item.path += '.borg_part_%d' % number
item.part = number
number += 1
@ -825,6 +826,7 @@ Utilization of max. archive size: {csize_max:.0%}
)
fd = sys.stdin.buffer # binary
self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
item.size = sum(chunk.size for chunk in item.chunks)
self.stats.nfiles += 1
self.add_item(item)
return 'i' # stdin
@ -885,6 +887,7 @@ Utilization of max. archive size: {csize_max:.0%}
cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
status = status or 'M' # regular file, modified (if not 'A' already)
item.update(self.stat_attrs(st, path))
item.size = sum(chunk.size for chunk in item.chunks)
if is_special_file:
# we processed a special file like a regular file. reflect that in mode,
# so it can be extracted / accessed in FUSE mount like a regular file:

View File

@ -600,10 +600,15 @@ class Archiver:
def sum_chunk_size(item, consider_ids=None):
if item.get('deleted'):
return None
size = None
else:
return sum(c.size for c in item.chunks
if consider_ids is None or c.id in consider_ids)
if consider_ids is not None: # consider only specific chunks
size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
else: # consider all chunks
size = item.get('size')
if size is None:
size = sum(chunk.size for chunk in item.chunks)
return size
def get_owner(item):
if args.numeric_owner:

View File

@ -1,6 +1,6 @@
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
'part'])

View File

@ -260,6 +260,7 @@ class FuseOperations(llfuse.Operations):
size = 0
dsize = 0
if 'chunks' in item:
# if we would not need to compute dsize, we could get size quickly from item.size, if present.
for key, chunksize, _ in item.chunks:
size += chunksize
if self.accounted_chunks.get(key, inode) == inode:

View File

@ -104,7 +104,7 @@ def check_extension_modules():
raise ExtensionModuleError
if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
raise ExtensionModuleError
if item.API_VERSION != '1.1_01':
if item.API_VERSION != '1.1_02':
raise ExtensionModuleError
@ -1701,6 +1701,9 @@ class ItemFormatter(BaseFormatter):
return len(item.get('chunks', []))
def calculate_size(self, item):
size = item.get('size')
if size is not None:
return size
return sum(c.size for c in item.get('chunks', []))
def calculate_csize(self, item):

View File

@ -2,7 +2,7 @@ from .constants import ITEM_KEYS
from .helpers import safe_encode, safe_decode
from .helpers import StableDict
API_VERSION = '1.1_01'
API_VERSION = '1.1_02'
class PropDict:
@ -156,6 +156,10 @@ class Item(PropDict):
ctime = PropDict._make_property('ctime', int)
mtime = PropDict._make_property('mtime', int)
# size is only present for items with a chunk list and then it is sum(chunk_sizes)
# compatibility note: this is a new feature, in old archives size will be missing.
size = PropDict._make_property('size', int)
hardlink_master = PropDict._make_property('hardlink_master', bool)
chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@ -169,6 +173,9 @@ class Item(PropDict):
part = PropDict._make_property('part', int)
def file_size(self, hardlink_masters=None):
size = self.get('size')
if size is not None:
return size
hardlink_masters = hardlink_masters or {}
chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
chunks = self.get('chunks', chunks)