Merge pull request #2157 from ThomasWaldmann/add-filesize

archived file items: add size metadata
commit 7c9c4b61d7, authored by enkore on 2017-02-27 18:05:43 +01:00, committed by GitHub
8 changed files with 92 additions and 32 deletions
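The gist of the change: archived file items now carry a precomputed size field (the sum of their chunk sizes), memorized at archive-creation time via item.get_size(memorize=True) and cross-checked against the actual chunk data during extract and check. As a rough illustration of the memoization pattern (a toy stand-in, not borg's real Item class):

    from collections import namedtuple

    ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')

    class ToyItem(dict):
        """Toy stand-in for borg's Item: a dict with an optional chunk list."""
        def get_size(self, memorize=False):
            if 'size' in self:  # precomputed value available?
                return self['size']
            size = sum(c.size for c in self.get('chunks', []))
            if memorize:  # store it, so later calls need no chunk list
                self['size'] = size
            return size

    item = ToyItem(chunks=[ChunkListEntry(id=None, size=1000, csize=1),
                           ChunkListEntry(id=None, size=2000, csize=1)])
    assert item.get_size(memorize=True) == 3000
    assert item['size'] == 3000  # now part of the item's metadata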

View File

@@ -519,13 +519,20 @@ Utilization of max. archive size: {csize_max:.0%}
         has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
             if 'chunks' in item:
+                item_chunks_size = 0
                 for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if pi:
                         pi.show(increase=len(data), info=[remove_surrogates(item.path)])
                     if stdout:
                         sys.stdout.buffer.write(data)
+                    item_chunks_size += len(data)
                 if stdout:
                     sys.stdout.buffer.flush()
+                if 'size' in item:
+                    item_size = item.size
+                    if item_size != item_chunks_size:
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
             if has_damaged_chunks:
                 logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                remove_surrogates(item.path))
@@ -582,10 +589,15 @@ Utilization of max. archive size: {csize_max:.0%}
                         else:
                             fd.write(data)
                 with backup_io('truncate'):
-                    pos = fd.tell()
+                    pos = item_chunks_size = fd.tell()
                     fd.truncate(pos)
                     fd.flush()
                     self.restore_attrs(path, item, fd=fd.fileno())
+            if 'size' in item:
+                item_size = item.size
+                if item_size != item_chunks_size:
+                    logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                        item.path, item_size, item_chunks_size))
             if has_damaged_chunks:
                 logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                remove_surrogates(item.path))
@@ -829,6 +841,7 @@ Utilization of max. archive size: {csize_max:.0%}
             length = len(item.chunks)
             # the item should only have the *additional* chunks we processed after the last partial item:
             item.chunks = item.chunks[from_chunk:]
+            item.get_size(memorize=True)
             item.path += '.borg_part_%d' % number
             item.part = number
             number += 1
@@ -877,6 +890,7 @@ Utilization of max. archive size: {csize_max:.0%}
         )
         fd = sys.stdin.buffer  # binary
         self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
+        item.get_size(memorize=True)
         self.stats.nfiles += 1
         self.add_item(item)
         return 'i'  # stdin
@@ -937,6 +951,7 @@ Utilization of max. archive size: {csize_max:.0%}
                 cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item.update(self.stat_attrs(st, path))
+        item.get_size(memorize=True)
         if is_special_file:
             # we processed a special file like a regular file. reflect that in mode,
             # so it can be extracted / accessed in FUSE mount like a regular file:
@@ -1355,6 +1370,13 @@ class ArchiveChecker:
                     logger.info('{}: Completely healed previously damaged file!'.format(item.path))
                     del item.chunks_healthy
                 item.chunks = chunk_list
+                if 'size' in item:
+                    item_size = item.size
+                    item_chunks_size = item.get_size(compressed=False, from_chunks=True)
+                    if item_size != item_chunks_size:
+                        # just warn, but keep the inconsistency, so that borg extract can warn about it.
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))

         def robust_iterator(archive):
             """Iterates through all archive items

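Worth noting why the checker passes from_chunks=True here: after repair, the stored size must be compared against a value recomputed from the (possibly changed) chunk list, so the cached field has to be bypassed. In terms of the toy sketch above, the flag would simply skip the cached lookup:

    # hypothetical extension of the ToyItem sketch above
    def get_size(self, memorize=False, from_chunks=False):
        if not from_chunks and 'size' in self:
            return self['size']  # trust the stored value
        size = sum(c.size for c in self.get('chunks', []))  # recompute from chunks
        if memorize:
            self['size'] = size
        return size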
View File

@@ -557,7 +557,7 @@ class Archiver:
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1)
             pi.output('Calculating size')
-            extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
+            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
             pi.total = extracted_size
         else:
             pi = None
@@ -616,10 +616,13 @@ class Archiver:
         def sum_chunk_size(item, consider_ids=None):
             if item.get('deleted'):
-                return None
+                size = None
             else:
-                return sum(c.size for c in item.chunks
-                           if consider_ids is None or c.id in consider_ids)
+                if consider_ids is not None:  # consider only specific chunks
+                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
+                else:  # consider all chunks
+                    size = item.get_size()
+            return size

         def get_owner(item):
             if args.numeric_owner:

View File

@@ -20,13 +20,12 @@ from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
-from .item import Item, ArchiveItem
+from .item import Item, ArchiveItem, ChunkListEntry
 from .key import PlaintextKey
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote

-ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
 FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')

View File

@@ -1,6 +1,6 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
-                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
                        'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
                        'part'])

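Registering 'size' in ITEM_KEYS matters because, as the comment above says, the checker's RobustUnpacker consults this set to decide whether a decoded msgpack dict plausibly is an item; an unregistered key would make items carrying it look invalid. A toy check in that spirit (simplified, not borg's actual validator):

    ITEM_KEYS = frozenset(['path', 'mode', 'chunks', 'size'])  # abridged

    def looks_like_item(d):
        # only dicts whose keys are all known item keys qualify as items
        return isinstance(d, dict) and 'path' in d and all(k in ITEM_KEYS for k in d)

    assert looks_like_item({'path': b'f', 'size': 3000})
    assert not looks_like_item({'path': b'f', 'bogus': 1})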
View File

@@ -72,7 +72,6 @@ class FuseOperations(llfuse.Operations):
         self.contents = defaultdict(dict)
         self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
-        self.accounted_chunks = {}
         self.cache = ItemCache()
         data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
         logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
@@ -257,14 +256,6 @@ class FuseOperations(llfuse.Operations):
     def getattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        size = 0
-        dsize = 0
-        if 'chunks' in item:
-            for key, chunksize, _ in item.chunks:
-                size += chunksize
-                if self.accounted_chunks.get(key, inode) == inode:
-                    self.accounted_chunks[key] = inode
-                    dsize += chunksize
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
@@ -275,9 +266,9 @@ class FuseOperations(llfuse.Operations):
         entry.st_uid = item.uid
         entry.st_gid = item.gid
         entry.st_rdev = item.get('rdev', 0)
-        entry.st_size = size
+        entry.st_size = item.get_size()
         entry.st_blksize = 512
-        entry.st_blocks = dsize / 512
+        entry.st_blocks = (entry.st_size + entry.st_blksize - 1) // entry.st_blksize
         # note: older archives only have mtime (not atime nor ctime)
         mtime_ns = item.mtime
         if have_fuse_xtime_ns:

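The new st_blocks expression is a plain round-up division: report as many blksize-sized blocks as are needed to cover st_size, instead of the removed per-chunk dsize accounting. A few sample values (assuming the 512-byte blksize set above):

    st_blksize = 512
    for st_size in (0, 1, 512, 1000, 3000):
        st_blocks = (st_size + st_blksize - 1) // st_blksize  # ceil(st_size / st_blksize)
        print(st_size, '->', st_blocks)  # 0->0, 1->1, 512->1, 1000->2, 3000->6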
View File

@@ -105,7 +105,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_01':
+    if item.API_VERSION != '1.1_02':
         raise ExtensionModuleError
@@ -1759,10 +1759,12 @@ class ItemFormatter(BaseFormatter):
         return len(item.get('chunks', []))

     def calculate_size(self, item):
-        return sum(c.size for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be size 0
+        return item.get_size(compressed=False)

     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be csize 0
+        return item.get_size(compressed=True)

     def hash_item(self, hash_function, item):
         if 'chunks' not in item:

View File

@@ -1,8 +1,10 @@
+from collections import namedtuple
+
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import StableDict

-API_VERSION = '1.1_01'
+API_VERSION = '1.1_02'


 class PropDict:
@@ -113,6 +115,8 @@ class PropDict:
         return property(_get, _set, _del, doc=doc)


+ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
+
+
 class Item(PropDict):
     """
     Item abstraction that deals with validation and the low-level details internally:
@@ -156,6 +160,10 @@ class Item(PropDict):
     ctime = PropDict._make_property('ctime', int)
     mtime = PropDict._make_property('mtime', int)

+    # size is only present for items with a chunk list and then it is sum(chunk_sizes)
+    # compatibility note: this is a new feature, in old archives size will be missing.
+    size = PropDict._make_property('size', int)
+
     hardlink_master = PropDict._make_property('hardlink_master', bool)

     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -168,13 +176,48 @@ class Item(PropDict):
     part = PropDict._make_property('part', int)

-    def file_size(self, hardlink_masters=None):
-        hardlink_masters = hardlink_masters or {}
-        chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
-        chunks = self.get('chunks', chunks)
-        if chunks is None:
-            return 0
-        return sum(chunk.size for chunk in chunks)
+    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False):
+        """
+        Determine the (uncompressed or compressed) size of this item.
+
+        For hardlink slaves, the size is computed via the hardlink master's
+        chunk list, if available (otherwise size will be returned as 0).
+
+        If memorize is True, the computed size value will be stored into the item.
+        """
+        attr = 'csize' if compressed else 'size'
+        try:
+            if from_chunks:
+                raise AttributeError
+            size = getattr(self, attr)
+        except AttributeError:
+            # no precomputed (c)size value available, compute it:
+            try:
+                chunks = getattr(self, 'chunks')
+                having_chunks = True
+            except AttributeError:
+                having_chunks = False
+                # this item has no (own) chunks list, but if this is a hardlink slave
+                # and we know the master, we can still compute the size.
+                if hardlink_masters is None:
+                    chunks = None
+                else:
+                    try:
+                        master = getattr(self, 'source')
+                    except AttributeError:
+                        # not a hardlink slave, likely a directory or special file w/o chunks
+                        chunks = None
+                    else:
+                        # hardlink slave, try to fetch hardlink master's chunks list
+                        # todo: put precomputed size into hardlink_masters' values and use it, if present
+                        chunks, _ = hardlink_masters.get(master, (None, None))
+                if chunks is None:
+                    return 0
+            size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
+            # if requested, memorize the precomputed (c)size for items that have an own chunks list:
+            if memorize and having_chunks:
+                setattr(self, attr, size)
+        return size


 class EncryptedKey(PropDict):

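A usage sketch for the hardlink path of the new method, with invented example values: a hardlink slave has no chunks of its own, only a source pointing at its master, so get_size() alone yields 0, while passing a hardlink_masters mapping (source -> (chunks, ...) pairs, matching the (None, None) default the method uses) lets the slave borrow the master's chunk list:

    from borg.item import Item, ChunkListEntry

    chunks = [ChunkListEntry(id=None, size=1000, csize=100),
              ChunkListEntry(id=None, size=2000, csize=200)]
    master = Item(path='file', chunks=chunks)
    slave = Item(path='link', source='file')  # hardlink slave: no own chunks

    assert master.get_size() == 3000
    assert master.get_size(compressed=True) == 300
    assert slave.get_size() == 0  # master unknown -> size 0
    masters = {'file': (chunks, None)}  # hypothetical source -> (chunks, item) mapping
    assert slave.get_size(hardlink_masters=masters) == 3000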
View File

@@ -142,9 +142,9 @@ def test_item_file_size():
         ChunkListEntry(csize=1, size=1000, id=None),
         ChunkListEntry(csize=1, size=2000, id=None),
     ])
-    assert item.file_size() == 3000
+    assert item.get_size() == 3000


 def test_item_file_size_no_chunks():
     item = Item()
-    assert item.file_size() == 0
+    assert item.get_size() == 0