mirror of https://github.com/borgbackup/borg.git
files cache: add chunk size information
The files cache used to store only the chunk ids, so it had to rely on the chunks index for the size information. That is problematic with e.g. the AdhocCache (which has size == 0 for all chunks that are not new) and prevented using the files cache there.
This commit is contained in:
parent
411c763fb8
commit
c5e130d03d
|
@ -1552,25 +1552,27 @@ class FilesystemObjectProcessors:
|
|||
started_hashing = time.monotonic()
|
||||
path_hash = self.key.id_hash(hashed_path)
|
||||
self.stats.hashing_time += time.monotonic() - started_hashing
|
||||
known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
|
||||
known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
|
||||
else:
|
||||
# in --read-special mode, we may be called for special files.
|
||||
# there should be no information in the cache about special files processed in
|
||||
# read-special mode, but we better play safe as this was wrong in the past:
|
||||
hashed_path = path_hash = None
|
||||
known, ids = False, None
|
||||
if ids is not None:
|
||||
known, chunks = False, None
|
||||
if chunks is not None:
|
||||
# Make sure all ids are available
|
||||
for id_ in ids:
|
||||
if not cache.seen_chunk(id_):
|
||||
for chunk in chunks:
|
||||
if not cache.seen_chunk(chunk.id):
|
||||
# cache said it is unmodified, but we lost a chunk: process file like modified
|
||||
status = "M"
|
||||
break
|
||||
else:
|
||||
item.chunks = []
|
||||
for chunk_id in ids:
|
||||
for chunk in chunks:
|
||||
# process one-by-one, so we will know in item.chunks how far we got
|
||||
chunk_entry = cache.chunk_incref(chunk_id, self.stats)
|
||||
chunk_entry = cache.chunk_incref(chunk.id, self.stats)
|
||||
# chunk.size is from files cache, chunk_entry.size from index:
|
||||
assert chunk == chunk_entry
|
||||
item.chunks.append(chunk_entry)
|
||||
status = "U" # regular file, unchanged
|
||||
else:
|
||||
|
@ -1606,7 +1608,7 @@ class FilesystemObjectProcessors:
|
|||
# block or char device will change without its mtime/size/inode changing.
|
||||
# also, we must not memorize a potentially inconsistent/corrupt file that
|
||||
# changed while we backed it up.
|
||||
cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
|
||||
cache.memorize_file(hashed_path, path_hash, st, item.chunks)
|
||||
self.stats.files_stats[status] += 1 # must be done late
|
||||
if not changed_while_backup:
|
||||
status = None # we already called print_file_status
|
||||
|
|
|
@ -35,8 +35,8 @@ from .platform import SaveFile
|
|||
from .remote import cache_if_remote
|
||||
from .repository import LIST_SCAN_LIMIT
|
||||
|
||||
# note: cmtime might me either a ctime or a mtime timestamp
|
||||
FileCacheEntry = namedtuple("FileCacheEntry", "age inode size cmtime chunk_ids")
|
||||
# note: cmtime might be either a ctime or a mtime timestamp, chunks is a list of ChunkListEntry
|
||||
FileCacheEntry = namedtuple("FileCacheEntry", "age inode size cmtime chunks")
|
||||
|
||||
|
||||
class SecurityManager:
|
||||
|
@ -1016,8 +1016,8 @@ class LocalCache(CacheStatsMixin):
|
|||
:param hashed_path: the file's path as we gave it to hash(hashed_path)
|
||||
:param path_hash: hash(hashed_path), to save some memory in the files cache
|
||||
:param st: the file's stat() result
|
||||
:return: known, ids (known is True if we have infos about this file in the cache,
|
||||
ids is the list of chunk ids IF the file has not changed, otherwise None).
|
||||
:return: known, chunks (known is True if we have infos about this file in the cache,
|
||||
chunks is a list[ChunkListEntry] IF the file has not changed, otherwise None).
|
||||
"""
|
||||
if not stat.S_ISREG(st.st_mode):
|
||||
return False, None
|
||||
|
@ -1058,9 +1058,10 @@ class LocalCache(CacheStatsMixin):
|
|||
# again at that time), we need to update the inode number in the cache with what
|
||||
# we see in the filesystem.
|
||||
self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
|
||||
return True, entry.chunk_ids
|
||||
chunks = [ChunkListEntry(*chunk) for chunk in entry.chunks] # convert to list of namedtuple
|
||||
return True, chunks
|
||||
|
||||
def memorize_file(self, hashed_path, path_hash, st, ids):
|
||||
def memorize_file(self, hashed_path, path_hash, st, chunks):
|
||||
if not stat.S_ISREG(st.st_mode):
|
||||
return
|
||||
cache_mode = self.cache_mode
|
||||
|
@ -1078,13 +1079,13 @@ class LocalCache(CacheStatsMixin):
|
|||
cmtime_type = "ctime"
|
||||
cmtime_ns = safe_ns(st.st_ctime_ns)
|
||||
entry = FileCacheEntry(
|
||||
age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunk_ids=ids
|
||||
age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunks=chunks
|
||||
)
|
||||
self.files[path_hash] = msgpack.packb(entry)
|
||||
self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
|
||||
files_cache_logger.debug(
|
||||
"FILES-CACHE-UPDATE: put %r [has %s] <- %r",
|
||||
entry._replace(chunk_ids="[%d entries]" % len(entry.chunk_ids)),
|
||||
entry._replace(chunks="[%d entries]" % len(entry.chunks)),
|
||||
cmtime_type,
|
||||
hashed_path,
|
||||
)
|
||||
|
@ -1135,7 +1136,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
|
|||
files_cache_logger.debug("UNKNOWN: files cache not implemented")
|
||||
return False, None
|
||||
|
||||
def memorize_file(self, hashed_path, path_hash, st, ids):
|
||||
def memorize_file(self, hashed_path, path_hash, st, chunks):
|
||||
pass
|
||||
|
||||
def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ro_type=ROBJ_FILE_STREAM):
|
||||
|
|
Loading…
Reference in New Issue