Mirror of https://github.com/borgbackup/borg.git
verbose files cache logging via --debug-topic=files_cache, fixes #5659
commit 2211b840a3 (parent: 29686798af)
3 changed files with 42 additions and 12 deletions
@@ -1320,13 +1320,14 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal):
                 item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
                 if not hardlinked or hardlink_master:
                     if not is_special_file:
-                        path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                        known, ids = cache.file_known_and_unchanged(path_hash, st)
+                        hashed_path = safe_encode(os.path.join(self.cwd, path))
+                        path_hash = self.key.id_hash(hashed_path)
+                        known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                     else:
                         # in --read-special mode, we may be called for special files.
                         # there should be no information in the cache about special files processed in
                         # read-special mode, but we better play safe as this was wrong in the past:
-                        path_hash = None
+                        hashed_path = path_hash = None
                         known, ids = False, None
                     chunks = None
                     if ids is not None:
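
Note on the refactor above: the cache stays keyed by a fixed-size hash of the absolute path; the un-hashed hashed_path is now carried alongside purely so the new debug output can name the file. A minimal sketch of the pattern, with stand-ins for borg's safe_encode and key.id_hash (sha256 here is illustrative; borg's id_hash is a keyed MAC):

    import hashlib
    import os

    def safe_encode(s):
        # stand-in: encode a path to bytes, tolerating undecodable names
        return s.encode('utf-8', errors='surrogateescape')

    def id_hash(data):
        # stand-in for key.id_hash(); borg uses a keyed MAC, not bare sha256
        return hashlib.sha256(data).digest()

    cwd, path = '/backup/src', 'etc/passwd'
    hashed_path = safe_encode(os.path.join(cwd, path))  # bytes that get hashed
    path_hash = id_hash(hashed_path)                    # fixed-size cache key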
@@ -1363,7 +1364,7 @@ def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal):
                             # block or char device will change without its mtime/size/inode changing.
                             # also, we must not memorize a potentially inconsistent/corrupt file that
                             # changed while we backed it up.
-                            cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
+                            cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
                     self.stats.nfiles += 1
                 item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
                 item.get_size(memorize=True)
@@ -10,6 +10,8 @@
 logger = create_logger()
 
+files_cache_logger = create_logger('borg.debug.files_cache')
+
 from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Location
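
The new files_cache_logger is an ordinary topic logger in borg's borg.debug.* namespace; per the commit message it is enabled with --debug-topic=files_cache. A standalone sketch of the hierarchical-topic idea using only the stdlib (enable_debug_topic is a hypothetical helper, not borg's API):

    import logging

    logging.basicConfig(level=logging.WARNING)   # everything quiet by default

    def enable_debug_topic(topic):
        # roughly what --debug-topic does conceptually: raise one topic's
        # logger to DEBUG without touching the rest of the hierarchy
        logging.getLogger('borg.debug.' + topic).setLevel(logging.DEBUG)

    files_cache_logger = logging.getLogger('borg.debug.files_cache')
    enable_debug_topic('files_cache')
    files_cache_logger.debug('FILES-CACHE-LOAD: starting...')    # printed
    logging.getLogger('borg.other').debug('suppressed')          # filtered out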
@@ -530,6 +532,7 @@ def _read_files(self):
         self.files = {}
         self._newest_cmtime = None
         logger.debug('Reading files cache ...')
+        files_cache_logger.debug("FILES-CACHE-LOAD: starting...")
         msg = None
         try:
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=False,
@@ -556,6 +559,7 @@ def _read_files(self):
             logger.warning(msg)
             logger.warning('Continuing without files cache - expect lower performance.')
             self.files = {}
+        files_cache_logger.debug("FILES-CACHE-LOAD: finished, %d entries loaded.", len(self.files))
 
     def begin_txn(self):
         # Initialize transaction snapshot
@@ -590,7 +594,9 @@ def commit(self):
             self._newest_cmtime = 2 ** 63 - 1  # nanoseconds, good until y2262
         ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
         pi.output('Saving files cache')
+        files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
         with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=True) as fd:
+            entry_count = 0
             for path_hash, item in self.files.items():
                 # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
                 # this is to avoid issues with filesystem snapshots and cmtime granularity.
@@ -599,6 +605,10 @@ def commit(self):
                 if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
                    entry.age > 0 and entry.age < ttl:
                     msgpack.pack((path_hash, entry), fd)
+                    entry_count += 1
+            files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
+            files_cache_logger.debug("FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime)
+            files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
         self.cache_config.integrity[files_cache_name()] = fd.integrity_data
         pi.output('Saving chunks cache')
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
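
The FILES-CACHE-KILL/SAVE lines summarize what the write-back loop above silently drops: entries seen in this backup (age == 0) survive only if their cmtime is strictly older than the newest cmtime seen, and unseen entries survive only while their age is below the TTL (BORG_FILES_CACHE_TTL, default 20). A runnable restatement of the keep/drop rule (a sketch, not borg's code):

    def keep_entry(age, cmtime_ns, newest_cmtime_ns, ttl):
        if age == 0:                               # seen in this backup run
            return cmtime_ns < newest_cmtime_ns    # cmtime-granularity safety
        return age < ttl                           # unseen: expire after ttl runs

    assert keep_entry(0, 100, 200, ttl=20)         # seen, strictly older: keep
    assert not keep_entry(0, 200, 200, ttl=20)     # seen, too new: drop
    assert keep_entry(5, 999, 200, ttl=20)         # unseen for 5 runs: keep
    assert not keep_entry(20, 999, 200, ttl=20)    # unseen for TTL runs: drop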
@@ -949,36 +959,46 @@ def chunk_decref(self, id, stats, wait=True, part=False):
         else:
             stats.update(-size, -csize, False, part=part)
 
-    def file_known_and_unchanged(self, path_hash, st):
+    def file_known_and_unchanged(self, hashed_path, path_hash, st):
         """
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
 
-        :param path_hash: hash(file_path), to save some memory in the files cache
+        :param hashed_path: the file's path as we gave it to hash(hashed_path)
+        :param path_hash: hash(hashed_path), to save some memory in the files cache
         :param st: the file's stat() result
         :return: known, ids (known is True if we have infos about this file in the cache,
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
         """
+        if not stat.S_ISREG(st.st_mode):
+            return False, None
         cache_mode = self.cache_mode
-        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):  # d(isabled)
+        if 'd' in cache_mode:  # d(isabled)
+            files_cache_logger.debug('UNKNOWN: files cache disabled')
             return False, None
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # be updated and saved to disk to memorize the files. To preserve previous generations in
         # the cache, this means that it also needs to get loaded from disk first.
         if 'r' in cache_mode:  # r(echunk)
+            files_cache_logger.debug('UNKNOWN: rechunking enforced')
             return False, None
         entry = self.files.get(path_hash)
         if not entry:
+            files_cache_logger.debug('UNKNOWN: no file metadata in cache for: %r', hashed_path)
             return False, None
         # we know the file!
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         if 's' in cache_mode and entry.size != st.st_size:
+            files_cache_logger.debug('KNOWN-CHANGED: file size has changed: %r', hashed_path)
             return True, None
         if 'i' in cache_mode and entry.inode != st.st_ino:
+            files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path)
             return True, None
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
+            files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path)
             return True, None
         elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
+            files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path)
             return True, None
         # we ignored the inode number in the comparison above or it is still same.
         # if it is still the same, replacing it in the tuple doesn't change it.
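
Each early return above now says why a file will be re-read: UNKNOWN means the cache could not be consulted at all, KNOWN-CHANGED means the cached metadata no longer matches the stat() result. A toy, runnable restatement of the per-letter checks ('s'=size, 'i'=inode, 'c'=ctime, 'm'=mtime; 'd' disables the cache, 'r' forces rechunking), not borg's code:

    def classify(cache_mode, entry, st):
        # entry/st are plain dicts here; borg compares a FileCacheEntry
        # against an os.stat_result instead
        if 's' in cache_mode and entry['size'] != st['size']:
            return 'KNOWN-CHANGED: size'
        if 'i' in cache_mode and entry['inode'] != st['inode']:
            return 'KNOWN-CHANGED: inode'
        if 'c' in cache_mode and entry['cmtime'] != st['ctime_ns']:
            return 'KNOWN-CHANGED: ctime'
        elif 'm' in cache_mode and entry['cmtime'] != st['mtime_ns']:
            return 'KNOWN-CHANGED: mtime'
        return 'KNOWN-UNCHANGED'

    entry = {'size': 10, 'inode': 7, 'cmtime': 1000}
    st = {'size': 10, 'inode': 7, 'ctime_ns': 1000, 'mtime_ns': 2000}
    assert classify('cis', entry, st) == 'KNOWN-UNCHANGED'
    assert classify('mis', entry, st) == 'KNOWN-CHANGED: mtime'
    assert classify('cis', entry, dict(st, size=11)) == 'KNOWN-CHANGED: size'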
@@ -991,18 +1011,26 @@ def file_known_and_unchanged(self, path_hash, st):
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         return True, entry.chunk_ids
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, ids):
+        if not stat.S_ISREG(st.st_mode):
+            return
         cache_mode = self.cache_mode
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
-        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):
+        if 'd' in cache_mode:
+            files_cache_logger.debug('FILES-CACHE-NOUPDATE: files cache disabled')
             return
         if 'c' in cache_mode:
+            cmtime_type = 'ctime'
             cmtime_ns = safe_ns(st.st_ctime_ns)
         elif 'm' in cache_mode:
+            cmtime_type = 'mtime'
             cmtime_ns = safe_ns(st.st_mtime_ns)
         entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
+        files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r',
+                                 entry._replace(chunk_ids='[%d entries]' % len(entry.chunk_ids)),
+                                 cmtime_type, hashed_path)
 
 
 class AdHocCache(CacheStatsMixin):
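
memorize_file stores each entry msgpack-packed under path_hash; the FILES-CACHE-UPDATE line prints the entry with the potentially huge chunk id list replaced by a count. A round-trip sketch with a namedtuple stand-in for borg's FileCacheEntry (field order taken from the construction above; cmtime simplified to a plain int where borg uses int_to_bigint; requires the msgpack package):

    from collections import namedtuple
    import msgpack  # pip install msgpack; borg ships its own wrapper

    FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size cmtime chunk_ids')

    entry = FileCacheEntry(age=0, inode=1234, size=4096,
                           cmtime=1_600_000_000_000_000_000,
                           chunk_ids=[b'\x00' * 32])
    packed = msgpack.packb(entry)                     # namedtuple packs as an array
    restored = FileCacheEntry(*msgpack.unpackb(packed))
    assert restored.size == entry.size
    # what FILES-CACHE-UPDATE would show: ids replaced by a count
    print(entry._replace(chunk_ids='[%d entries]' % len(entry.chunk_ids)))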
@@ -1044,10 +1072,11 @@ def __exit__(self, exc_type, exc_val, exc_tb):
     files = None
     cache_mode = 'd'
 
-    def file_known_and_unchanged(self, path_hash, st):
+    def file_known_and_unchanged(self, hashed_path, path_hash, st):
+        files_cache_logger.debug("UNKNOWN: files cache not implemented")
         return False, None
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, ids):
         pass
 
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):
@@ -256,7 +256,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
             repository.get(H(5))
 
     def test_files_cache(self, cache):
-        assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
+        assert cache.file_known_and_unchanged(b'foo', bytes(32), None) == (False, None)
         assert cache.cache_mode == 'd'
         assert cache.files is None