1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-27 02:08:54 +00:00

fix borg create never showing M status

the problem was that the upper layer code did not have enough information
about the file, whether it is known or not - and thus, could not decide
correctly whether status should be M)odified or A)dded.

now, file_known_and_unchanged method returns an additional "known"
boolean to fix this.

also: add comment about files cache loading in cache_mode='r'
This commit is contained in:
Thomas Waldmann 2018-02-23 14:48:24 +01:00
parent 1922ae3242
commit 4e0f369d0a
4 changed files with 32 additions and 15 deletions

View file

@ -1132,12 +1132,13 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
if not hardlinked or hardlink_master:
if not is_special_file:
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
else:
# in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past:
path_hash = ids = None
path_hash = None
known, ids = False, None
first_run = not cache.files and cache.do_files
if first_run:
logger.debug('Processing files ...')
@ -1146,12 +1147,13 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
# Make sure all ids are available
for id_ in ids:
if not cache.seen_chunk(id_):
status = 'M' # cache said it is unmodified, but we lost a chunk: process file like modified
break
else:
chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
status = 'U' # regular file, unchanged
else:
status = 'A' # regular file, added
status = 'M' if known else 'A' # regular file, modified or added
item.hardlink_master = hardlinked
item.update(self.metadata_collector.stat_simple_attrs(st))
# Only chunkify the file if needed
@ -1166,7 +1168,6 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
# we must not memorize special files, because the contents of e.g. a
# block or char device will change without its mtime/size/inode changing.
cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
status = status or 'M' # regular file, modified (if not 'A' already)
self.stats.nfiles += 1
item.update(self.metadata_collector.stat_attrs(st, path))
item.get_size(memorize=True)

View file

@ -918,24 +918,40 @@ def chunk_decref(self, id, stats, wait=True):
stats.update(-size, -csize, False)
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
"""
Check if we know the file that has this path_hash (know == it is in our files cache) and
whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
:param path_hash: hash(file_path), to save some memory in the files cache
:param st: the file's stat() result
:param ignore_inode: whether the inode number shall be ignored
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
:return: known, ids (known is True if we have infos about this file in the cache,
ids is the list of chunk ids IF the file has not changed, otherwise None).
"""
if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled)
return None
return False, None
if self.files is None:
self._read_files()
# note: r(echunk) does not need the files cache in this method, but the files cache will
# be updated and saved to disk to memorize the files. To preserve previous generations in
# the cache, this means that it also needs to get loaded from disk first, so keep
# _read_files() above here.
if 'r' in cache_mode: # r(echunk)
return None
return False, None
entry = self.files.get(path_hash)
if not entry:
return None
return False, None
# we know the file!
entry = FileCacheEntry(*msgpack.unpackb(entry))
if 's' in cache_mode and entry.size != st.st_size:
return None
return True, None
if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
return None
return True, None
if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
return None
return True, None
elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
return None
return True, None
# we ignored the inode number in the comparison above or it is still same.
# if it is still the same, replacing it in the tuple doesn't change it.
# if we ignored it, a reason for doing that is that files were moved to a new
@ -945,7 +961,7 @@ def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode
# again at that time), we need to update the inode number in the cache with what
# we see in the filesystem.
self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
return entry.chunk_ids
return True, entry.chunk_ids
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
# note: r(echunk) modes will update the files cache, d(isabled) mode won't
@ -999,7 +1015,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
do_files = False
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
return None
return False, None
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
pass

View file

@ -1678,7 +1678,7 @@ def test_file_status_cs_cache_mode(self):
os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns))
# this mode uses ctime for change detection, so it should find file1 as modified
output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input')
self.assert_in("A input/file1", output)
self.assert_in("M input/file1", output)
def test_file_status_ms_cache_mode(self):
"""test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode"""

View file

@ -256,7 +256,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
repository.get(H(5))
def test_files_cache(self, cache):
assert cache.file_known_and_unchanged(bytes(32), None) is None
assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
assert not cache.do_files
assert cache.files is None