From d8cfd91b15b54246cae1dffe9fb7066e7b86b93d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 23 Feb 2018 14:48:24 +0100 Subject: [PATCH] fix borg create never showing M status the problem was that the upper layer code did not have enough information about the file, whether it is known or not - and thus, could not decide correctly whether status should be M)odified or A)dded. now, file_known_and_unchanged method returns an additional "known" boolean to fix this. also: add comment about files cache loading in cache_mode='r' --- src/borg/archive.py | 9 +++++---- src/borg/cache.py | 34 +++++++++++++++++++++++++--------- src/borg/testsuite/archiver.py | 2 +- src/borg/testsuite/cache.py | 2 +- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index bba29a265..5e17431e6 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -986,12 +986,13 @@ Utilization of max. archive size: {csize_max:.0%} if not hardlinked or hardlink_master: if not is_special_file: path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) - ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode) + known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode) else: # in --read-special mode, we may be called for special files. # there should be no information in the cache about special files processed in # read-special mode, but we better play safe as this was wrong in the past: - path_hash = ids = None + path_hash = None + known, ids = False, None first_run = not cache.files and cache.do_files if first_run: logger.debug('Processing files ...') @@ -1000,12 +1001,13 @@ Utilization of max. archive size: {csize_max:.0%} # Make sure all ids are available for id_ in ids: if not cache.seen_chunk(id_): + status = 'M' # cache said it is unmodified, but we lost a chunk: process file like modified break else: chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] status = 'U' # regular file, unchanged else: - status = 'A' # regular file, added + status = 'M' if known else 'A' # regular file, modified or added item.hardlink_master = hardlinked item.update(self.stat_simple_attrs(st)) # Only chunkify the file if needed @@ -1020,7 +1022,6 @@ Utilization of max. archive size: {csize_max:.0%} # we must not memorize special files, because the contents of e.g. a # block or char device will change without its mtime/size/inode changing. cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode) - status = status or 'M' # regular file, modified (if not 'A' already) self.stats.nfiles += 1 item.update(self.stat_attrs(st, path)) item.get_size(memorize=True) diff --git a/src/borg/cache.py b/src/borg/cache.py index c55dd617d..062829f74 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -918,24 +918,40 @@ class LocalCache(CacheStatsMixin): stats.update(-size, -csize, False) def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): + """ + Check if we know the file that has this path_hash (know == it is in our files cache) and + whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode). + + :param path_hash: hash(file_path), to save some memory in the files cache + :param st: the file's stat() result + :param ignore_inode: whether the inode number shall be ignored + :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison + :return: known, ids (known is True if we have infos about this file in the cache, + ids is the list of chunk ids IF the file has not changed, otherwise None). + """ if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled) - return None + return False, None if self.files is None: self._read_files() + # note: r(echunk) does not need the files cache in this method, but the files cache will + # be updated and saved to disk to memorize the files. To preserve previous generations in + # the cache, this means that it also needs to get loaded from disk first, so keep + # _read_files() above here. if 'r' in cache_mode: # r(echunk) - return None + return False, None entry = self.files.get(path_hash) if not entry: - return None + return False, None + # we know the file! entry = FileCacheEntry(*msgpack.unpackb(entry)) if 's' in cache_mode and entry.size != st.st_size: - return None + return True, None if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino: - return None + return True, None if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns: - return None + return True, None elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns: - return None + return True, None # we ignored the inode number in the comparison above or it is still same. # if it is still the same, replacing it in the tuple doesn't change it. # if we ignored it, a reason for doing that is that files were moved to a new @@ -945,7 +961,7 @@ class LocalCache(CacheStatsMixin): # again at that time), we need to update the inode number in the cache with what # we see in the filesystem. self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0)) - return entry.chunk_ids + return True, entry.chunk_ids def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): # note: r(echunk) modes will update the files cache, d(isabled) mode won't @@ -999,7 +1015,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown""" do_files = False def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): - return None + return False, None def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): pass diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 0864c773a..52f38c357 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1677,7 +1677,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns)) # this mode uses ctime for change detection, so it should find file1 as modified output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input') - self.assert_in("A input/file1", output) + self.assert_in("M input/file1", output) def test_file_status_ms_cache_mode(self): """test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode""" diff --git a/src/borg/testsuite/cache.py b/src/borg/testsuite/cache.py index 6cce0cb76..b25e72b6f 100644 --- a/src/borg/testsuite/cache.py +++ b/src/borg/testsuite/cache.py @@ -256,7 +256,7 @@ class TestAdHocCache: repository.get(H(5)) def test_files_cache(self, cache): - assert cache.file_known_and_unchanged(bytes(32), None) is None + assert cache.file_known_and_unchanged(bytes(32), None) == (False, None) assert not cache.do_files assert cache.files is None