fix borg create never showing M status

the problem was that the upper layer code did not have enough information about the file, whether it is known or not - and thus, could not decide correctly whether status should be M)odified or A)dded. now, file_known_and_unchanged method returns an additional "known" boolean to fix this. also: add comment about files cache loading in cache_mode='r'
2018-02-23 14:48:24 +01:00 · 2018-02-23 14:48:24 +01:00 · d8cfd91b15
parent fd79b90ec0
commit d8cfd91b15
4 changed files with 32 additions and 15 deletions
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@ -986,12 +986,13 @@ Utilization of max. archive size: {csize_max:.0%}
            if not hardlinked or hardlink_master:
                if not is_special_file:
                    path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
+                    known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
                else:
                    # in --read-special mode, we may be called for special files.
                    # there should be no information in the cache about special files processed in
                    # read-special mode, but we better play safe as this was wrong in the past:
-                    path_hash = ids = None
+                    path_hash = None
                    known, ids = False, None
                first_run = not cache.files and cache.do_files
                if first_run:
                    logger.debug('Processing files ...')
@ -1000,12 +1001,13 @@ Utilization of max. archive size: {csize_max:.0%}
                    # Make sure all ids are available
                    for id_ in ids:
                        if not cache.seen_chunk(id_):
                            status = 'M'  # cache said it is unmodified, but we lost a chunk: process file like modified
                            break
                    else:
                        chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
                        status = 'U'  # regular file, unchanged
                else:
-                    status = 'A'  # regular file, added
+                    status = 'M' if known else 'A'  # regular file, modified or added
                item.hardlink_master = hardlinked
                item.update(self.stat_simple_attrs(st))
                # Only chunkify the file if needed
@ -1020,7 +1022,6 @@ Utilization of max. archive size: {csize_max:.0%}
                        # we must not memorize special files, because the contents of e.g. a
                        # block or char device will change without its mtime/size/inode changing.
                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
                    status = status or 'M'  # regular file, modified (if not 'A' already)
                self.stats.nfiles += 1
            item.update(self.stat_attrs(st, path))
            item.get_size(memorize=True)
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@ -918,24 +918,40 @@ class LocalCache(CacheStatsMixin):
            stats.update(-size, -csize, False)
    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
        """
        Check if we know the file that has this path_hash (know == it is in our files cache) and
        whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
        :param path_hash: hash(file_path), to save some memory in the files cache
        :param st: the file's stat() result
        :param ignore_inode: whether the inode number shall be ignored
        :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
        :return: known, ids (known is True if we have infos about this file in the cache,
                             ids is the list of chunk ids IF the file has not changed, otherwise None).
        """
        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):  # d(isabled)
-            return None
+            return False, None
        if self.files is None:
            self._read_files()
        # note: r(echunk) does not need the files cache in this method, but the files cache will
        # be updated and saved to disk to memorize the files. To preserve previous generations in
        # the cache, this means that it also needs to get loaded from disk first, so keep
        # _read_files() above here.
        if 'r' in cache_mode:  # r(echunk)
-            return None
+            return False, None
        entry = self.files.get(path_hash)
        if not entry:
-            return None
+            return False, None
        # we know the file!
        entry = FileCacheEntry(*msgpack.unpackb(entry))
        if 's' in cache_mode and entry.size != st.st_size:
-            return None
+            return True, None
        if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
-            return None
+            return True, None
        if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
-            return None
+            return True, None
        elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
-            return None
+            return True, None
        # we ignored the inode number in the comparison above or it is still same.
        # if it is still the same, replacing it in the tuple doesn't change it.
        # if we ignored it, a reason for doing that is that files were moved to a new
@ -945,7 +961,7 @@ class LocalCache(CacheStatsMixin):
        # again at that time), we need to update the inode number in the cache with what
        # we see in the filesystem.
        self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
-        return entry.chunk_ids
+        return True, entry.chunk_ids
    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
        # note: r(echunk) modes will update the files cache, d(isabled) mode won't
@ -999,7 +1015,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
    do_files = False
    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
-        return None
+        return False, None
    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
        pass
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@ -1677,7 +1677,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns))
        # this mode uses ctime for change detection, so it should find file1 as modified
        output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input')
-        self.assert_in("A input/file1", output)
+        self.assert_in("M input/file1", output)
    def test_file_status_ms_cache_mode(self):
        """test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode"""
--- a/src/borg/testsuite/cache.py
+++ b/src/borg/testsuite/cache.py
@ -256,7 +256,7 @@ class TestAdHocCache:
            repository.get(H(5))
    def test_files_cache(self, cache):
-        assert cache.file_known_and_unchanged(bytes(32), None) is None
+        assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
        assert not cache.do_files
        assert cache.files is None