fix borg create never showing M status

the problem was that the upper layer code did not have enough information
about the file, whether it is known or not - and thus, could not decide
correctly whether status should be M)odified or A)dded.

now, file_known_and_unchanged method returns an additional "known"
boolean to fix this.

also: add comment about files cache loading in cache_mode='r'
This commit is contained in:
Thomas Waldmann 2018-02-23 14:48:24 +01:00 committed by Mike Walters
parent fd79b90ec0
commit d8cfd91b15
4 changed files with 32 additions and 15 deletions

View File

@ -986,12 +986,13 @@ Utilization of max. archive size: {csize_max:.0%}
if not hardlinked or hardlink_master: if not hardlinked or hardlink_master:
if not is_special_file: if not is_special_file:
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode) known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
else: else:
# in --read-special mode, we may be called for special files. # in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in # there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past: # read-special mode, but we better play safe as this was wrong in the past:
path_hash = ids = None path_hash = None
known, ids = False, None
first_run = not cache.files and cache.do_files first_run = not cache.files and cache.do_files
if first_run: if first_run:
logger.debug('Processing files ...') logger.debug('Processing files ...')
@ -1000,12 +1001,13 @@ Utilization of max. archive size: {csize_max:.0%}
# Make sure all ids are available # Make sure all ids are available
for id_ in ids: for id_ in ids:
if not cache.seen_chunk(id_): if not cache.seen_chunk(id_):
status = 'M' # cache said it is unmodified, but we lost a chunk: process file like modified
break break
else: else:
chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
status = 'U' # regular file, unchanged status = 'U' # regular file, unchanged
else: else:
status = 'A' # regular file, added status = 'M' if known else 'A' # regular file, modified or added
item.hardlink_master = hardlinked item.hardlink_master = hardlinked
item.update(self.stat_simple_attrs(st)) item.update(self.stat_simple_attrs(st))
# Only chunkify the file if needed # Only chunkify the file if needed
@ -1020,7 +1022,6 @@ Utilization of max. archive size: {csize_max:.0%}
# we must not memorize special files, because the contents of e.g. a # we must not memorize special files, because the contents of e.g. a
# block or char device will change without its mtime/size/inode changing. # block or char device will change without its mtime/size/inode changing.
cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode) cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
status = status or 'M' # regular file, modified (if not 'A' already)
self.stats.nfiles += 1 self.stats.nfiles += 1
item.update(self.stat_attrs(st, path)) item.update(self.stat_attrs(st, path))
item.get_size(memorize=True) item.get_size(memorize=True)

View File

@ -918,24 +918,40 @@ class LocalCache(CacheStatsMixin):
stats.update(-size, -csize, False) stats.update(-size, -csize, False)
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
"""
Check if we know the file that has this path_hash (know == it is in our files cache) and
whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
:param path_hash: hash(file_path), to save some memory in the files cache
:param st: the file's stat() result
:param ignore_inode: whether the inode number shall be ignored
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
:return: known, ids (known is True if we have infos about this file in the cache,
ids is the list of chunk ids IF the file has not changed, otherwise None).
"""
if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled) if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled)
return None return False, None
if self.files is None: if self.files is None:
self._read_files() self._read_files()
# note: r(echunk) does not need the files cache in this method, but the files cache will
# be updated and saved to disk to memorize the files. To preserve previous generations in
# the cache, this means that it also needs to get loaded from disk first, so keep
# _read_files() above here.
if 'r' in cache_mode: # r(echunk) if 'r' in cache_mode: # r(echunk)
return None return False, None
entry = self.files.get(path_hash) entry = self.files.get(path_hash)
if not entry: if not entry:
return None return False, None
# we know the file!
entry = FileCacheEntry(*msgpack.unpackb(entry)) entry = FileCacheEntry(*msgpack.unpackb(entry))
if 's' in cache_mode and entry.size != st.st_size: if 's' in cache_mode and entry.size != st.st_size:
return None return True, None
if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino: if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
return None return True, None
if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns: if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
return None return True, None
elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns: elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
return None return True, None
# we ignored the inode number in the comparison above or it is still same. # we ignored the inode number in the comparison above or it is still same.
# if it is still the same, replacing it in the tuple doesn't change it. # if it is still the same, replacing it in the tuple doesn't change it.
# if we ignored it, a reason for doing that is that files were moved to a new # if we ignored it, a reason for doing that is that files were moved to a new
@ -945,7 +961,7 @@ class LocalCache(CacheStatsMixin):
# again at that time), we need to update the inode number in the cache with what # again at that time), we need to update the inode number in the cache with what
# we see in the filesystem. # we see in the filesystem.
self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0)) self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
return entry.chunk_ids return True, entry.chunk_ids
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
# note: r(echunk) modes will update the files cache, d(isabled) mode won't # note: r(echunk) modes will update the files cache, d(isabled) mode won't
@ -999,7 +1015,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
do_files = False do_files = False
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
return None return False, None
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
pass pass

View File

@ -1677,7 +1677,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns)) os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns))
# this mode uses ctime for change detection, so it should find file1 as modified # this mode uses ctime for change detection, so it should find file1 as modified
output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input') output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input')
self.assert_in("A input/file1", output) self.assert_in("M input/file1", output)
def test_file_status_ms_cache_mode(self): def test_file_status_ms_cache_mode(self):
"""test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode""" """test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode"""

View File

@ -256,7 +256,7 @@ class TestAdHocCache:
repository.get(H(5)) repository.get(H(5))
def test_files_cache(self, cache): def test_files_cache(self, cache):
assert cache.file_known_and_unchanged(bytes(32), None) is None assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
assert not cache.do_files assert not cache.do_files
assert cache.files is None assert cache.files is None