mirror of
https://github.com/borgbackup/borg.git
synced 2024-12-27 02:08:54 +00:00
fix borg create never showing M status
The problem was that the upper-layer code did not have enough information about the file - namely, whether it is known (present in the files cache) or not - and thus could not decide correctly whether the status should be M)odified or A)dded. Now, the file_known_and_unchanged method returns an additional "known" boolean to fix this. Also: add a comment about files cache loading in cache_mode='r'.
This commit is contained in:
parent
1922ae3242
commit
4e0f369d0a
4 changed files with 32 additions and 15 deletions
|
@ -1132,12 +1132,13 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
|
|||
if not hardlinked or hardlink_master:
|
||||
if not is_special_file:
|
||||
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
|
||||
ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
|
||||
known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
|
||||
else:
|
||||
# in --read-special mode, we may be called for special files.
|
||||
# there should be no information in the cache about special files processed in
|
||||
# read-special mode, but we better play safe as this was wrong in the past:
|
||||
path_hash = ids = None
|
||||
path_hash = None
|
||||
known, ids = False, None
|
||||
first_run = not cache.files and cache.do_files
|
||||
if first_run:
|
||||
logger.debug('Processing files ...')
|
||||
|
@ -1146,12 +1147,13 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
|
|||
# Make sure all ids are available
|
||||
for id_ in ids:
|
||||
if not cache.seen_chunk(id_):
|
||||
status = 'M' # cache said it is unmodified, but we lost a chunk: process file like modified
|
||||
break
|
||||
else:
|
||||
chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
|
||||
status = 'U' # regular file, unchanged
|
||||
else:
|
||||
status = 'A' # regular file, added
|
||||
status = 'M' if known else 'A' # regular file, modified or added
|
||||
item.hardlink_master = hardlinked
|
||||
item.update(self.metadata_collector.stat_simple_attrs(st))
|
||||
# Only chunkify the file if needed
|
||||
|
@ -1166,7 +1168,6 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
|
|||
# we must not memorize special files, because the contents of e.g. a
|
||||
# block or char device will change without its mtime/size/inode changing.
|
||||
cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
|
||||
status = status or 'M' # regular file, modified (if not 'A' already)
|
||||
self.stats.nfiles += 1
|
||||
item.update(self.metadata_collector.stat_attrs(st, path))
|
||||
item.get_size(memorize=True)
|
||||
|
|
|
@ -918,24 +918,40 @@ def chunk_decref(self, id, stats, wait=True):
|
|||
stats.update(-size, -csize, False)
|
||||
|
||||
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
|
||||
"""
|
||||
Check if we know the file that has this path_hash (know == it is in our files cache) and
|
||||
whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
|
||||
|
||||
:param path_hash: hash(file_path), to save some memory in the files cache
|
||||
:param st: the file's stat() result
|
||||
:param ignore_inode: whether the inode number shall be ignored
|
||||
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
|
||||
:return: known, ids (known is True if we have infos about this file in the cache,
|
||||
ids is the list of chunk ids IF the file has not changed, otherwise None).
|
||||
"""
|
||||
if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled)
|
||||
return None
|
||||
return False, None
|
||||
if self.files is None:
|
||||
self._read_files()
|
||||
# note: r(echunk) does not need the files cache in this method, but the files cache will
|
||||
# be updated and saved to disk to memorize the files. To preserve previous generations in
|
||||
# the cache, this means that it also needs to get loaded from disk first, so keep
|
||||
# _read_files() above here.
|
||||
if 'r' in cache_mode: # r(echunk)
|
||||
return None
|
||||
return False, None
|
||||
entry = self.files.get(path_hash)
|
||||
if not entry:
|
||||
return None
|
||||
return False, None
|
||||
# we know the file!
|
||||
entry = FileCacheEntry(*msgpack.unpackb(entry))
|
||||
if 's' in cache_mode and entry.size != st.st_size:
|
||||
return None
|
||||
return True, None
|
||||
if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
|
||||
return None
|
||||
return True, None
|
||||
if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
|
||||
return None
|
||||
return True, None
|
||||
elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
|
||||
return None
|
||||
return True, None
|
||||
# we ignored the inode number in the comparison above or it is still same.
|
||||
# if it is still the same, replacing it in the tuple doesn't change it.
|
||||
# if we ignored it, a reason for doing that is that files were moved to a new
|
||||
|
@ -945,7 +961,7 @@ def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode
|
|||
# again at that time), we need to update the inode number in the cache with what
|
||||
# we see in the filesystem.
|
||||
self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
|
||||
return entry.chunk_ids
|
||||
return True, entry.chunk_ids
|
||||
|
||||
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
|
||||
# note: r(echunk) modes will update the files cache, d(isabled) mode won't
|
||||
|
@ -999,7 +1015,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
|
|||
do_files = False
|
||||
|
||||
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
|
||||
return None
|
||||
return False, None
|
||||
|
||||
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
|
||||
pass
|
||||
|
|
|
@ -1678,7 +1678,7 @@ def test_file_status_cs_cache_mode(self):
|
|||
os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns))
|
||||
# this mode uses ctime for change detection, so it should find file1 as modified
|
||||
output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input')
|
||||
self.assert_in("A input/file1", output)
|
||||
self.assert_in("M input/file1", output)
|
||||
|
||||
def test_file_status_ms_cache_mode(self):
|
||||
"""test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode"""
|
||||
|
|
|
@ -256,7 +256,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
|
|||
repository.get(H(5))
|
||||
|
||||
def test_files_cache(self, cache):
|
||||
assert cache.file_known_and_unchanged(bytes(32), None) is None
|
||||
assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
|
||||
assert not cache.do_files
|
||||
assert cache.files is None
|
||||
|
||||
|
|
Loading…
Reference in a new issue