diff --git a/src/borg/archive.py b/src/borg/archive.py index 5e17431e6..cc828b7dc 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -980,22 +980,19 @@ Utilization of max. archive size: {csize_max:.0%} self.add_item(item) return 'i' # stdin - def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEFAULT_FILES_CACHE_MODE): + def process_file(self, path, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet is_special_file = is_special(st.st_mode) if not hardlinked or hardlink_master: if not is_special_file: path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) - known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode) + known, ids = cache.file_known_and_unchanged(path_hash, st) else: # in --read-special mode, we may be called for special files. # there should be no information in the cache about special files processed in # read-special mode, but we better play safe as this was wrong in the past: path_hash = None known, ids = False, None - first_run = not cache.files and cache.do_files - if first_run: - logger.debug('Processing files ...') chunks = None if ids is not None: # Make sure all ids are available @@ -1021,7 +1018,7 @@ Utilization of max. archive size: {csize_max:.0%} if not is_special_file: # we must not memorize special files, because the contents of e.g. a # block or char device will change without its mtime/size/inode changing. - cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode) + cache.memorize_file(path_hash, st, [c.id for c in item.chunks]) self.stats.nfiles += 1 item.update(self.stat_attrs(st, path)) item.get_size(memorize=True) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index d84f52a8c..6ba2f6ab3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -144,7 +144,9 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True, if cache: with Cache(repository, kwargs['key'], kwargs['manifest'], do_files=getattr(args, 'cache_files', False), - progress=getattr(args, 'progress', False), lock_wait=self.lock_wait) as cache_: + ignore_inode=getattr(args, 'ignore_inode', False), + progress=getattr(args, 'progress', False), lock_wait=self.lock_wait, + cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE)) as cache_: return method(self, args, repository=repository, cache=cache_, **kwargs) else: return method(self, args, repository=repository, **kwargs) @@ -480,6 +482,7 @@ class Archiver: skip_inodes.add((st.st_ino, st.st_dev)) except OSError: pass + logger.debug('Processing files ...') for path in args.paths: if path == '-': # stdin path = 'stdin' @@ -527,15 +530,14 @@ class Archiver: self.output_filter = args.output_filter self.output_list = args.output_list - self.ignore_inode = args.ignore_inode self.exclude_nodump = args.exclude_nodump - self.files_cache_mode = args.files_cache_mode dry_run = args.dry_run t0 = datetime.utcnow() t0_monotonic = time.monotonic() if not dry_run: with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress, - lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync) as cache: + lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync, + cache_mode=args.files_cache_mode, ignore_inode=args.ignore_inode) as cache: archive = Archive(repository, key, manifest, args.location.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime, nobirthtime=args.nobirthtime, @@ -593,7 +595,7 @@ class Archiver: return if stat.S_ISREG(st.st_mode): if not dry_run: - status = archive.process_file(path, st, cache, self.ignore_inode, self.files_cache_mode) + status = archive.process_file(path, st, cache) elif stat.S_ISDIR(st.st_mode): if recurse: tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) @@ -1546,7 +1548,7 @@ class Archiver: keep += prune_split(archives, '%Y', args.yearly, keep) to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) stats = Statistics() - with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache: + with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: list_logger = logging.getLogger('borg.output.list') if args.output_list: # set up counters for the progress display diff --git a/src/borg/cache.py b/src/borg/cache.py index 062829f74..5ab3d5f12 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -359,11 +359,18 @@ class Cache: shutil.rmtree(path) def __new__(cls, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True, - progress=False, lock_wait=None, permit_adhoc_cache=False): + progress=False, lock_wait=None, permit_adhoc_cache=False, cache_mode=DEFAULT_FILES_CACHE_MODE, + ignore_inode=False): + + if not do_files and 'd' not in cache_mode: + cache_mode = 'd' + elif ignore_inode and 'i' in cache_mode: + cache_mode = ''.join(set(cache_mode) - set('i')) + def local(): return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync, - do_files=do_files, warn_if_unencrypted=warn_if_unencrypted, progress=progress, - lock_wait=lock_wait) + warn_if_unencrypted=warn_if_unencrypted, progress=progress, + lock_wait=lock_wait, cache_mode=cache_mode) def adhoc(): return AdHocCache(repository=repository, key=key, manifest=manifest) @@ -421,19 +428,19 @@ class LocalCache(CacheStatsMixin): Persistent, local (client-side) cache. """ - def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True, - progress=False, lock_wait=None): + def __init__(self, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True, + progress=False, lock_wait=None, cache_mode=DEFAULT_FILES_CACHE_MODE): """ - :param do_files: use file metadata cache :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository :param lock_wait: timeout for lock acquisition (None: return immediately if lock unavailable) :param sync: do :meth:`.sync` + :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison """ self.repository = repository self.key = key self.manifest = manifest self.progress = progress - self.do_files = do_files + self.cache_mode = cache_mode self.timestamp = None self.txn_active = False @@ -485,7 +492,10 @@ class LocalCache(CacheStatsMixin): with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False, integrity_data=self.cache_config.integrity.get('chunks')) as fd: self.chunks = ChunkIndex.read(fd) - self.files = None + if 'd' in self.cache_mode: # d(isabled) + self.files = None + else: + self._read_files() def open(self): if not os.path.isdir(self.path): @@ -917,26 +927,22 @@ class LocalCache(CacheStatsMixin): else: stats.update(-size, -csize, False) - def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): + def file_known_and_unchanged(self, path_hash, st): """ Check if we know the file that has this path_hash (know == it is in our files cache) and whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode). :param path_hash: hash(file_path), to save some memory in the files cache :param st: the file's stat() result - :param ignore_inode: whether the inode number shall be ignored - :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison :return: known, ids (known is True if we have infos about this file in the cache, ids is the list of chunk ids IF the file has not changed, otherwise None). """ - if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled) + cache_mode = self.cache_mode + if 'd' in cache_mode or not stat.S_ISREG(st.st_mode): # d(isabled) return False, None - if self.files is None: - self._read_files() # note: r(echunk) does not need the files cache in this method, but the files cache will # be updated and saved to disk to memorize the files. To preserve previous generations in - # the cache, this means that it also needs to get loaded from disk first, so keep - # _read_files() above here. + # the cache, this means that it also needs to get loaded from disk first. if 'r' in cache_mode: # r(echunk) return False, None entry = self.files.get(path_hash) @@ -946,7 +952,7 @@ class LocalCache(CacheStatsMixin): entry = FileCacheEntry(*msgpack.unpackb(entry)) if 's' in cache_mode and entry.size != st.st_size: return True, None - if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino: + if 'i' in cache_mode and entry.inode != st.st_ino: return True, None if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns: return True, None @@ -963,9 +969,10 @@ class LocalCache(CacheStatsMixin): self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0)) return True, entry.chunk_ids - def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): + def memorize_file(self, path_hash, st, ids): + cache_mode = self.cache_mode # note: r(echunk) modes will update the files cache, d(isabled) mode won't - if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): + if 'd' in cache_mode or not stat.S_ISREG(st.st_mode): return if 'c' in cache_mode: cmtime_ns = safe_ns(st.st_ctime_ns) @@ -1012,12 +1019,12 @@ Chunk index: {0.total_unique_chunks:20d} unknown""" pass files = None - do_files = False + cache_mode = 'd' - def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): + def file_known_and_unchanged(self, path_hash, st): return False, None - def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): + def memorize_file(self, path_hash, st, ids): pass def add_chunk(self, id, chunk, stats, overwrite=False, wait=True): diff --git a/src/borg/testsuite/cache.py b/src/borg/testsuite/cache.py index b25e72b6f..31ebf55af 100644 --- a/src/borg/testsuite/cache.py +++ b/src/borg/testsuite/cache.py @@ -257,7 +257,7 @@ class TestAdHocCache: def test_files_cache(self, cache): assert cache.file_known_and_unchanged(bytes(32), None) == (False, None) - assert not cache.do_files + assert cache.cache_mode == 'd' assert cache.files is None def test_txn(self, cache):