
Merge pull request #3726 from milkey-mouse/files-cache-non-lazy-fp1.2

non-lazy files cache (1.2 forward-port)
Authored by TW on 2018-03-25 14:50:48 +02:00, committed by GitHub
commit e6abb0804b
4 changed files with 41 additions and 35 deletions
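In short: instead of reading the files cache lazily on the first file_known_and_unchanged() lookup, the local cache now reads it eagerly when it is opened, and the legacy do_files / ignore_inode switches are folded into a single cache_mode string that travels with the Cache object. A minimal sketch of the open-time behaviour (simplified, illustration only, not the full LocalCache code):

    class LocalCacheSketch:
        """Illustration of the 'non-lazy' files cache: read it at open time."""

        def _do_open(self, cache_mode):
            if 'd' in cache_mode:          # d(isabled): the files cache is never used
                self.files = None
            else:                          # read eagerly here instead of on first lookup
                self.files = self._read_files()

        def _read_files(self):
            return {}                      # stands in for loading the on-disk files cache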

View file

@@ -1131,22 +1131,19 @@ def process_stdin(self, path, cache):
self.add_item(item, stats=self.stats)
return 'i' # stdin
def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEFAULT_FILES_CACHE_MODE):
def process_file(self, path, st, cache):
with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet
is_special_file = is_special(st.st_mode)
if not hardlinked or hardlink_master:
if not is_special_file:
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
known, ids = cache.file_known_and_unchanged(path_hash, st)
else:
# in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past:
path_hash = None
known, ids = False, None
first_run = not cache.files and cache.do_files
if first_run:
logger.debug('Processing files ...')
chunks = None
if ids is not None:
# Make sure all ids are available
@@ -1172,7 +1169,7 @@ def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEF
if not is_special_file:
# we must not memorize special files, because the contents of e.g. a
# block or char device will change without its mtime/size/inode changing.
cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
self.stats.nfiles += 1
item.update(self.metadata_collector.stat_attrs(st, path))
item.get_size(memorize=True)
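The comment above about --read-special mode rests on the rule that special files must never be memorized in the files cache: the content of e.g. a block or char device can change without its mtime/size/inode changing. For orientation, a minimal sketch of such a check (borg's actual is_special helper lives in helpers.py and is not part of this diff):

    import stat

    def is_special(mode):
        # block/char devices and FIFOs: their content cannot be validated via
        # regular file metadata, so the files cache must not be used for them
        return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)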

View file

@@ -144,7 +144,9 @@ def wrapper(self, args, **kwargs):
if cache:
with Cache(repository, kwargs['key'], kwargs['manifest'],
do_files=getattr(args, 'cache_files', False),
progress=getattr(args, 'progress', False), lock_wait=self.lock_wait) as cache_:
ignore_inode=getattr(args, 'ignore_inode', False),
progress=getattr(args, 'progress', False), lock_wait=self.lock_wait,
cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE)) as cache_:
return method(self, args, repository=repository, cache=cache_, **kwargs)
else:
return method(self, args, repository=repository, **kwargs)
@@ -453,6 +455,7 @@ def create_inner(archive, cache, fso):
skip_inodes.add((st.st_ino, st.st_dev))
except OSError:
pass
logger.debug('Processing files ...')
for path in args.paths:
if path == '-': # stdin
path = args.stdin_name
@@ -501,16 +504,15 @@ def create_inner(archive, cache, fso):
self.output_filter = args.output_filter
self.output_list = args.output_list
self.ignore_inode = args.ignore_inode
self.nobsdflags = args.nobsdflags
self.exclude_nodump = args.exclude_nodump
self.files_cache_mode = args.files_cache_mode
dry_run = args.dry_run
t0 = datetime.utcnow()
t0_monotonic = time.monotonic()
if not dry_run:
with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress,
lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync) as cache:
lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync,
cache_mode=args.files_cache_mode, ignore_inode=args.ignore_inode) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
@@ -576,7 +578,7 @@ def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present,
return
if stat.S_ISREG(st.st_mode):
if not dry_run:
status = fso.process_file(path, st, cache, self.ignore_inode, self.files_cache_mode)
status = fso.process_file(path, st, cache)
elif stat.S_ISDIR(st.st_mode):
if recurse:
tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
@@ -1346,7 +1348,7 @@ def do_prune(self, args, repository, manifest, key):
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
stats = Statistics()
with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
list_logger = logging.getLogger('borg.output.list')
# set up counters for the progress display
to_delete_len = len(to_delete)
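The archiver hunks above only forward args.files_cache_mode into Cache(); the conversion from the user-facing --files-cache value (comma-separated names) to the single-letter mode string happens elsewhere and is not part of this diff. A hypothetical sketch of that mapping, just to make the letters used below readable:

    # hypothetical illustration: reduce e.g. "ctime,size,inode" to "csi"
    LETTERS = {'ctime': 'c', 'mtime': 'm', 'size': 's',
               'inode': 'i', 'rechunk': 'r', 'disabled': 'd'}

    def to_cache_mode(value):
        return ''.join(LETTERS[name] for name in value.split(','))

    assert to_cache_mode('ctime,size,inode') == 'csi'

The letter order does not matter, since the cache code only tests membership ('c' in cache_mode, 'i' in cache_mode, and so on).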

View file

@@ -359,11 +359,18 @@ def destroy(repository, path=None):
shutil.rmtree(path)
def __new__(cls, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
progress=False, lock_wait=None, permit_adhoc_cache=False):
progress=False, lock_wait=None, permit_adhoc_cache=False, cache_mode=DEFAULT_FILES_CACHE_MODE,
ignore_inode=False):
if not do_files and 'd' not in cache_mode:
cache_mode = 'd'
elif ignore_inode and 'i' in cache_mode:
cache_mode = ''.join(set(cache_mode) - set('i'))
def local():
return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync,
do_files=do_files, warn_if_unencrypted=warn_if_unencrypted, progress=progress,
lock_wait=lock_wait)
warn_if_unencrypted=warn_if_unencrypted, progress=progress,
lock_wait=lock_wait, cache_mode=cache_mode)
def adhoc():
return AdHocCache(repository=repository, key=key, manifest=manifest)
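Cache.__new__ now folds the two legacy keyword arguments into the cache_mode string before constructing LocalCache: do_files=False forces d(isabled), and ignore_inode strips the i letter. A small standalone sketch of that normalization with example inputs ('cis' is just an example value for ctime+inode+size; the function name is only for illustration):

    def effective_cache_mode(cache_mode, do_files=False, ignore_inode=False):
        if not do_files and 'd' not in cache_mode:
            return 'd'                                   # files cache disabled entirely
        if ignore_inode and 'i' in cache_mode:
            return ''.join(set(cache_mode) - set('i'))   # drop the inode comparison
        return cache_mode

    assert effective_cache_mode('cis', do_files=False) == 'd'
    assert set(effective_cache_mode('cis', do_files=True, ignore_inode=True)) == {'c', 's'}
    assert effective_cache_mode('cis', do_files=True) == 'cis'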
@@ -421,19 +428,19 @@ class LocalCache(CacheStatsMixin):
Persistent, local (client-side) cache.
"""
def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
progress=False, lock_wait=None):
def __init__(self, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True,
progress=False, lock_wait=None, cache_mode=DEFAULT_FILES_CACHE_MODE):
"""
:param do_files: use file metadata cache
:param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
:param lock_wait: timeout for lock acquisition (None: return immediately if lock unavailable)
:param sync: do :meth:`.sync`
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
"""
self.repository = repository
self.key = key
self.manifest = manifest
self.progress = progress
self.do_files = do_files
self.cache_mode = cache_mode
self.timestamp = None
self.txn_active = False
@@ -485,7 +492,10 @@ def _do_open(self):
with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
integrity_data=self.cache_config.integrity.get('chunks')) as fd:
self.chunks = ChunkIndex.read(fd)
self.files = None
if 'd' in self.cache_mode: # d(isabled)
self.files = None
else:
self._read_files()
def open(self):
if not os.path.isdir(self.path):
@@ -917,26 +927,22 @@ def chunk_decref(self, id, stats, wait=True):
else:
stats.update(-size, -csize, False)
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
def file_known_and_unchanged(self, path_hash, st):
"""
Check if we know the file that has this path_hash (know == it is in our files cache) and
whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
:param path_hash: hash(file_path), to save some memory in the files cache
:param st: the file's stat() result
:param ignore_inode: whether the inode number shall be ignored
:param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
:return: known, ids (known is True if we have infos about this file in the cache,
ids is the list of chunk ids IF the file has not changed, otherwise None).
"""
if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled)
cache_mode = self.cache_mode
if 'd' in cache_mode or not stat.S_ISREG(st.st_mode): # d(isabled)
return False, None
if self.files is None:
self._read_files()
# note: r(echunk) does not need the files cache in this method, but the files cache will
# be updated and saved to disk to memorize the files. To preserve previous generations in
# the cache, this means that it also needs to get loaded from disk first, so keep
# _read_files() above here.
# the cache, this means that it also needs to get loaded from disk first.
if 'r' in cache_mode: # r(echunk)
return False, None
entry = self.files.get(path_hash)
@@ -946,7 +952,7 @@ def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode
entry = FileCacheEntry(*msgpack.unpackb(entry))
if 's' in cache_mode and entry.size != st.st_size:
return True, None
if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
if 'i' in cache_mode and entry.inode != st.st_ino:
return True, None
if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
return True, None
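The per-call ignore_inode/cache_mode parameters are gone; each letter in self.cache_mode now enables one comparison between the cached entry and the current stat() result. Condensed into a small sketch (cmtime is assumed already unpacked to an integer here; the real code converts it via bigint_to_int as shown above):

    def entry_unchanged(entry, st, cache_mode):
        # a file counts as unchanged only if every enabled comparison agrees
        if 's' in cache_mode and entry.size != st.st_size:
            return False
        if 'i' in cache_mode and entry.inode != st.st_ino:
            return False
        if 'c' in cache_mode and entry.cmtime != st.st_ctime_ns:
            return False
        return True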
@@ -963,9 +969,10 @@ def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode
self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
return True, entry.chunk_ids
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
def memorize_file(self, path_hash, st, ids):
cache_mode = self.cache_mode
# note: r(echunk) modes will update the files cache, d(isabled) mode won't
if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):
if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):
return
if 'c' in cache_mode:
cmtime_ns = safe_ns(st.st_ctime_ns)
@@ -1012,12 +1019,12 @@ def __exit__(self, exc_type, exc_val, exc_tb):
pass
files = None
do_files = False
cache_mode = 'd'
def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
def file_known_and_unchanged(self, path_hash, st):
return False, None
def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
def memorize_file(self, path_hash, st, ids):
pass
def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):

View file

@@ -257,7 +257,7 @@ def test_deletes_chunks_during_lifetime(self, cache, repository):
def test_files_cache(self, cache):
assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
assert not cache.do_files
assert cache.cache_mode == 'd'
assert cache.files is None
def test_txn(self, cache):