From 463393141394adce7320d5c4116b4fc6012e5b3d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Mar 2015 15:01:24 +0100 Subject: [PATCH] add global option --no-files-cache to lower memory consumption When given, attic does not use the "files" cache. Saves about 240B RAM per file (that sounds like only a little, but consider that backups nowadays often contain millions of files). So try this if attic eats more memory than you have as RAM (usually means paging or MemoryErrors). Of course, saving memory does not come for free. In my one experiment, run time increased from 3.5 to 23 minutes (my system has enough RAM). --- attic/archiver.py | 11 +++++++---- attic/cache.py | 8 +++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..573021462 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -97,7 +97,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") t0 = datetime.now() repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, numeric_owner=args.numeric_owner) @@ -227,7 +227,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Delete an existing archive""" repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) stats = Statistics() archive.delete(stats) @@ -302,7 +302,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Show archive details such as disk space used""" repository = 
self.open_repository(args.archive) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) stats = archive.calc_stats(cache) print('Name:', archive.name) @@ -319,7 +319,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Prune repository archives according to specified rules""" repository = self.open_repository(args.repository, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archives = list(sorted(Archive.list_archives(repository, key, manifest, cache), key=attrgetter('ts'), reverse=True)) if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None: @@ -447,6 +447,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='verbose output') + common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false', + default=True, + help='do not use the "files" cache') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: diff --git a/attic/cache.py b/attic/cache.py index acbc76653..ce5996564 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -16,13 +16,14 @@ class Cache(object): class RepositoryReplay(Error): """Cache is newer than repository, refusing to continue""" - def __init__(self, repository, key, manifest, path=None, sync=True): + def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False): self.timestamp = None self.txn_active = False self.repository = repository self.key = key self.manifest = manifest self.path = path or os.path.join(get_cache_dir(), 
hexlify(repository.id).decode('ascii')) + self.do_files = do_files if not os.path.exists(self.path): self.create() self.open() @@ -83,6 +84,7 @@ class Cache(object): u.feed(data) for path_hash, item in u: item[0] += 1 + # in the end, this takes about 240 Bytes per file self.files[path_hash] = msgpack.packb(item) def begin_txn(self): @@ -206,6 +208,8 @@ class Cache(object): stats.update(-size, -csize, False) def file_known_and_unchanged(self, path_hash, st): + if not self.do_files: + return None if self.files is None: self._read_files() entry = self.files.get(path_hash) @@ -221,6 +225,8 @@ class Cache(object): return None def memorize_file(self, path_hash, st, ids): + if not self.do_files: + return # Entry: Age, inode, size, mtime, chunk ids mtime_ns = st_mtime_ns(st) self.files[path_hash] = msgpack.packb((0, st.st_ino, st.st_size, int_to_bigint(mtime_ns), ids))