From 055a40910bf1521192b8fd80f97e48632ad51eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Sat, 3 May 2014 13:34:40 +0200 Subject: [PATCH] Reduce memory usage when backing up many small files Closes #69. --- CHANGES | 1 + attic/cache.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index 5f20080b4..e54ff83ec 100644 --- a/CHANGES +++ b/CHANGES @@ -8,6 +8,7 @@ Version 0.13 (feature release, released on X) +- Reduced memory usage when backing up many small files (#69) - Experimental Linux and FreeBSD ACL support (#66) - Added support for backup and restore of BSDFlags (OSX, FreeBSD) (#56) - Fix bug where xattrs on symlinks were not correctly restored diff --git a/attic/cache.py b/attic/cache.py index 65362ff39..63680056a 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -12,6 +12,8 @@ class Cache(object): """Client Side cache """ + # Do not cache file metadata for files smaller than this + FILE_MIN_SIZE = 4096 class RepositoryReplay(Error): """Cache is newer than repository, refusing to continue""" @@ -81,9 +83,10 @@ def _read_files(self): if not data: break u.feed(data) - for hash, item in u: + for path_hash, item in u: + if item[2] > self.FILE_MIN_SIZE: item[0] += 1 - self.files[hash] = item + self.files[path_hash] = item def begin_txn(self): # Initialize transaction snapshot @@ -218,7 +221,8 @@ def file_known_and_unchanged(self, path_hash, st): return None def memorize_file(self, path_hash, st, ids): - # Entry: Age, inode, size, mtime, chunk ids - mtime_ns = st_mtime_ns(st) - self.files[path_hash] = 0, st.st_ino, st.st_size, mtime_ns, ids - self._newest_mtime = max(self._newest_mtime, mtime_ns) + if st.st_size > self.FILE_MIN_SIZE: + # Entry: Age, inode, size, mtime, chunk ids + mtime_ns = st_mtime_ns(st) + self.files[path_hash] = 0, st.st_ino, st.st_size, mtime_ns, ids + self._newest_mtime = max(self._newest_mtime, mtime_ns)