From 4a4c8884ee50de5a9f02e542f6813ca5c25e3181 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 6 Sep 2017 05:56:26 +0200 Subject: [PATCH 1/2] repo: add test case for uncommitted garbage segment files --- src/borg/testsuite/repository.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index e29c7d532..1bec14006 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -210,6 +210,23 @@ def test_sparse_delete(self): self.repository.commit() assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()] + def test_uncommitted_garbage(self): + # uncommitted garbage should be no problem, it is cleaned up automatically. + # we just have to be careful with invalidation of cached FDs in LoggedIO. + self.repository.put(H(0), b'foo') + self.repository.commit() + # write some crap to a uncommitted segment file + last_segment = self.repository.io.get_latest_segment() + with open(self.repository.io.segment_filename(last_segment + 1), 'wb') as f: + f.write(MAGIC + b'crapcrapcrap') + self.repository.close() + # usually, opening the repo and starting a transaction should trigger a cleanup. + self.repository = self.open() + with self.repository: + self.repository.put(H(0), b'bar') # this may trigger compact_segments() + self.repository.commit() + # the point here is that nothing blows up with an exception. + class RepositoryCommitTestCase(RepositoryTestCaseBase): From 71229138255e9c3541efd83b89bbec960944b9d6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 6 Sep 2017 05:42:22 +0200 Subject: [PATCH 2/2] repo cleanup/write: invalidate cached FDs --- src/borg/repository.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/borg/repository.py b/src/borg/repository.py index ca55c6988..dada3c14e 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -1176,6 +1176,8 @@ def cleanup(self, transaction_id): count = 0 for segment, filename in self.segment_iterator(reverse=True): if segment > transaction_id: + if segment in self.fds: + del self.fds[segment] truncate_and_unlink(filename) count += 1 else: @@ -1232,6 +1234,12 @@ def get_write_fd(self, no_new=False, raise_full=False): self._write_fd = SyncFile(self.segment_filename(self.segment), binary=True) self._write_fd.write(MAGIC) self.offset = MAGIC_LEN + if self.segment in self.fds: + # we may have a cached fd for a segment file we already deleted and + # we are writing now a new segment file to same file name. get rid of + # of the cached fd that still refers to the old file, so it will later + # get repopulated (on demand) with a fd that refers to the new file. + del self.fds[self.segment] return self._write_fd def get_fd(self, segment):