From 6f00b025d87b85d3236b0ecba15de30ae4acd85e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 7 Jan 2021 00:57:14 +0100 Subject: [PATCH] remove empty shadowed_segments lists, fixes #5275 also: - add test for removed empty shadowed_segments list - add some comments - add repo_dump test debug tool --- src/borg/repository.py | 7 +++++++ src/borg/testsuite/repository.py | 28 ++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/borg/repository.py b/src/borg/repository.py index 41dbe36f7..091f2852c 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -794,6 +794,8 @@ class Repository: try: self.shadow_index[key].remove(segment) except (KeyError, ValueError): + # do not remove entry with empty shadowed_segments list here, + # it is needed for shadowed_put_exists code (see below)! pass elif tag == TAG_DELETE and not in_index: # If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag, @@ -845,6 +847,11 @@ class Repository: new_segment, size = self.io.write_delete(key) self.compact[new_segment] += size segments.setdefault(new_segment, 0) + else: + # we did not keep the delete tag for key (see if-branch) + if not self.shadow_index[key]: + # shadowed segments list is empty -> remove it + del self.shadow_index[key] assert segments[segment] == 0, 'Corrupted segment reference count - corrupted index or hints' unused.append(segment) pi.show() diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index 3a2d92368..ef142a8b0 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -14,7 +14,7 @@ from ..helpers import IntegrityError from ..helpers import msgpack from ..locking import Lock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, ConnectionClosedWithHint, handle_remote_line -from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE +from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT, TAG_COMMIT from . import BaseTestCase from .hashindex import H @@ -54,6 +54,16 @@ class RepositoryTestCaseBase(BaseTestCase): self.repository.put(H(2), b'boo') self.repository.delete(H(3)) + def repo_dump(self, label=None): + label = label + ': ' if label is not None else '' + H_trans = {H(i): i for i in range(10)} + H_trans[None] = -1 # key == None appears in commits + tag_trans = {TAG_PUT: 'put', TAG_DELETE: 'del', TAG_COMMIT: 'comm'} + for segment, fn in self.repository.io.segment_iterator(): + for tag, key, offset, size in self.repository.io.iter_objects(segment): + print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size)) + print() + class RepositoryTestCase(RepositoryTestCaseBase): @@ -315,8 +325,10 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.repository.put(H(1), b'1') self.repository.put(H(2), b'2') self.repository.commit(compact=False) + self.repo_dump('p1 p2 c') self.repository.delete(H(1)) self.repository.commit(compact=True) + self.repo_dump('d1 cc') last_segment = self.repository.io.get_latest_segment() - 1 num_deletes = 0 for tag, key, offset, size in self.repository.io.iter_objects(last_segment): @@ -327,11 +339,16 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): assert last_segment in self.repository.compact self.repository.put(H(3), b'3') self.repository.commit(compact=True) + self.repo_dump('p3 cc') assert last_segment not in self.repository.compact assert not self.repository.io.segment_exists(last_segment) for segment, _ in self.repository.io.segment_iterator(): for tag, key, offset, size in self.repository.io.iter_objects(segment): assert tag != TAG_DELETE + assert key != H(1) + # after compaction, there should be no empty shadowed_segments lists left over. + # we have no put or del any more for H(1), so we lost knowledge about H(1). + assert H(1) not in self.repository.shadow_index def test_shadowed_entries_are_preserved(self): get_latest_segment = self.repository.io.get_latest_segment @@ -372,16 +389,19 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.repository.delete(H(1)) assert self.repository.shadow_index[H(1)] == [0] self.repository.commit(compact=True) + self.repo_dump('p1 d1 cc') # note how an empty list means that nothing is shadowed for sure - assert self.repository.shadow_index[H(1)] == [] + assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable self.repository.put(H(1), b'1') self.repository.delete(H(1)) + self.repo_dump('p1 d1') # 0 put/delete; 1 commit; 2 compacted; 3 commit; 4 put/delete assert self.repository.shadow_index[H(1)] == [4] self.repository.rollback() + self.repo_dump('r') self.repository.put(H(2), b'1') # After the rollback segment 4 shouldn't be considered anymore - assert self.repository.shadow_index[H(1)] == [] + assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): @@ -826,7 +846,7 @@ class RepositoryHintsTestCase(RepositoryTestCaseBase): self.repository.put(H(42), b'foobar') # see also do_compact() self.repository.commit(compact=True, threshold=0.0) # compact completely! # nothing to compact any more! no info left about stuff that does not exist any more: - self.assert_equal(self.repository.shadow_index[H(0)], []) + self.assert_not_in(H(0), self.repository.shadow_index) # segment 0 was compacted away, no info about it left: self.assert_not_in(0, self.repository.compact) self.assert_not_in(0, self.repository.segments)