mirror of https://github.com/borgbackup/borg.git
Merge pull request #7910 from ThomasWaldmann/fix-shadow-index-master
Fix shadow index (master)
This commit is contained in:
commit
e431b838f4
|
@ -195,6 +195,9 @@ class Repository:
|
|||
# segment_n PUT A, segment_x DELETE A
|
||||
# After the "DELETE A" in segment_x the shadow index will contain "A -> [n]".
|
||||
# .delete() updates this index; it is persisted into the "hints" file and is later used by .compact_segments().
|
||||
# We need the entries in the shadow_index to not accidentally drop the "DELETE A" when we compact segment_x
|
||||
# only (and we do not compact segment_n): DELETE A is still needed then, because PUT A will still be
|
||||
# there. Otherwise chunk A would reappear although it was previously deleted.
|
||||
self.shadow_index = {}
|
||||
self._active_txn = False
|
||||
self.lock_wait = lock_wait
|
||||
|
@ -960,6 +963,7 @@ class Repository:
|
|||
in_index = self.index[key]
|
||||
self.compact[in_index.segment] += header_size(tag) + size
|
||||
self.segments[in_index.segment] -= 1
|
||||
self.shadow_index.setdefault(key, []).append(in_index.segment)
|
||||
except KeyError:
|
||||
pass
|
||||
self.index[key] = NSIndexEntry(segment, offset, size)
|
||||
|
@ -977,6 +981,7 @@ class Repository:
|
|||
# is already gone, then it was already compacted.
|
||||
self.segments[in_index.segment] -= 1
|
||||
self.compact[in_index.segment] += header_size(tag) + in_index.size
|
||||
self.shadow_index.setdefault(key, []).append(in_index.segment)
|
||||
elif tag == TAG_COMMIT:
|
||||
continue
|
||||
else:
|
||||
|
@ -1299,11 +1304,11 @@ class Repository:
|
|||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
# note: doing a delete first will do some bookkeeping.
|
||||
# we do not want to update the shadow_index here, because
|
||||
# we know already that we will PUT to this id, so it will
|
||||
# be in the repo index (and we won't need it in the shadow_index).
|
||||
self._delete(id, in_index.segment, in_index.offset, in_index.size, update_shadow_index=False)
|
||||
# this put call supersedes a previous put to same id.
|
||||
# it is essential to do a delete first to get correct quota bookkeeping
|
||||
# and also a correctly updated shadow_index, so that the compaction code
|
||||
# does not wrongly resurrect an old PUT by dropping a DEL that is still needed.
|
||||
self._delete(id, in_index.segment, in_index.offset, in_index.size)
|
||||
segment, offset = self.io.write_put(id, data)
|
||||
self.storage_quota_use += header_size(TAG_PUT2) + len(data)
|
||||
self.segments.setdefault(segment, 0)
|
||||
|
@ -1327,16 +1332,15 @@ class Repository:
|
|||
in_index = self.index.pop(id)
|
||||
except KeyError:
|
||||
raise self.ObjectNotFound(id, self.path) from None
|
||||
# if we get here, there is an object with this id in the repo,
|
||||
# we write a DEL here that shadows the respective PUT.
|
||||
# after the delete, the object is not in the repo index any more,
|
||||
# for the compaction code, we need to update the shadow_index in this case.
|
||||
self._delete(id, in_index.segment, in_index.offset, in_index.size, update_shadow_index=True)
|
||||
self._delete(id, in_index.segment, in_index.offset, in_index.size)
|
||||
|
||||
def _delete(self, id, segment, offset, size, *, update_shadow_index):
|
||||
def _delete(self, id, segment, offset, size):
|
||||
# common code used by put and delete
|
||||
if update_shadow_index:
|
||||
self.shadow_index.setdefault(id, []).append(segment)
|
||||
# because we'll write a DEL tag to the repository, we must update the shadow index.
|
||||
# this is always true, no matter whether we are called from put() or delete().
|
||||
# the compaction code needs this to not drop DEL tags if they are still required
|
||||
# to keep a PUT in an earlier segment in the "effectively deleted" state.
|
||||
self.shadow_index.setdefault(id, []).append(segment)
|
||||
self.segments[segment] -= 1
|
||||
self.compact[segment] += header_size(TAG_PUT2) + size
|
||||
segment, size = self.io.write_delete(id)
|
||||
|
|
|
@ -516,7 +516,8 @@ def test_moved_deletes_are_tracked(repository):
|
|||
assert H(1) not in repository.shadow_index
|
||||
|
||||
|
||||
def test_shadowed_entries_are_preserved(repository):
|
||||
def test_shadowed_entries_are_preserved1(repository):
|
||||
# this tests the shadowing-by-del behaviour
|
||||
with repository:
|
||||
get_latest_segment = repository.io.get_latest_segment
|
||||
repository.put(H(1), fchunk(b"1"))
|
||||
|
@ -546,6 +547,50 @@ def test_shadowed_entries_are_preserved(repository):
|
|||
assert H(1) not in repository
|
||||
|
||||
|
||||
def test_shadowed_entries_are_preserved2(repository):
|
||||
# this tests the shadowing-by-double-put behaviour, see issue #5661
|
||||
# assume this repo state:
|
||||
# seg1: PUT H1
|
||||
# seg2: COMMIT
|
||||
# seg3: DEL H1, PUT H1, DEL H1, PUT H2
|
||||
# seg4: COMMIT
|
||||
# Note how due to the final DEL H1 in seg3, H1 is effectively deleted.
|
||||
#
|
||||
# compaction of only seg3:
|
||||
# PUT H1 gets dropped because it is not needed any more.
|
||||
# DEL H1 must be kept, because there is still a PUT H1 in seg1 which must not
|
||||
# "reappear" in the index if the index gets rebuilt.
|
||||
with repository:
|
||||
get_latest_segment = repository.io.get_latest_segment
|
||||
repository.put(H(1), fchunk(b"1"))
|
||||
# This is the segment with our original PUT of interest
|
||||
put_segment = get_latest_segment()
|
||||
repository.commit(compact=False)
|
||||
# We now put H(1) again (which implicitly does DEL(H(1)) followed by PUT(H(1), ...)),
|
||||
# delete H(1) afterwards, and force this segment to not be compacted, which can happen
|
||||
# if it's not sparse enough (symbolized by H(2) here).
|
||||
repository.put(H(1), fchunk(b"1"))
|
||||
repository.delete(H(1))
|
||||
repository.put(H(2), fchunk(b"1"))
|
||||
delete_segment = get_latest_segment()
|
||||
# We pretend these are mostly dense (not sparse) and won't be compacted
|
||||
del repository.compact[put_segment]
|
||||
del repository.compact[delete_segment]
|
||||
repository.commit(compact=True)
|
||||
# Now we perform an unrelated operation on the segment containing the DELETE,
|
||||
# causing it to be compacted.
|
||||
repository.delete(H(2))
|
||||
repository.commit(compact=True)
|
||||
assert repository.io.segment_exists(put_segment)
|
||||
assert not repository.io.segment_exists(delete_segment)
|
||||
# Basic case, since the index survived this must be ok
|
||||
assert H(1) not in repository
|
||||
# Nuke index, force replay
|
||||
os.unlink(os.path.join(repository.path, "index.%d" % get_latest_segment()))
|
||||
# Must not reappear
|
||||
assert H(1) not in repository # F
|
||||
|
||||
|
||||
def test_shadow_index_rollback(repository):
|
||||
with repository:
|
||||
repository.put(H(1), fchunk(b"1"))
|
||||
|
@ -1016,6 +1061,15 @@ def test_hints_persistence(repository):
|
|||
assert compact_expected == repository.compact
|
||||
del repository.segments[2] # ignore the segment created by put(H(42), ...)
|
||||
assert segments_expected == repository.segments
|
||||
with reopen(repository) as repository:
|
||||
check(repository, repository.path, repair=True)
|
||||
with reopen(repository) as repository:
|
||||
repository.put(H(42), fchunk(b"foobar")) # this will call prepare_txn() and load the hints data
|
||||
assert shadow_index_expected == repository.shadow_index
|
||||
# TODO: sizes do not match here (with vs. without header?), so this check is disabled:
|
||||
# assert compact_expected == repository.compact
|
||||
del repository.segments[2] # ignore the segment created by put(H(42), ...)
|
||||
assert segments_expected == repository.segments
|
||||
|
||||
|
||||
def test_hints_behaviour(repository):
|
||||
|
|
Loading…
Reference in New Issue