From 98f3b8d937dea79e86b0c2f9c608a094d01325eb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 Nov 2015 13:46:00 +0100 Subject: [PATCH 1/3] fix "check" for repos that have incomplete chunks, fixes #364 added try/finally (the code in between was just indented, no other code changes) to make sure it sets self.index back to None, even if the code crashes e.g. due to an IntegrityError caused by an incomplete segment caused by a disk full condition. also, in prepare_txn, create an empty in-memory index if transaction_id is None, which is required by the Repository.check code to work correctly. If the index is not empty there, it will miscalculate segment usage (self.segments). --- borg/repository.py | 69 ++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 230c7d866..bf7a27a15 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -175,7 +175,7 @@ class Repository: # the repository instance lives on - even if exceptions happened. self._active_txn = False raise - if not self.index: + if not self.index or transaction_id is None: self.index = self.open_index(transaction_id) if transaction_id is None: self.segments = {} @@ -237,38 +237,41 @@ class Repository: def replay_segments(self, index_transaction_id, segments_transaction_id): self.prepare_txn(index_transaction_id, do_cleanup=False) - for segment, filename in self.io.segment_iterator(): - if index_transaction_id is not None and segment <= index_transaction_id: - continue - if segment > segments_transaction_id: - break - self.segments[segment] = 0 - for tag, key, offset in self.io.iter_objects(segment): - if tag == TAG_PUT: - try: - s, _ = self.index[key] - self.compact.add(s) - self.segments[s] -= 1 - except KeyError: - pass - self.index[key] = segment, offset - self.segments[segment] += 1 - elif tag == TAG_DELETE: - try: - s, _ = self.index.pop(key) - self.segments[s] -= 1 - self.compact.add(s) - except KeyError: - pass - self.compact.add(segment) - elif tag == TAG_COMMIT: + try: + for segment, filename in self.io.segment_iterator(): + if index_transaction_id is not None and segment <= index_transaction_id: continue - else: - raise self.CheckNeeded(self.path) - if self.segments[segment] == 0: - self.compact.add(segment) - self.write_index() - self.rollback() + if segment > segments_transaction_id: + break + # code duplication below?? vvv (see similar code in check()) + self.segments[segment] = 0 + for tag, key, offset in self.io.iter_objects(segment): + if tag == TAG_PUT: + try: + s, _ = self.index[key] + self.compact.add(s) + self.segments[s] -= 1 + except KeyError: + pass + self.index[key] = segment, offset + self.segments[segment] += 1 + elif tag == TAG_DELETE: + try: + s, _ = self.index.pop(key) + self.segments[s] -= 1 + self.compact.add(s) + except KeyError: + pass + self.compact.add(segment) + elif tag == TAG_COMMIT: + continue + else: + raise self.CheckNeeded(self.path) + if self.segments[segment] == 0: + self.compact.add(segment) + self.write_index() + finally: + self.rollback() def check(self, repair=False): """Check repository consistency @@ -297,7 +300,7 @@ class Repository: if repair: self.io.cleanup(transaction_id) segments_transaction_id = self.io.get_segments_transaction_id() - self.prepare_txn(None) + self.prepare_txn(None) # self.index, self.compact, self.segments all empty now! for segment, filename in self.io.segment_iterator(): if segment > transaction_id: continue From f804c6fb1ca3ece53fbf62f3c80161808fc54811 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 Nov 2015 13:52:26 +0100 Subject: [PATCH 2/3] repository check code: added some comments --- borg/repository.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index bf7a27a15..e1651be29 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -178,8 +178,8 @@ class Repository: if not self.index or transaction_id is None: self.index = self.open_index(transaction_id) if transaction_id is None: - self.segments = {} - self.compact = set() + self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x] + self.compact = set() # XXX bad name: segments_needing_compaction = self.compact else: if do_cleanup: self.io.cleanup(transaction_id) @@ -335,12 +335,15 @@ class Repository: continue else: report_error('Unexpected tag {} in segment {}'.format(tag, segment)) + # self.index, self.segments, self.compact now reflect the state of the segment files up to # We might need to add a commit tag if no committed segment is found if repair and segments_transaction_id is None: report_error('Adding commit tag to segment {}'.format(transaction_id)) self.io.segment = transaction_id + 1 self.io.write_commit() if current_index and not repair: + # current_index = "as found on disk" + # self.index = "as rebuilt in-memory from segments" if len(current_index) != len(self.index): report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index))) elif current_index: From c01efa46663f634543e09f81c0a1cf8cdd107654 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 6 Nov 2015 19:37:39 +0100 Subject: [PATCH 3/3] repository: refactor some duplicate code --- borg/repository.py | 84 +++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 49 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index e1651be29..048fa032d 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -243,36 +243,44 @@ class Repository: continue if segment > segments_transaction_id: break - # code duplication below?? vvv (see similar code in check()) - self.segments[segment] = 0 - for tag, key, offset in self.io.iter_objects(segment): - if tag == TAG_PUT: - try: - s, _ = self.index[key] - self.compact.add(s) - self.segments[s] -= 1 - except KeyError: - pass - self.index[key] = segment, offset - self.segments[segment] += 1 - elif tag == TAG_DELETE: - try: - s, _ = self.index.pop(key) - self.segments[s] -= 1 - self.compact.add(s) - except KeyError: - pass - self.compact.add(segment) - elif tag == TAG_COMMIT: - continue - else: - raise self.CheckNeeded(self.path) - if self.segments[segment] == 0: - self.compact.add(segment) + objects = self.io.iter_objects(segment) + self._update_index(segment, objects) self.write_index() finally: self.rollback() + def _update_index(self, segment, objects, report=None): + """some code shared between replay_segments and check""" + self.segments[segment] = 0 + for tag, key, offset in objects: + if tag == TAG_PUT: + try: + s, _ = self.index[key] + self.compact.add(s) + self.segments[s] -= 1 + except KeyError: + pass + self.index[key] = segment, offset + self.segments[segment] += 1 + elif tag == TAG_DELETE: + try: + s, _ = self.index.pop(key) + self.segments[s] -= 1 + self.compact.add(s) + except KeyError: + pass + self.compact.add(segment) + elif tag == TAG_COMMIT: + continue + else: + msg = 'Unexpected tag {} in segment {}'.format(tag, segment) + if report is None: + raise self.CheckNeeded(msg) + else: + report(msg) + if self.segments[segment] == 0: + self.compact.add(segment) + def check(self, repair=False): """Check repository consistency @@ -312,29 +320,7 @@ class Repository: if repair: self.io.recover_segment(segment, filename) objects = list(self.io.iter_objects(segment)) - self.segments[segment] = 0 - for tag, key, offset in objects: - if tag == TAG_PUT: - try: - s, _ = self.index[key] - self.compact.add(s) - self.segments[s] -= 1 - except KeyError: - pass - self.index[key] = segment, offset - self.segments[segment] += 1 - elif tag == TAG_DELETE: - try: - s, _ = self.index.pop(key) - self.segments[s] -= 1 - self.compact.add(s) - except KeyError: - pass - self.compact.add(segment) - elif tag == TAG_COMMIT: - continue - else: - report_error('Unexpected tag {} in segment {}'.format(tag, segment)) + self._update_index(segment, objects, report_error) # self.index, self.segments, self.compact now reflect the state of the segment files up to # We might need to add a commit tag if no committed segment is found if repair and segments_transaction_id is None: