From 1809ea2f3eafe917ac489b9de84efc09ae1db7fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Sun, 9 Feb 2014 15:52:36 +0100 Subject: [PATCH] More attic check --repair improvements --- attic/archiver.py | 9 +++ attic/remote.py | 4 +- attic/repository.py | 23 ++++---- attic/testsuite/repository.py | 107 +++++++++++++++++++++++++++------- 4 files changed, 109 insertions(+), 34 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 13f0edef5..933eaa9f5 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -64,6 +64,15 @@ def do_check(self, args): """Check repository consistency """ repository = self.open_repository(args.repository) + if args.repair: + while True: + self.print_error("""Warning: check --repair is an experimental feature that might result +in data loss. Checking and repairing archive metadata consistency is not yet +supported so some types of corruptions will be undetected and not repaired. + +Type "Yes I am sure" if you understand this and want to continue.\n""") + if input('Do you want to continue? ') == 'Yes I am sure': + break if args.progress is None: args.progress = is_a_terminal(sys.stdout) or args.verbose if not repository.check(progress=args.progress, repair=args.repair): diff --git a/attic/remote.py b/attic/remote.py index 6f750871c..0c98e21d7 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -5,7 +5,7 @@ from subprocess import Popen, PIPE import sys -from .helpers import Error +from .helpers import Error, IntegrityError from .repository import Repository BUFSIZE = 10 * 1024 * 1024 @@ -134,6 +134,8 @@ def fetch_from_cache(args): raise Repository.AlreadyExists(self.location.orig) elif error == b'CheckNeeded': raise Repository.CheckNeeded(self.location.orig) + elif error == b'IntegrityError': + raise IntegrityError raise self.RPCError(error) else: yield res diff --git a/attic/repository.py b/attic/repository.py index 6c3f0e803..70eee5535 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -192,8 +192,15 @@ def check(self, progress=False, repair=False): This method verifies all segment checksums and makes sure the index is consistent with the data stored in the segments. """ + error_found = False + def report_progress(msg, error=False): + nonlocal error_found + if error: + error_found = True + if error or progress: + print(msg, file=sys.stderr) + assert not self._active_txn - assert not self.index index_transaction_id = self.get_index_transaction_id() segments_transaction_id = self.io.get_segments_transaction_id(index_transaction_id) if index_transaction_id is None and segments_transaction_id is None: @@ -204,15 +211,8 @@ def check(self, progress=False, repair=False): current_index = self.get_read_only_index(transaction_id) else: current_index = None + report_progress('No suitable index found', error=True) progress_time = None - error_found = False - - def report_progress(msg, error=False): - nonlocal error_found - if error: - error_found = True - if error or progress: - print(msg, file=sys.stderr) for segment, filename in self.io.segment_iterator(): if segment > transaction_id: @@ -259,7 +259,8 @@ def report_progress(msg, error=False): report_progress('Check complete, no errors found.') if repair: self.write_index() - return not error_found + self.rollback() + return not error_found or repair def rollback(self): """ @@ -362,7 +363,7 @@ def get_segments_transaction_id(self, index_transaction_id=0): """Verify that the transaction id is consistent with the index transaction id """ for segment, filename in self.segment_iterator(reverse=True): - if segment < index_transaction_id: + if index_transaction_id is not None and segment < index_transaction_id: # The index is newer than any committed transaction found return -1 if self.is_committed_segment(filename): diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py index 27ffeb074..6e06ea474 100644 --- a/attic/testsuite/repository.py +++ b/attic/testsuite/repository.py @@ -2,7 +2,7 @@ import shutil import tempfile from attic.hashindex import NSIndex -from attic.helpers import Location +from attic.helpers import Location, IntegrityError from attic.remote import RemoteRepository from attic.repository import Repository from attic.testsuite import AtticTestCase @@ -106,10 +106,15 @@ def tearDown(self): self.repository.close() shutil.rmtree(self.tmppath) - def add_objects(self, ids): + def get_objects(self, *ids): for id_ in ids: - self.repository.put(('%032d' % id_).encode('ascii'), b'data') - self.repository.commit() + self.repository.get(('%032d' % id_).encode('ascii')) + + def add_objects(self, segments): + for ids in segments: + for id_ in ids: + self.repository.put(('%032d' % id_).encode('ascii'), b'data') + self.repository.commit() def get_head(self): return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1] @@ -124,36 +129,94 @@ def corrupt_object(self, id_): fd.seek(offset) fd.write(b'BOOM') + def delete_segment(self, segment): + os.unlink(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment))) + + def delete_index(self): + os.unlink(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head()))) + + def rename_index(self, new_name): + os.rename(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())), + os.path.join(self.tmppath, 'repository', new_name)) + def list_objects(self): return set((int(key) for key, _ in list(self.open_index().iteritems()))) - def test_check(self): - self.add_objects([1, 2, 3]) - self.add_objects([4, 5, 6]) + def test_repair_corrupted_segment(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) self.assert_equal(True, self.repository.check()) self.corrupt_object(5) - self.reopen() + self.assert_raises(IntegrityError, lambda: self.get_objects(5)) + self.repository.rollback() + # Make sure a regular check does not repair anything self.assert_equal(False, self.repository.check()) - self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) - - def test_check_repair(self): - self.add_objects([1, 2, 3]) - self.add_objects([4, 5, 6]) - self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) + self.assert_equal(False, self.repository.check()) + # Make sure a repair actually repairs the repo + self.assert_equal(True, self.repository.check(repair=True)) + self.get_objects(4) self.assert_equal(True, self.repository.check()) - self.corrupt_object(5) - self.reopen() - self.assert_equal(False, self.repository.check(repair=True)) self.assert_equal(set([1, 2, 3, 4, 6]), self.list_objects()) - - def test_check_missing_or_corrupt_commit_tag(self): - self.add_objects([1, 2, 3]) + def test_repair_missing_segment(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) + self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) + self.assert_equal(True, self.repository.check()) + self.delete_segment(1) + self.repository.rollback() + self.assert_equal(True, self.repository.check(repair=True)) self.assert_equal(set([1, 2, 3]), self.list_objects()) - with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(self.get_head())), 'ab') as fd: + + def test_repair_missing_commit_segment(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) + self.delete_segment(1) + self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) + self.assert_equal(False, self.repository.check()) + self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) + self.assert_equal(True, self.repository.check(repair=True)) + self.assert_raises(Repository.DoesNotExist, lambda: self.get_objects(4)) + self.assert_equal(set([1, 2, 3]), self.list_objects()) + + def test_repair_corrupted_commit_segment(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) + with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'ab') as fd: fd.write(b'X') - self.assert_raises(Repository.CheckNeeded, lambda: self.repository.get(bytes(32))) + self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) + self.assert_equal(False, self.repository.check()) + self.assert_equal(True, self.repository.check(repair=True)) + self.get_objects(4) + self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) + + def test_repair_missing_index(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) + self.delete_index() + self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) + self.assert_equal(False, self.repository.check()) + self.assert_equal(True, self.repository.check(repair=True)) + self.assert_equal(True, self.repository.check()) + self.get_objects(4) + self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) + + def test_repair_index_too_old(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) + self.rename_index('index.0') + self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) + self.assert_equal(False, self.repository.check()) + self.assert_equal(True, self.repository.check(repair=True)) + self.assert_equal(True, self.repository.check()) + self.get_objects(4) + self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) + + def test_repair_index_too_new(self): + self.add_objects([[1, 2, 3], [4, 5, 6]]) + self.rename_index('index.100') + self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) + self.assert_equal(False, self.repository.check()) + self.assert_equal(True, self.repository.check(repair=True)) + self.assert_equal(True, self.repository.check()) + self.get_objects(4) + self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) + class RemoteRepositoryTestCase(RepositoryTestCase):