Basic repository consistency check functionality.

Still no archive metadata validation or repair functionality.
This commit is contained in:
Jonas Borgström 2014-02-04 23:49:10 +01:00
parent b5037cf460
commit faedaf8160
11 changed files with 177 additions and 2 deletions

View File

@ -3,6 +3,14 @@ Attic Changelog
Here you can see the full list of changes between each Attic release.
Version 0.11
------------
(feature release, released on X)
- New "check" command for repository consistency checking (#24)
- Documentation improvements
Version 0.10
------------

View File

@ -13,7 +13,7 @@ from attic.cache import Cache
from attic.key import key_creator
from attic.helpers import Error, location_validator, format_time, \
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates
get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates, is_a_terminal
from attic.remote import RepositoryServer, RemoteRepository
@ -59,6 +59,17 @@ class Archiver:
repository.commit()
return self.exit_code
def do_check(self, args):
    """Check repository consistency
    """
    # NOTE: the docstring above doubles as the argparse description of the
    # "check" sub-command, so further documentation lives in comments.
    #
    # args.repository -- location of the repository to check
    # args.progress   -- True/False when --progress/--no-progress was given,
    #                    None when neither flag was used
    # args.verbose    -- forces progress reporting when no flag was given
    #
    # Returns self.exit_code, which is set to 1 when the check reports
    # problems so that the command exits with a non zero status.
    repository = self.open_repository(args.repository)
    if args.progress is None:
        # No explicit choice: report progress on a terminal or in verbose mode.
        args.progress = is_a_terminal(sys.stdout) or args.verbose
    if repository.check(progress=args.progress):
        # Only a clean check may announce that nothing is wrong.
        if args.progress:
            print('No problems found', file=sys.stderr)
    else:
        # The problems themselves were already reported by check(); make
        # sure the failure is reflected in the process exit code.
        self.exit_code = 1
    return self.exit_code
def do_change_passphrase(self, args):
"""Change repository key file passphrase
"""
@ -360,6 +371,24 @@ class Archiver:
choices=('none', 'passphrase', 'keyfile'), default='none',
help='select encryption method')
check_epilog = """
Progress status will be reported on the standard output stream by default when
it is attached to a terminal. Any problems found are printed to the standard error
stream and the command will have a non zero exit code.
"""
subparser = subparsers.add_parser('check', parents=[common_parser],
description=self.do_check.__doc__,
epilog=check_epilog)
subparser.set_defaults(func=self.do_check)
subparser.add_argument('repository', metavar='REPOSITORY',
type=location_validator(archive=False),
help='repository to check consistency of')
subparser.add_argument('--progress', dest='progress', action='store_true',
default=None,
help='Report progress status to standard output stream')
subparser.add_argument('--no-progress', dest='progress', action='store_false',
help='Disable progress reporting')
subparser = subparsers.add_parser('change-passphrase', parents=[common_parser],
description=self.do_change_passphrase.__doc__)
subparser.set_defaults(func=self.do_change_passphrase)

View File

@ -107,11 +107,13 @@ cdef class NSIndex(IndexBase):
def iteritems(self, marker=None, limit=0):
    # Create and return a key iterator bound to this index.
    # `marker` and `limit` are accepted but not used in this body.
    iter = NSKeyIterator()
    # Store the owning index object on the iterator (presumably so the
    # index outlives the raw pointer copied below -- TODO confirm).
    iter.idx = self
    iter.index = self.index
    return iter
cdef class NSKeyIterator:
cdef NSIndex idx
cdef HashIndex *index
cdef char *key
@ -156,11 +158,13 @@ cdef class ChunkIndex(IndexBase):
def iteritems(self, marker=None, limit=0):
    # Create and return a key iterator bound to this index.
    # `marker` and `limit` are accepted but not used in this body.
    iter = ChunkKeyIterator()
    # Store the owning index object on the iterator (presumably so the
    # index outlives the raw pointer copied below -- TODO confirm).
    iter.idx = self
    iter.index = self.index
    return iter
cdef class ChunkKeyIterator:
cdef ChunkIndex idx
cdef HashIndex *index
cdef char *key

View File

@ -431,6 +431,15 @@ def daemonize():
os.dup2(fd, 2)
def is_a_terminal(fd):
    """Determine if `fd` is associated with a terminal or not

    `fd` is a file-like object providing ``fileno()`` (for example
    ``sys.stdout``), not a raw descriptor number.

    Returns True when ``os.ttyname`` accepts the descriptor and False when
    the lookup fails for any ordinary reason (no ``fileno()``, descriptor
    not attached to a terminal, closed file, ...).
    """
    try:
        os.ttyname(fd.fileno())
    except Exception:
        # Best-effort probe: any ordinary failure means "not a terminal".
        # Unlike a bare ``except:`` this lets KeyboardInterrupt and
        # SystemExit propagate instead of silently swallowing them.
        return False
    return True
if sys.version < '3.3':
# st_mtime_ns attribute only available in 3.3+
def st_mtime_ns(st):

View File

@ -178,6 +178,9 @@ class RemoteRepository(object):
w_fds = []
self.ignore_responses |= set(waiting_for)
def check(self, progress=False):
    """Forward a ``check`` request, with `progress`, through ``self.call``.

    Returns whatever ``self.call`` returns.
    """
    return self.call('check', progress)
def commit(self, *args):
    """Forward a ``commit`` request through ``self.call``.

    Extra positional arguments are accepted but not passed on.
    """
    return self.call('commit')

View File

@ -5,6 +5,7 @@ import os
import re
import shutil
import struct
import sys
from zlib import crc32
from .hashindex import NSIndex
@ -198,6 +199,41 @@ class Repository(object):
if self.io.head is not None:
self.write_index()
def check(self, progress=False):
    """Verify that segment contents and the index agree.

    Each segment is read in full and every PUT entry is compared with the
    index.  Problems are printed to standard error as they are found.

    :param progress: when true, print one line per segment to standard
        output
    :returns: True when no problem was reported, False otherwise
    :raises: ``self.RepositoryCheckFailed`` when an entry carries a tag
        other than TAG_PUT or TAG_COMMIT
    """
    problems = []

    def report_error(msg):
        # Remember the problem and tell the user right away.
        problems.append(msg)
        print(msg, file=sys.stderr)

    seen = set()
    for segment, _filename in self.io._segment_names():
        if progress:
            print('Checking segment {}/{}'.format(segment, self.io.head))
        try:
            entries = list(self.io.iter_objects(segment))
        except (IntegrityError, struct.error):
            # An unreadable segment contributes no entries.
            report_error('Error reading segment {}'.format(segment))
            entries = []
        for tag, key, offset in entries:
            if tag == TAG_PUT:
                if key in seen:
                    report_error('Key found in more than one segment. Segment={}, key={}'.format(segment, hexlify(key)))
                seen.add(key)
                indexed_location = self.index.get(key, (0, 0))
                if indexed_location != (segment, offset):
                    report_error('Index vs segment header mismatch. Segment={}, key={}'.format(segment, hexlify(key)))
                continue
            if tag == TAG_COMMIT:
                continue
            raise self.RepositoryCheckFailed(self.path, 'Unexpected tag {} in segment {}'.format(tag, segment))
    index_size = len(self.index)
    if index_size != len(seen):
        report_error('Index object count mismatch. {} != {}'.format(index_size, len(seen)))
    return not problems
def rollback(self):
"""
"""
@ -309,6 +345,8 @@ class LoggedIO(object):
"""
self.head = None
self.segment = 0
# FIXME: Only delete segments if we're sure there's at least
# one complete segment somewhere
for segment, filename in self._segment_names(reverse=True):
if self.is_complete_segment(filename):
self.head = segment

View File

@ -205,12 +205,14 @@ class ArchiverTestCase(AtticTestCase):
self.attic('init', self.repository_location)
self.create_src_archive('test')
self.attic('verify', self.repository_location + '::test')
self.attic('check', self.repository_location)
name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+')
fd.seek(100)
fd.write('XXXX')
fd.close()
self.attic('verify', self.repository_location + '::test', exit_code=1)
self.attic('check', self.repository_location, exit_code=1)
def test_readonly_repository(self):
self.attic('init', self.repository_location)

View File

@ -102,7 +102,55 @@ class RepositoryTestCase(AtticTestCase):
self.repository.commit()
class RepositoryCheckTestCase(AtticTestCase):
    """Run Repository.check() on an intact and on a damaged repository."""

    def open(self, create=False):
        repository_path = os.path.join(self.tmppath, 'repository')
        return Repository(repository_path, create=create)

    def setUp(self):
        self.tmppath = tempfile.mkdtemp()
        self.repository = self.open(create=True)

    def tearDown(self):
        self.repository.close()
        shutil.rmtree(self.tmppath)

    def add_objects(self, ids):
        """Store one small object per id, then commit."""
        for id_ in ids:
            key = ('%032d' % id_).encode('ascii')
            self.repository.put(key, b'data')
        self.repository.commit()

    def open_index(self):
        """Open the on-disk index file with the lowest number."""
        repository_dir = os.path.join(self.tmppath, 'repository')
        numbers = sorted(
            int(name[6:])
            for name in os.listdir(repository_dir)
            if name.startswith('index') and name[6:].isdigit()
        )
        head = numbers[0]
        return NSIndex(os.path.join(repository_dir, 'index.{}'.format(head)))

    def corrupt_object(self, id_):
        """Overwrite the start of the stored entry for `id_` with garbage."""
        key = ('%032d' % id_).encode('ascii')
        segment, offset = self.open_index()[key]
        segment_path = os.path.join(self.tmppath, 'repository', 'data', '0', str(segment))
        with open(segment_path, 'r+b') as fd:
            fd.seek(offset)
            fd.write(b'BOOM')

    def list_objects(self):
        """Return the ids present in the on-disk index as a set of ints."""
        index = self.open_index()
        return {int(key) for key, _ in index.iteritems()}

    def test_check(self):
        expected_ids = {1, 2, 3, 4, 5, 6}
        self.add_objects([1, 2, 3])
        self.add_objects([4, 5, 6])
        self.assert_equal(expected_ids, self.list_objects())
        self.assert_equal(True, self.repository.check())
        self.corrupt_object(5)
        self.assert_equal(False, self.repository.check())
        # Checking must not modify the index.
        self.assert_equal(expected_ids, self.list_objects())
class RemoteRepositoryTestCase(RepositoryTestCase):
    """Repeat the repository tests against a RemoteRepository."""

    def open(self, create=False):
        repository_path = os.path.join(self.tmppath, 'repository')
        location = Location('__testsuite__:' + repository_path)
        return RemoteRepository(location, create=create)
class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
    """Repeat the repository check tests against a RemoteRepository."""

    def open(self, create=False):
        repository_path = os.path.join(self.tmppath, 'repository')
        location = Location('__testsuite__:' + repository_path)
        return RemoteRepository(location, create=create)

View File

@ -2,7 +2,7 @@
if [ ! -d usage ]; then
mkdir usage
fi
for cmd in change-passphrase create delete extract info init list mount prune verify; do
for cmd in change-passphrase check create delete extract info init list mount prune verify; do
FILENAME="usage/$cmd.rst.inc"
LINE=`echo -n attic $cmd | tr 'a-z- ' '-'`
echo -e ".. _attic_$cmd:\n" > $FILENAME

View File

@ -93,6 +93,12 @@ not corrupt. |project_name| will not compare the archived files with the
files on disk.
.. include:: usage/check.rst.inc
The check command verifies the consistency of a repository. Any inconsistencies
found are reported to the standard error stream and the command will have a
non-zero exit code.
.. include:: usage/delete.rst.inc
This command deletes an archive from the repository. Any disk space not

28
docs/usage/check.rst.inc Normal file
View File

@ -0,0 +1,28 @@
.. _attic_check:
attic check
-----------
::
usage: attic check [-h] [-v] [--progress] [--no-progress] REPOSITORY
Check repository consistency
positional arguments:
REPOSITORY repository to check consistency of
optional arguments:
-h, --help show this help message and exit
-v, --verbose verbose output
--progress Report progress status to standard output stream
--no-progress Disable progress reporting
Progress status will be reported on the standard output stream by default when
it is attached to a terminal. Any problems found are printed to the standard
error stream and the command will have a non zero exit code.
Description
~~~~~~~~~~~