borg/darc/archiver.py

444 lines
20 KiB
Python
Raw Normal View History

2010-10-15 18:35:49 +00:00
import argparse
2013-06-03 11:45:48 +00:00
from binascii import hexlify
2011-08-12 06:49:01 +00:00
from datetime import datetime
from operator import attrgetter
2010-10-25 18:22:20 +00:00
import os
2010-10-30 11:44:25 +00:00
import stat
2010-10-16 09:45:36 +00:00
import sys
2010-02-23 21:12:22 +00:00
2010-10-20 17:59:15 +00:00
from .archive import Archive
2013-06-20 10:44:58 +00:00
from .repository import Repository
2010-10-20 17:59:15 +00:00
from .cache import Cache
from .key import key_creator
2011-08-11 19:18:13 +00:00
from .helpers import location_validator, format_time, \
format_file_mode, IncludePattern, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
2013-06-26 11:55:41 +00:00
get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, Location, remove_surrogates
from .remote import RepositoryServer, RemoteRepository, ConnectionClosed
2011-10-29 15:01:07 +00:00
2013-06-26 19:20:31 +00:00
class Archiver:
    """Command-line front end of darc.

    Each ``do_*`` method implements one subcommand.  It receives the parsed
    ``argparse`` namespace and returns the process exit status.  ``run()``
    builds the argument parser and dispatches to the selected subcommand.
    """

    def __init__(self):
        # Exit status handed back by the do_* commands; print_error() sets it to 1.
        self.exit_code = 0
        # Overwritten by run() from the -v/--verbose flag.  Initialised here so
        # that print_verbose() is safe to call before run().
        self.verbose = False

    def open_repository(self, location, create=False):
        """Return a repository object for *location*.

        A ``RemoteRepository`` is used when ``location.proto`` is ``'ssh'``,
        otherwise a local ``Repository`` at ``location.path``.  The location
        is remembered on the returned object as ``_location``.
        """
        if location.proto == 'ssh':
            repository = RemoteRepository(location, create=create)
        else:
            repository = Repository(location.path, create=create)
        repository._location = location
        return repository

    def print_error(self, msg, *args):
        """Write ``'darc: ' + msg`` to stderr and set the exit status to 1.

        *msg* is %-formatted with *args* only when *args* is non-empty, so a
        message without arguments may contain literal ``%`` characters.
        """
        # A conditional expression (not ``args and msg % args or msg``) keeps a
        # formatted result that happens to be empty.
        msg = msg % args if args else msg
        self.exit_code = 1
        print('darc: ' + msg, file=sys.stderr)

    def print_verbose(self, msg, *args, **kw):
        """Print *msg* to stdout when verbose mode is on.

        *msg* is %-formatted with *args* when *args* is non-empty.  Passing
        ``newline=False`` ends the output with a space instead of a newline.
        """
        if not self.verbose:
            return
        msg = msg % args if args else msg
        if kw.get('newline', True):
            print(msg)
        else:
            print(msg, end=' ')

    def do_serve(self, args):
        """Run ``RepositoryServer().serve()`` and return its result."""
        return RepositoryServer().serve()

    def do_init(self, args):
        """Create a new repository with a fresh key and an empty manifest."""
        print('Initializing repository at "%s"' % args.repository.orig)
        repository = self.open_repository(args.repository, create=True)
        key = key_creator(repository, args)
        manifest = Manifest()
        manifest.repository = repository
        manifest.key = key
        manifest.write()
        repository.commit()
        return self.exit_code

    def do_change_passphrase(self, args):
        """Load the repository key and change its passphrase."""
        # args.repository is produced by location_validator(), exactly as for
        # "init" and "prune", which use it directly as a location object; it is
        # therefore passed through unchanged instead of being re-wrapped.
        repository = self.open_repository(args.repository)
        manifest, key = Manifest.load(repository)
        key.change_passphrase()
        return self.exit_code

    def do_create(self, args):
        """Create a new archive from ``args.paths``.

        The darc cache directory and, for repositories without a host, the
        repository directory itself are excluded by inode so they are never
        archived.  With ``args.stats`` a summary is printed afterwards.
        """
        t0 = datetime.now()
        repository = self.open_repository(args.archive)
        manifest, key = Manifest.load(repository)
        cache = Cache(repository, key, manifest)
        archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                          create=True, checkpoint_interval=args.checkpoint_interval,
                          numeric_owner=args.numeric_owner)
        # Directories that must never end up in the archive: the darc cache dir
        # and, if the repository has no host part, the repository directory.
        skip_paths = [get_cache_dir()]
        if not args.archive.host:
            skip_paths.append(args.archive.path)
        skip_inodes = set()
        for skip_path in skip_paths:
            try:
                st = os.stat(skip_path)
            except OSError:
                # Best effort: a directory that cannot be stat'ed cannot be skipped.
                continue
            skip_inodes.add((st.st_ino, st.st_dev))
        for path in args.paths:
            if args.dontcross:
                try:
                    restrict_dev = os.lstat(path).st_dev
                except OSError as e:
                    self.print_error('%s: %s', path, e)
                    continue
            else:
                restrict_dev = None
            self._process(archive, cache, args.patterns, skip_inodes, path, restrict_dev)
        archive.save()
        if args.stats:
            t = datetime.now()
            diff = t - t0
            print('-' * 40)
            print('Archive name: %s' % args.archive.archive)
            print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
            print('Start time: %s' % t0.strftime('%c'))
            print('End time: %s' % t.strftime('%c'))
            print('Duration: %s' % format_timedelta(diff))
            archive.stats.print_()
            print('-' * 40)
        return self.exit_code

    def _process(self, archive, cache, patterns, skip_inodes, path, restrict_dev):
        """Add *path* to *archive*, recursing into directories.

        *skip_inodes* is a set of ``(st_ino, st_dev)`` pairs to leave out.
        *restrict_dev* is either ``None`` or the only device id that may be
        entered.  Errors on individual paths are reported with
        ``print_error()`` and do not abort the run.
        """
        if exclude_path(path, patterns):
            return
        try:
            st = os.lstat(path)
        except OSError as e:
            self.print_error('%s: %s', path, e)
            return
        if (st.st_ino, st.st_dev) in skip_inodes:
            return
        # Entering a new filesystem?  Compare against None explicitly so that
        # a device id of 0 still restricts the walk.
        if restrict_dev is not None and st.st_dev != restrict_dev:
            return
        # Ignore unix sockets
        if stat.S_ISSOCK(st.st_mode):
            return
        self.print_verbose(remove_surrogates(path))
        if stat.S_ISREG(st.st_mode):
            try:
                archive.process_file(path, st, cache)
            except IOError as e:
                self.print_error('%s: %s', path, e)
        elif stat.S_ISDIR(st.st_mode):
            archive.process_item(path, st)
            try:
                entries = os.listdir(path)
            except OSError as e:
                self.print_error('%s: %s', path, e)
            else:
                # Sorted so that directory contents are visited in a stable order.
                for filename in sorted(entries):
                    self._process(archive, cache, patterns, skip_inodes,
                                  os.path.join(path, filename), restrict_dev)
        elif stat.S_ISLNK(st.st_mode):
            archive.process_symlink(path, st)
        elif stat.S_ISFIFO(st.st_mode):
            archive.process_item(path, st)
        elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
            archive.process_dev(path, st)
        else:
            self.print_error('Unknown file type: %s', path)

    def do_extract(self, args):
        """Extract the items of an archive that are not excluded by the patterns.

        Directories are first extracted with ``restore_attrs=False`` and kept
        on a stack; they are extracted again, with default arguments, once an
        item that does not start with their path shows up or the archive ends.
        """
        repository = self.open_repository(args.archive)
        manifest, key = Manifest.load(repository)
        archive = Archive(repository, key, manifest, args.archive.archive,
                          numeric_owner=args.numeric_owner)
        dirs = []
        for item, peek in archive.iter_items(lambda item: not exclude_path(item[b'path'], args.patterns)):
            # Finish every pending directory this item is no longer inside of.
            while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
                archive.extract_item(dirs.pop(-1), args.dest)
            self.print_verbose(remove_surrogates(item[b'path']))
            try:
                if stat.S_ISDIR(item[b'mode']):
                    dirs.append(item)
                    archive.extract_item(item, args.dest, restore_attrs=False)
                else:
                    archive.extract_item(item, args.dest, peek=peek)
            except IOError as e:
                self.print_error('%s: %s', remove_surrogates(item[b'path']), e)
        while dirs:
            archive.extract_item(dirs.pop(-1), args.dest)
        return self.exit_code

    def do_delete(self, args):
        """Delete the archive named by ``args.archive``."""
        repository = self.open_repository(args.archive)
        manifest, key = Manifest.load(repository)
        cache = Cache(repository, key, manifest)
        archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
        archive.delete(cache)
        return self.exit_code

    def do_list(self, args):
        """List the archives of a repository, or the items of one archive.

        If ``args.src`` names an archive, one ``ls -l``-like line is printed
        per item; otherwise every archive is printed with its timestamp,
        ordered by ``ts``.
        """
        repository = self.open_repository(args.src)
        manifest, key = Manifest.load(repository)
        if not args.src.archive:
            for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
                print('%-20s %s' % (archive.metadata[b'name'], to_localtime(archive.ts).strftime('%c')))
            return self.exit_code
        # Maps the file-type bits of st_mode (mode // 4096) to a type letter.
        tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
        archive = Archive(repository, key, manifest, args.src.archive)
        for item, _ in archive.iter_items():
            kind = tmap.get(item[b'mode'] // 4096, '?')
            mode = format_file_mode(item[b'mode'])
            size = 0
            if kind == '-':
                try:
                    size = sum(chunk_size for _, chunk_size, _ in item[b'chunks'])
                except KeyError:
                    # Regular file item without a chunk list: reported as size 0.
                    pass
            # NOTE(review): the division by 10**9 suggests mtime is stored in
            # nanoseconds - confirm against the archive format.
            mtime = format_time(datetime.fromtimestamp(item[b'mtime'] / 10**9))
            if b'source' in item:
                if kind == 'l':
                    extra = ' -> %s' % item[b'source']
                else:
                    # Any non-symlink item carrying a source is shown as a hard link.
                    kind = 'h'
                    extra = ' link to %s' % item[b'source']
            else:
                extra = ''
            print('%s%s %-6s %-6s %8d %s %s%s' % (kind, mode, item[b'user'] or item[b'uid'],
                                                  item[b'group'] or item[b'gid'], size, mtime,
                                                  remove_surrogates(item[b'path']), extra))
        return self.exit_code

    def do_verify(self, args):
        """Verify every regular file item of an archive that has chunks.

        A failed verification is reported through ``print_error()``, which
        makes the command return a non-zero status.
        """
        repository = self.open_repository(args.archive)
        manifest, key = Manifest.load(repository)
        archive = Archive(repository, key, manifest, args.archive.archive)

        def start_cb(item):
            self.print_verbose('%s ...', remove_surrogates(item[b'path']), newline=False)

        def result_cb(item, success):
            if success:
                self.print_verbose('OK')
            else:
                self.print_verbose('ERROR')
                self.print_error('%s: verification failed', remove_surrogates(item[b'path']))

        for item, peek in archive.iter_items(lambda item: not exclude_path(item[b'path'], args.patterns)):
            if stat.S_ISREG(item[b'mode']) and b'chunks' in item:
                archive.verify_file(item, start_cb, result_cb, peek=peek)
        return self.exit_code

    def do_info(self, args):
        """Print the metadata and statistics of one archive."""
        repository = self.open_repository(args.archive)
        manifest, key = Manifest.load(repository)
        cache = Cache(repository, key, manifest)
        archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
        stats = archive.calc_stats(cache)
        print('Name:', archive.name)
        print('Fingerprint: %s' % hexlify(archive.id).decode('ascii'))
        print('Hostname:', archive.metadata[b'hostname'])
        print('Username:', archive.metadata[b'username'])
        print('Time: %s' % to_localtime(archive.ts).strftime('%c'))
        print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline'])))
        stats.print_()
        return self.exit_code

    def do_prune(self, args):
        """Delete archives that no retention rule selects for keeping.

        At least one of the hourly/daily/weekly/monthly/yearly counts must be
        given.  With ``args.prefix`` only archives whose name starts with that
        prefix are considered at all.
        """
        # Validate the arguments first, so that a usage error is reported
        # without opening the repository or the cache.
        if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0:
            self.print_error('At least one of the "hourly", "daily", "weekly", "monthly" or "yearly" '
                             'settings must be specified')
            return self.exit_code
        repository = self.open_repository(args.repository)
        manifest, key = Manifest.load(repository)
        cache = Cache(repository, key, manifest)
        # Newest first.
        archives = sorted(Archive.list_archives(repository, key, manifest, cache),
                          key=attrgetter('ts'), reverse=True)
        if args.prefix:
            archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
        keep = []
        if args.hourly:
            keep += prune_split(archives, '%Y-%m-%d %H', args.hourly)
        if args.daily:
            keep += prune_split(archives, '%Y-%m-%d', args.daily, keep)
        if args.weekly:
            keep += prune_split(archives, '%G-%V', args.weekly, keep)
        if args.monthly:
            keep += prune_split(archives, '%Y-%m', args.monthly, keep)
        if args.yearly:
            keep += prune_split(archives, '%Y', args.yearly, keep)
        keep.sort(key=attrgetter('ts'), reverse=True)
        to_delete = [a for a in archives if a not in keep]
        for archive in keep:
            self.print_verbose('Keeping archive "%s"', archive.name)
        for archive in to_delete:
            self.print_verbose('Pruning archive "%s"', archive.name)
            archive.delete(cache)
        return self.exit_code

    @staticmethod
    def _ensure_private_dir(path):
        """Create *path* with owner-only permissions unless it already exists."""
        if not os.path.exists(path):
            os.makedirs(path)
            os.chmod(path, stat.S_IRWXU)

    @staticmethod
    def _add_pattern_arguments(subparser):
        """Add the -i/--include and -e/--exclude options, which share ``dest='patterns'``."""
        subparser.add_argument('-i', '--include', dest='patterns',
                               type=IncludePattern, action='append',
                               help='Include condition')
        subparser.add_argument('-e', '--exclude', dest='patterns',
                               type=ExcludePattern, action='append',
                               help='Exclude condition')

    def _build_parser(self):
        """Return the top-level parser with one sub-parser per darc subcommand."""
        # Options shared by every subcommand.
        common_parser = argparse.ArgumentParser(add_help=False)
        common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                                   default=False,
                                   help='Verbose output')

        parser = argparse.ArgumentParser(description='Darc - Deduplicating Archiver')
        subparsers = parser.add_subparsers(title='Available subcommands')

        subparser = subparsers.add_parser('serve', parents=[common_parser])
        subparser.set_defaults(func=self.do_serve)

        subparser = subparsers.add_parser('init', parents=[common_parser])
        subparser.set_defaults(func=self.do_init)
        subparser.add_argument('repository',
                               type=location_validator(archive=False),
                               help='Repository to create')
        subparser.add_argument('--key-file', dest='keyfile',
                               action='store_true', default=False,
                               help='Encrypt data using key file')
        subparser.add_argument('--passphrase', dest='passphrase',
                               action='store_true', default=False,
                               help='Encrypt data using passphrase derived key')

        subparser = subparsers.add_parser('change-passphrase', parents=[common_parser])
        subparser.set_defaults(func=self.do_change_passphrase)
        subparser.add_argument('repository', type=location_validator(archive=False))

        subparser = subparsers.add_parser('create', parents=[common_parser])
        subparser.set_defaults(func=self.do_create)
        subparser.add_argument('-s', '--stats', dest='stats',
                               action='store_true', default=False,
                               help='Print statistics for the created archive')
        self._add_pattern_arguments(subparser)
        subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                               type=int, default=300, metavar='SECONDS',
                               help='Write checkpoint every SECONDS seconds (Default: 300)')
        subparser.add_argument('--do-not-cross-mountpoints', dest='dontcross',
                               action='store_true', default=False,
                               help='Do not cross mount points')
        subparser.add_argument('--numeric-owner', dest='numeric_owner',
                               action='store_true', default=False,
                               help='Only store numeric user and group identifiers')
        subparser.add_argument('archive', metavar='ARCHIVE',
                               type=location_validator(archive=True),
                               help='Archive to create')
        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
                               default=['.'], help='Paths to add to archive')

        subparser = subparsers.add_parser('extract', parents=[common_parser])
        subparser.set_defaults(func=self.do_extract)
        self._add_pattern_arguments(subparser)
        subparser.add_argument('--numeric-owner', dest='numeric_owner',
                               action='store_true', default=False,
                               help='Only obey numeric user and group identifiers')
        subparser.add_argument('archive', metavar='ARCHIVE',
                               type=location_validator(archive=True),
                               help='Archive to extract')
        subparser.add_argument('dest', metavar='DEST', type=str, nargs='?',
                               help='Where to extract files')

        subparser = subparsers.add_parser('delete', parents=[common_parser])
        subparser.set_defaults(func=self.do_delete)
        subparser.add_argument('archive', metavar='ARCHIVE',
                               type=location_validator(archive=True),
                               help='Archive to delete')

        subparser = subparsers.add_parser('list', parents=[common_parser])
        subparser.set_defaults(func=self.do_list)
        subparser.add_argument('src', metavar='SRC', type=location_validator(),
                               help='Repository/Archive to list contents of')

        subparser = subparsers.add_parser('verify', parents=[common_parser])
        subparser.set_defaults(func=self.do_verify)
        self._add_pattern_arguments(subparser)
        subparser.add_argument('archive', metavar='ARCHIVE',
                               type=location_validator(archive=True),
                               help='Archive to verify integrity of')

        subparser = subparsers.add_parser('info', parents=[common_parser])
        subparser.set_defaults(func=self.do_info)
        subparser.add_argument('archive', metavar='ARCHIVE',
                               type=location_validator(archive=True),
                               help='Archive to display information about')

        subparser = subparsers.add_parser('prune', parents=[common_parser])
        subparser.set_defaults(func=self.do_prune)
        subparser.add_argument('-H', '--hourly', dest='hourly', type=int, default=0,
                               help='Number of hourly archives to keep')
        subparser.add_argument('-d', '--daily', dest='daily', type=int, default=0,
                               help='Number of daily archives to keep')
        subparser.add_argument('-w', '--weekly', dest='weekly', type=int, default=0,
                               help='Number of weekly archives to keep')
        subparser.add_argument('-m', '--monthly', dest='monthly', type=int, default=0,
                               help='Number of monthly archives to keep')
        subparser.add_argument('-y', '--yearly', dest='yearly', type=int, default=0,
                               help='Number of yearly archives to keep')
        subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
                               help='Only consider archive names starting with this prefix')
        subparser.add_argument('repository', metavar='REPOSITORY',
                               type=location_validator(archive=False),
                               help='Repository to prune')
        return parser

    def run(self, args=None):
        """Parse *args* and execute the selected subcommand.

        The keys and cache directories are created with owner-only
        permissions if they do not exist yet.  An empty or missing *args*
        is treated as ``['-h']``.  Returns whatever the subcommand returns.
        """
        self._ensure_private_dir(get_keys_dir())
        self._ensure_private_dir(get_cache_dir())
        parser = self._build_parser()
        args = parser.parse_args(args or ['-h'])
        # Only some subcommands define "patterns", and it is None when no
        # include/exclude option was given.
        if getattr(args, 'patterns', None):
            adjust_patterns(args.patterns)
        self.verbose = args.verbose
        return args.func(args)
2010-03-06 17:25:35 +00:00
2011-10-29 15:01:07 +00:00
2010-02-20 17:23:46 +00:00
def main():
    """Console entry point: run the archiver on ``sys.argv`` and exit.

    The repository, archive, connection and keyboard-interrupt errors listed
    below are turned into an error message and exit status 1.  If the command
    itself finishes with a non-zero status, a closing notice is printed
    before exiting with that status.
    """
    cli = Archiver()

    def fail(message, *fmt_args):
        # Report the problem and hand back the failure status in one step.
        cli.print_error(message, *fmt_args)
        return 1

    try:
        status = cli.run(sys.argv[1:])
    except Repository.DoesNotExist:
        status = fail('Error: Repository not found')
    except Repository.AlreadyExists:
        status = fail('Error: Repository already exists')
    except Archive.AlreadyExists as e:
        status = fail('Error: Archive "%s" already exists', e)
    except Archive.DoesNotExist as e:
        status = fail('Error: Archive "%s" does not exist', e)
    except ConnectionClosed:
        status = fail('Connection closed by remote host')
    except KeyboardInterrupt:
        status = fail('Error: Keyboard interrupt')
    else:
        if status:
            cli.print_error('Exiting with failure status due to previous errors')
    sys.exit(status)
2010-02-20 17:23:46 +00:00
# Start the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()