borg/attic/helpers.py

515 lines
15 KiB
Python
Raw Normal View History

2010-10-15 18:46:17 +00:00
import argparse
import binascii
import grp
import msgpack
2010-10-31 19:12:32 +00:00
import os
import pwd
import re
2010-10-31 19:12:32 +00:00
import stat
import sys
import time
from datetime import datetime, timezone, timedelta
from fnmatch import translate
from operator import attrgetter
import fcntl
import attic.hashindex
import attic.chunker
import attic.crypto
class Error(Exception):
    """Error base class"""

    # Numeric code attached to the error type; presumably used as the process
    # exit status by the command line front end -- confirm against callers.
    exit_code = 1

    def get_message(self):
        """Return the user-facing message for this error.

        The class docstring is used as a ``str.format`` template and is
        filled with the positional arguments the exception was raised with.
        When the class has no docstring (a subclass that does not define one,
        or the interpreter runs with ``-OO``), fall back to the stringified
        arguments, or to the class name if there are none.
        """
        template = type(self).__doc__
        if not template:
            # Class docstrings are not inherited, so this can legitimately
            # be None; calling .format on it would raise AttributeError.
            detail = ' '.join(str(arg) for arg in self.args)
            return 'Error: ' + (detail or type(self).__name__)
        return 'Error: ' + template.format(*self.args)
class ExtensionModuleError(Error):
    """The Attic binary extension modules does not seem to be properly installed"""
    # NOTE: the docstring above is not just documentation. Error.get_message()
    # reads type(self).__doc__ and uses it as the message text, so editing it
    # changes what is reported at runtime.
    # Raised by check_extension_modules() when an extension module reports an
    # unexpected API_VERSION.
class UpgradableLock:
    """``fcntl.lockf`` based lock on a file, taken shared or exclusive.

    A shared lock can later be turned into an exclusive one with
    :meth:`upgrade`. The lock is held until :meth:`release` is called.
    """

    class LockUpgradeFailed(Error):
        """Failed to acquire write lock on {}"""

    def __init__(self, path, exclusive=False):
        """Open *path* and lock it.

        :param path: file to lock; opened read/write when possible,
            read-only otherwise
        :param exclusive: take an exclusive lock instead of a shared one
        :raises OSError: if the file cannot be opened or locked
        """
        self.path = path
        try:
            self.fd = open(path, 'r+')
        except IOError:
            # Fall back to a read-only handle when read/write access fails.
            self.fd = open(path, 'r')
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
        except BaseException:
            # Do not leak the open file when the lock cannot be taken.
            self.fd.close()
            raise
        self.is_exclusive = exclusive

    def upgrade(self):
        """Turn the held lock into an exclusive lock.

        :raises LockUpgradeFailed: if the exclusive lock cannot be acquired
        """
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_EX)
        except (IOError, OSError):
            # IOError is listed explicitly because it is not an alias of
            # OSError on interpreters older than 3.3.
            raise self.LockUpgradeFailed(self.path)
        self.is_exclusive = True

    def release(self):
        """Drop the lock and close the underlying file."""
        try:
            fcntl.lockf(self.fd, fcntl.LOCK_UN)
        finally:
            # Closing the file must happen even if unlocking fails.
            self.fd.close()
2011-10-27 20:17:47 +00:00
def check_extension_modules():
    """Verify that the compiled extension modules expose API version 1.

    :raises ExtensionModuleError: if hashindex, chunker or crypto reports a
        different ``API_VERSION``
    """
    versions_ok = (
        attic.hashindex.API_VERSION == 1
        and attic.chunker.API_VERSION == 1
        and attic.crypto.API_VERSION == 1
    )
    if not versions_ok:
        raise ExtensionModuleError
class Manifest:
    """Repository manifest: archive index, timestamp and config.

    The manifest is stored in the repository under the all-zero id
    ``MANIFEST_ID`` and is encrypted/decrypted with the repository key.
    """

    MANIFEST_ID = b'\0' * 32

    def __init__(self, key, repository):
        self.key = key
        self.repository = repository
        self.archives = {}
        self.config = {}

    @classmethod
    def load(cls, repository, key=None):
        """Read and decode the manifest stored in *repository*.

        :param repository: object providing ``get(id)``
        :param key: key object used for decryption; when falsy one is
            created via ``key_factory`` from the stored manifest data
        :returns: tuple ``(manifest, key)``
        :raises ValueError: if the stored manifest version is not 1
        """
        from .key import key_factory
        cdata = repository.get(cls.MANIFEST_ID)
        if not key:
            key = key_factory(repository, cdata)
        manifest = cls(key, repository)
        data = key.decrypt(None, cdata)
        manifest.id = key.id_hash(data)
        unpacked = msgpack.unpackb(data)
        if unpacked.get(b'version') != 1:
            raise ValueError('Invalid manifest version')
        # Archive names are stored as bytes; expose them as str.
        manifest.archives = {
            name.decode('utf-8'): info
            for name, info in unpacked[b'archives'].items()
        }
        timestamp = unpacked.get(b'timestamp')
        manifest.timestamp = timestamp.decode('ascii') if timestamp else timestamp
        manifest.config = unpacked[b'config']
        return manifest, key

    def write(self):
        """Serialize the manifest, update ``id``/``timestamp`` and store it."""
        self.timestamp = datetime.utcnow().isoformat()
        payload = StableDict({
            'version': 1,
            'archives': self.archives,
            'timestamp': self.timestamp,
            'config': self.config,
        })
        data = msgpack.packb(payload)
        self.id = self.key.id_hash(data)
        self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
def prune_within(archives, within):
    """Return the archives newer than the interval described by *within*.

    :param archives: iterable of objects with a timezone-aware ``ts``
        datetime attribute
    :param within: string made of an integer followed by a unit character:
        ``H`` (hours), ``d`` (days), ``w`` (weeks), ``m`` (31 days) or
        ``y`` (365 days), e.g. ``"2d"``
    :returns: list of archives whose ``ts`` is later than now minus the
        interval
    :raises argparse.ArgumentTypeError: if *within* cannot be parsed or
        does not describe a positive interval
    """
    multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365}
    try:
        hours = int(within[:-1]) * multiplier[within[-1]]
    except (KeyError, ValueError):
        hours = None
    if hours is None:
        # Raised outside the except block on purpose: raising inside it
        # would chain the internal KeyError/ValueError onto the user-facing
        # error and display both.
        raise argparse.ArgumentTypeError('Unable to parse --within option: "%s"' % within)
    if hours <= 0:
        raise argparse.ArgumentTypeError('Number specified using --within option must be positive')
    target = datetime.now(timezone.utc) - timedelta(hours=hours)
    return [a for a in archives if a.ts > target]
2011-11-22 20:47:17 +00:00
def prune_split(archives, pattern, n, skip=None):
    """Pick the newest archive of each of the *n* most recent periods.

    Archives are visited newest first. ``a.ts.strftime(pattern)`` identifies
    the period an archive belongs to; only the first (newest) archive seen
    for each period is a candidate.

    :param archives: iterable of objects with a ``ts`` datetime attribute
    :param pattern: ``strftime`` pattern defining a period, e.g. ``'%Y-%m'``
    :param n: maximum number of archives to return; 0 returns nothing
    :param skip: archives that must not be returned; a skipped candidate
        still uses up its period but does not count towards *n*
    :returns: list of kept archives, newest first
    """
    # None instead of a mutable [] default, which would be shared by all calls.
    skip = () if skip is None else skip
    keep = []
    if n == 0:
        return keep
    last = None
    for a in sorted(archives, key=attrgetter('ts'), reverse=True):
        period = a.ts.strftime(pattern)
        if period == last:
            continue
        last = period
        if a not in skip:
            keep.append(a)
            if len(keep) == n:
                break
    return keep
2011-08-11 19:18:13 +00:00
class Statistics:
    """Running size counters (original, compressed, deduplicated)."""

    def __init__(self):
        self.osize = 0
        self.csize = 0
        self.usize = 0
        self.nfiles = 0

    def update(self, size, csize, unique):
        """Account for one chunk.

        :param size: original size, added to ``osize``
        :param csize: compressed size, added to ``csize``
        :param unique: when true, *csize* is also added to ``usize``
        """
        self.osize += size
        self.csize += csize
        if unique:
            self.usize += csize

    def print_(self, label, cache):
        """Print a table with these counters and the totals from *cache*.

        :param label: text shown in front of this object's counters
        :param cache: object whose ``chunks.summarize()`` returns four
            values (total size, total compressed size, unique size,
            unique compressed size)
        """
        total_size, total_csize, _unique_size, unique_csize = cache.chunks.summarize()
        print()
        print(' Original size Compressed size Deduplicated size')
        own = (self.osize, self.csize, self.usize)
        print('%-15s %20s %20s %20s' % ((label,) + tuple(format_file_size(s) for s in own)))
        overall = (total_size, total_csize, unique_csize)
        print('All archives: %20s %20s %20s' % tuple(format_file_size(s) for s in overall))
2010-12-19 11:46:42 +00:00
2011-08-06 11:01:58 +00:00
def get_keys_dir():
    """Return the directory where repository keys are stored.

    The ``ATTIC_KEYS_DIR`` environment variable takes precedence; the
    default is ``~/.attic/keys``.
    """
    default_dir = os.path.join(os.path.expanduser('~'), '.attic', 'keys')
    return os.environ.get('ATTIC_KEYS_DIR', default_dir)
2011-08-06 11:01:58 +00:00
2011-08-06 11:01:58 +00:00
def get_cache_dir():
    """Return the directory used for the local cache.

    The ``ATTIC_CACHE_DIR`` environment variable takes precedence; the
    default is ``~/.cache/attic``.
    """
    default_dir = os.path.join(os.path.expanduser('~'), '.cache', 'attic')
    return os.environ.get('ATTIC_CACHE_DIR', default_dir)
2011-08-06 11:01:58 +00:00
def to_localtime(ts):
    """Convert a UTC-aware datetime to a naive datetime in local time.

    The result has whole-second resolution because only the first six
    fields of the ``time.localtime`` result are used.
    """
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    seconds_since_epoch = (ts - epoch).total_seconds()
    local_fields = time.localtime(seconds_since_epoch)
    return datetime(*local_fields[:6])
2010-12-19 11:46:42 +00:00
2014-02-08 17:44:48 +00:00
def update_excludes(args):
    """Merge exclude patterns from files with those on command line.

    Empty lines and lines starting with '#' are ignored, but whitespace
    is not stripped.

    :param args: namespace object; ``args.exclude_files`` (optional) is an
        iterable of open text files, ``args.excludes`` (optional) is the
        list that the patterns read from those files are appended to. It is
        created when missing or ``None``.

    Every exclude file is closed once processed, including when reading it
    or building a pattern fails.
    """
    exclude_files = getattr(args, 'exclude_files', None)
    if not exclude_files:
        return
    if getattr(args, 'excludes', None) is None:
        args.excludes = []
    for exclude_file in exclude_files:
        try:
            patterns = [line.rstrip('\r\n') for line in exclude_file
                        if not line.startswith('#')]
            args.excludes += [ExcludePattern(pattern) for pattern in patterns if pattern]
        finally:
            # Release the handle even when an exception is propagating.
            exclude_file.close()
2013-06-30 20:32:27 +00:00
def adjust_patterns(paths, excludes):
    """Build the pattern list to apply when explicit *paths* are given.

    Without paths, *excludes* is returned unchanged. Otherwise the result
    is the excludes, followed by one include pattern per path, followed by
    a catch-all exclude so that everything not listed is rejected.
    """
    if not paths:
        return excludes
    includes = [IncludePattern(path) for path in paths]
    return (excludes or []) + includes + [ExcludePattern('*')]
2010-11-02 21:47:39 +00:00
def exclude_path(path, patterns):
    """Used by create and extract sub-commands to determine
    whether or not an item should be processed.

    The first pattern that matches *path* decides: the path is excluded
    only if that pattern is an ExcludePattern. With no matching pattern
    (or no patterns at all) the path is not excluded.
    """
    first_hit = next((p for p in (patterns or []) if p.match(path)), None)
    if first_hit is None:
        return False
    return isinstance(first_hit, ExcludePattern)
# For both IncludePattern and ExcludePattern, we require that
# the pattern either match the whole path or an initial segment
# of the path up to but not including a path separator. To
# unify the two cases, we add a path separator to the end of
# the path before matching.
class IncludePattern:
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).
    If a directory is specified, all paths that start with that
    path match as well. A trailing slash makes no difference.
    """

    def __init__(self, pattern):
        separator = os.path.sep
        # Normalize to exactly one trailing separator.
        self.pattern = pattern.rstrip(separator) + separator

    def match(self, path):
        # A separator is appended so that the pattern matches either the
        # whole path or a leading run of complete path components.
        candidate = path + os.path.sep
        return candidate.startswith(self.pattern)

    def __repr__(self):
        return '{}({})'.format(type(self), self.pattern)
class ExcludePattern(IncludePattern):
    """Shell glob patterns to exclude. A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """

    def __init__(self, pattern):
        separator = os.path.sep
        if pattern.endswith(separator):
            glob = pattern + '*' + separator
        else:
            glob = pattern + separator + '*'
        self.pattern = glob
        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, compiling once up front is about 10 times faster.
        self.regex = re.compile(translate(glob))

    def match(self, path):
        found = self.regex.match(path + os.path.sep)
        return found is not None

    def __repr__(self):
        return '{}({})'.format(type(self), self.pattern)
2010-11-02 21:47:39 +00:00
def walk_path(path, skip_inodes=None):
    """Yield ``(path, lstat_result)`` for *path* and everything below it.

    Directories are yielded before their entries; symbolic links are not
    followed because ``os.lstat`` is used.

    :param path: file or directory to start from
    :param skip_inodes: optional container of ``(st_ino, st_dev)`` tuples;
        a matching item is skipped together with everything below it
    """
    st = os.lstat(path)
    inode_key = (st.st_ino, st.st_dev)
    if skip_inodes and inode_key in skip_inodes:
        return
    yield path, st
    if not stat.S_ISDIR(st.st_mode):
        return
    for entry in os.listdir(path):
        child = os.path.join(path, entry)
        for item in walk_path(child, skip_inodes):
            yield item
2010-10-15 18:46:17 +00:00
2010-10-27 17:30:21 +00:00
def format_time(t):
    """Format datetime suitable for fixed length list output

    Timestamps less than 365 days before now show the time of day,
    older ones show the year instead.
    """
    age_in_days = (datetime.now() - t).days
    layout = '%b %d %H:%M' if age_in_days < 365 else '%b %d %Y'
    return t.strftime(layout)
2010-10-27 17:30:21 +00:00
def format_timedelta(td):
    """Format timedelta in a human friendly format

    Days, hours and minutes are only included when non-zero; seconds are
    always shown with two decimals.
    """
    total_microseconds = td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6
    ts = total_microseconds / float(10 ** 6)
    parts = []
    if td.days:
        parts.append('%d days' % td.days)
    hours = int(ts / 3600) % 24
    if hours:
        parts.append('%d hours' % hours)
    minutes = int(ts / 60) % 60
    if minutes:
        parts.append('%d minutes' % minutes)
    parts.append('%.2f seconds' % (ts % 60))
    return ' '.join(parts)
2010-10-27 17:30:21 +00:00
def format_file_mode(mod):
    """Format file mode bits for list output

    Returns nine characters such as ``rwxr-xr-x``: one ``rwx`` triplet
    each for the bits at ``mod // 64``, ``mod // 8`` and ``mod``.
    """
    flags = ((4, 'r'), (2, 'w'), (1, 'x'))

    def triplet(bits):
        return ''.join(letter if bits & mask else '-' for mask, letter in flags)

    return triplet(mod // 64) + triplet(mod // 8) + triplet(mod)
2010-10-27 17:30:21 +00:00
2011-10-31 20:18:28 +00:00
2010-10-27 17:30:21 +00:00
def format_file_size(v):
    """Format file size into a human friendly format

    Decimal (power of 1000) units are used. A unit applies only when the
    absolute value is strictly greater than its threshold.
    """
    magnitude = abs(v)
    for exponent, unit in ((12, 'TB'), (9, 'GB'), (6, 'MB'), (3, 'kB')):
        threshold = 10 ** exponent
        if magnitude > threshold:
            return '%.2f %s' % (v / threshold, unit)
    return '%d B' % v
2010-10-23 19:38:42 +00:00
2011-10-31 20:18:28 +00:00
def format_archive(archive):
    """Return one listing line: the archive name, left-aligned in 36
    columns, followed by its timestamp in local time (``%c`` format)."""
    name = archive.name
    local_timestamp = to_localtime(archive.ts)
    return '%-36s %s' % (name, local_timestamp.strftime('%c'))
class IntegrityError(Error):
    """Data integrity error"""
    # NOTE: the docstring above is the runtime message text: Error.get_message()
    # reads type(self).__doc__ to build what is reported.
2010-10-23 19:38:42 +00:00
2011-10-31 20:18:28 +00:00
def memoize(function):
    """Decorator caching the results of *function* per argument tuple.

    Only positional arguments are supported and they must be hashable.
    The cache is unbounded and lives as long as the decorated function.
    The wrapper keeps the name and docstring of the wrapped function.
    """
    # Imported locally so this block does not depend on a module-level import.
    import functools

    cache = {}

    @functools.wraps(function)
    def decorated_function(*args):
        try:
            return cache[args]
        except KeyError:
            val = function(*args)
            cache[args] = val
            return val
    return decorated_function
2010-10-15 18:46:17 +00:00
2011-10-31 20:18:28 +00:00
@memoize
def uid2user(uid):
    """Return the user name for *uid*, or None if there is no such user."""
    try:
        entry = pwd.getpwuid(uid)
    except KeyError:
        return None
    return entry.pw_name
2011-10-31 20:18:28 +00:00
@memoize
def user2uid(user):
    """Return the uid for *user*, or None if there is no such user.

    A falsy *user* (None, empty string) is returned unchanged without a
    lookup.
    """
    if not user:
        return user
    try:
        entry = pwd.getpwnam(user)
    except KeyError:
        return None
    return entry.pw_uid
2011-10-31 20:18:28 +00:00
@memoize
def gid2group(gid):
    """Return the group name for *gid*, or None if there is no such group."""
    try:
        entry = grp.getgrgid(gid)
    except KeyError:
        return None
    return entry.gr_name
2010-10-19 19:12:12 +00:00
2011-10-31 20:18:28 +00:00
@memoize
def group2gid(group):
    """Return the gid for *group*, or None if there is no such group.

    A falsy *group* (None, empty string) is returned unchanged without a
    lookup.
    """
    if not group:
        return group
    try:
        entry = grp.getgrnam(group)
    except KeyError:
        return None
    return entry.gr_gid
2010-10-27 17:30:21 +00:00
class Location:
    """Object representing a repository / archive location

    Three spellings are recognised, tried in this order:

    * ``ssh://[user@]host[:port]/path[::archive]``
    * ``file:///path[::archive]``
    * ``[[user@]host:]path[::archive]`` (scp style; ``proto`` becomes
      ``'ssh'`` when a host is present and ``'file'`` otherwise)
    """
    proto = user = host = port = path = archive = None
    ssh_re = re.compile(r'(?P<proto>ssh)://(?:(?P<user>[^@]+)@)?'
                        r'(?P<host>[^:/#]+)(?::(?P<port>\d+))?'
                        r'(?P<path>[^:]+)(?:::(?P<archive>.+))?')
    file_re = re.compile(r'(?P<proto>file)://'
                         r'(?P<path>[^:]+)(?:::(?P<archive>.+))?')
    scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                        r'(?P<path>[^:]+)(?:::(?P<archive>.+))?')

    def __init__(self, text):
        """Parse *text*.

        :raises ValueError: if *text* matches none of the known formats
        """
        self.orig = text
        if not self.parse(text):
            raise ValueError('Invalid location format: "%s"' % text)

    def parse(self, text):
        """Fill the attributes from *text*; return True on success."""
        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.user = m.group('user')
            self.host = m.group('host')
            # Kept as and/or on purpose: a literal port "0" yields None,
            # exactly as before.
            self.port = m.group('port') and int(m.group('port')) or None
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.file_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.scp_re.match(text)
        if m:
            self.user = m.group('user')
            self.host = m.group('host')
            self.path = m.group('path')
            self.archive = m.group('archive')
            self.proto = 'ssh' if self.host else 'file'
            return True
        return False

    def __str__(self):
        fields = ('proto', 'user', 'host', 'port', 'path', 'archive')
        return ', '.join('%s=%r' % (name, getattr(self, name)) for name in fields)

    def to_key_filename(self):
        """Return the key file path derived from this location.

        Every non-word character of the path is replaced by ``_`` and
        leading/trailing underscores are stripped; for non-file locations
        the host name is prepended.
        """
        # Raw string: '\w' in a normal string literal is an invalid escape
        # sequence and triggers a warning on recent interpreters.
        name = re.sub(r'[^\w]', '_', self.path).strip('_')
        if self.proto != 'file':
            name = self.host + '__' + name
        return os.path.join(get_keys_dir(), name)

    def __repr__(self):
        return "Location(%s)" % self
2010-10-15 18:46:17 +00:00
def location_validator(archive=None):
    """Return an argparse ``type=`` callable that parses a Location.

    :param archive: ``True`` requires the location to name an archive,
        ``False`` forbids it, any other value accepts both
    """
    def validator(text):
        try:
            loc = Location(text)
        except ValueError:
            raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text)
        names_archive = bool(loc.archive)
        if archive is True and not names_archive:
            raise argparse.ArgumentTypeError('"%s": No archive specified' % text)
        if archive is False and names_archive:
            raise argparse.ArgumentTypeError('"%s" No archive can be specified' % text)
        return loc
    return validator
def read_msgpack(filename):
    """Return the msgpack-decoded content of the file *filename*."""
    with open(filename, 'rb') as stream:
        unpacked = msgpack.unpack(stream)
    return unpacked
2012-07-31 12:43:32 +00:00
def write_msgpack(filename, d):
    """Write *d* msgpack-encoded to *filename*.

    The data goes to ``filename + '.tmp'`` first, is flushed and fsynced,
    and the temporary file is then renamed over *filename*.
    """
    tmp_name = filename + '.tmp'
    with open(tmp_name, 'wb') as out:
        msgpack.pack(d, out)
        out.flush()
        os.fsync(out.fileno())
    os.rename(tmp_name, filename)
2013-06-03 11:45:48 +00:00
def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
    """Decode the bytes values stored under *keys* in *d*, in place.

    Keys that are missing or whose value is not ``bytes`` are left alone.

    :returns: the same dict object *d*
    """
    for name in keys:
        value = d.get(name)
        if isinstance(value, bytes):
            d[name] = value.decode(encoding, errors)
    return d
def remove_surrogates(s, errors='replace'):
    """Replace surrogates generated by fsdecode with '?'

    The string is round-tripped through UTF-8; *errors* selects the
    encoder's error handler for characters that cannot be encoded.
    """
    encoded = s.encode('utf-8', errors)
    return encoded.decode('utf-8')
# Matches a leading run of "/" and "../" components. The dots are escaped:
# an unescaped ".." matches ANY two characters, which made the pattern strip
# ordinary leading components such as "ab/" as well.
_safe_re = re.compile(r'^((\.\.)?/+)+')


def make_path_safe(path):
    """Make path safe by making it relative and local

    Leading slashes and leading ``../`` components are removed. If nothing
    is left, ``'.'`` is returned. Only the start of the path is touched.
    """
    return _safe_re.sub('', path) or '.'
def daemonize():
    """Detach process from controlling terminal and run in background

    Forks twice (each parent exits immediately via ``os._exit``) with a
    ``setsid`` in between, changes the working directory to ``/`` and
    points file descriptors 0, 1 and 2 at ``/dev/null``.
    """
    if os.fork():
        os._exit(0)
    os.setsid()
    if os.fork():
        os._exit(0)
    os.chdir('/')
    standard_fds = (0, 1, 2)
    for std_fd in standard_fds:
        os.close(std_fd)
    devnull = os.open('/dev/null', os.O_RDWR)
    for std_fd in standard_fds:
        os.dup2(devnull, std_fd)
class StableDict(dict):
    """A dict subclass with stable items() ordering"""

    def items(self):
        # Returns a sorted list of (key, value) pairs rather than a view.
        pairs = list(super(StableDict, self).items())
        pairs.sort()
        return pairs
2013-06-15 18:56:27 +00:00
# Compatibility shims. The check uses sys.version_info because comparing the
# sys.version *string* is lexicographic: '3.10...' < '3.3' is true, which
# would select the legacy branch on Python 3.10 and later.
if sys.version_info < (3, 3):
    # st_mtime_ns attribute only available in 3.3+
    def st_mtime_ns(st):
        """Return the modification time of stat result *st* in nanoseconds."""
        return int(st.st_mtime * 1e9)

    # unhexlify in < 3.3 incorrectly only accepts bytes input
    def unhexlify(data):
        """binascii.unhexlify that also accepts an ASCII str."""
        if isinstance(data, str):
            data = data.encode('ascii')
        return binascii.unhexlify(data)
else:
    def st_mtime_ns(st):
        """Return the modification time of stat result *st* in nanoseconds."""
        return st.st_mtime_ns

    unhexlify = binascii.unhexlify