1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-26 01:37:20 +00:00
borg/darc/helpers.py

405 lines
11 KiB
Python
Raw Normal View History

2011-06-23 20:47:51 +00:00
from __future__ import with_statement
2010-10-15 18:46:17 +00:00
import argparse
from datetime import datetime, timedelta
2010-11-02 21:47:39 +00:00
from fnmatch import fnmatchcase
2011-08-11 19:18:13 +00:00
from operator import attrgetter
import grp
2010-10-31 19:12:32 +00:00
import os
import pwd
import re
2010-10-31 19:12:32 +00:00
import stat
2010-12-19 11:46:42 +00:00
import struct
import sys
import time
import urllib
2011-08-12 06:49:01 +00:00
def purge_split(archives, pattern, n, reverse=False):
    """Split archives into a list to keep and a list to delete.

    Archives are grouped by the string produced by formatting their
    local-time ``ts`` attribute with the strftime ``pattern``.  The groups
    are visited in sorted key order (descending when ``reverse`` is true).
    For the first ``n`` groups the first archive by ``ts`` (same direction)
    is kept and the remaining members are deleted; every archive of the
    later groups is deleted.

    Returns a ``(keep, delete)`` tuple of lists.
    """
    groups = {}
    for archive in archives:
        bucket = to_localtime(archive.ts).strftime(pattern)
        groups.setdefault(bucket, []).append(archive)

    keep, delete = [], []
    remaining = n
    # Keys are unique, so sorting the keys gives the same order as sorting
    # the (key, values) pairs.
    for bucket in sorted(groups, reverse=reverse):
        members = groups[bucket]
        if not remaining:
            delete.extend(members)
            continue
        members.sort(key=attrgetter('ts'), reverse=reverse)
        keep.append(members[0])
        delete.extend(members[1:])
        remaining -= 1
    return keep, delete
2011-08-11 19:18:13 +00:00
class Statistics(object):
    """Accumulates size counters and prints a short summary of them."""

    def __init__(self):
        # nfiles is never modified by update(); it is only reported.
        self.nfiles = 0
        self.osize = 0
        self.csize = 0
        self.usize = 0

    def update(self, size, csize, unique):
        """Add ``size`` to the original total and ``csize`` to the
        compressed total; ``csize`` also counts as unique data when
        ``unique`` is true.
        """
        self.osize += size
        self.csize += csize
        if unique:
            self.usize += csize

    def print_(self):
        """Print the file count and the three size totals to stdout."""
        print('Number of files: %d' % self.nfiles)
        rows = (
            ('Original size: %d (%s)', self.osize),
            ('Compressed size: %s (%s)', self.csize),
            ('Unique data: %d (%s)', self.usize),
        )
        for template, amount in rows:
            print(template % (amount, format_file_size(amount)))
# OSX filenames are UTF-8 only, so any non-UTF-8 filename is URL-encoded.
# On every other platform the name is passed through ``str`` unchanged.
if sys.platform != 'darwin':
    encode_filename = str
else:
    def encode_filename(name):
        """Return ``name`` if it decodes as UTF-8, else its URL-quoted form."""
        try:
            name.decode('utf-8')
        except UnicodeDecodeError:
            return urllib.quote(name)
        else:
            return name
2010-12-19 11:46:42 +00:00
2011-08-02 19:45:21 +00:00
class Counter(object):
    """Mutable counter that compares like its current value ``v``.

    The original implementation relied only on ``__cmp__`` and the builtin
    ``cmp``, which exist in Python 2 alone.  Rich comparison methods are
    provided so that ``counter == 3`` or ``counter > 0`` behave the same on
    every interpreter; ``__cmp__`` is kept for code that calls it directly.
    """
    __slots__ = ('v',)

    def __init__(self, value=0):
        self.v = value

    def inc(self, amount=1):
        """Increase the counter by ``amount``."""
        self.v += amount

    def dec(self, amount=1):
        """Decrease the counter by ``amount``."""
        self.v -= amount

    def __cmp__(self, x):
        # Same -1 / 0 / 1 result as cmp(self.v, x) without needing the
        # Python-2-only builtin.
        return (self.v > x) - (self.v < x)

    def __eq__(self, x):
        return self.v == x

    def __ne__(self, x):
        return self.v != x

    def __lt__(self, x):
        return self.v < x

    def __le__(self, x):
        return self.v <= x

    def __gt__(self, x):
        return self.v > x

    def __ge__(self, x):
        return self.v >= x

    # Defining __eq__ would otherwise disable hashing on Python 3; keep the
    # identity-based hash the class had before.
    __hash__ = object.__hash__

    def __repr__(self):
        return '<Counter(%r)>' % self.v
2011-07-17 20:31:37 +00:00
2011-08-06 11:01:58 +00:00
def get_keys_dir():
    """Return the directory used for keys.

    The ``DARC_KEYS_DIR`` environment variable wins when it is set;
    otherwise ``~/.darc/keys`` is used.
    """
    fallback = os.path.join(os.path.expanduser('~'), '.darc', 'keys')
    return os.environ.get('DARC_KEYS_DIR', fallback)
2011-08-06 11:01:58 +00:00
def get_cache_dir():
    """Return the directory used for the cache.

    The ``DARC_CACHE_DIR`` environment variable wins when it is set;
    otherwise ``~/.darc/cache`` is used.
    """
    fallback = os.path.join(os.path.expanduser('~'), '.darc', 'cache')
    return os.environ.get('DARC_CACHE_DIR', fallback)
2011-07-17 20:31:37 +00:00
def deferrable(f):
    """Decorator adding an optional callback calling convention to ``f``.

    Without a ``callback`` keyword argument (or with a false one) the
    wrapped function is called normally and its result returned.

    With ``callback`` given, ``f`` is called and the outcome is reported as
    ``callback(result, None, data)`` on success or
    ``callback(None, exception, data)`` when ``f`` raises an ``Exception``;
    ``data`` is the optional ``callback_data`` keyword argument.  In this
    mode the wrapper itself returns ``None``.
    """
    # Local import: functools is not among the module-level imports.
    import functools

    @functools.wraps(f)  # keep f's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kw):
        callback = kw.pop('callback', None)
        if not callback:
            return f(*args, **kw)
        data = kw.pop('callback_data', None)
        try:
            res = f(*args, **kw)
        except Exception:
            # sys.exc_info() avoids the "except X, e" / "except X as e"
            # syntax split between interpreter versions.
            callback(None, sys.exc_info()[1], data)
        else:
            callback(res, None, data)
    return wrapper
2011-07-17 20:31:37 +00:00
def error_callback(res, error, data):
    """Callback that re-raises a reported error and ignores results.

    Follows the ``callback(res, error, data)`` convention used by
    ``deferrable``: ``error`` carries the exception (or ``None`` on
    success).  The previous code tested and raised ``res``, which swallowed
    real errors and tried to raise successful results.
    """
    if error is not None:
        raise error
def to_localtime(ts):
    """Convert datetime object from UTC to local time zone

    ``ts`` is interpreted as UTC; a naive datetime in the local time zone
    is returned, with the microseconds of ``ts`` preserved.

    The previous implementation always subtracted ``time.altzone`` (the
    daylight-saving offset), which is wrong whenever DST is not in effect
    for ``ts``.  The conversion now goes through ``time.localtime`` so the
    offset valid at that instant is applied.
    """
    # Local import: calendar is not among the module-level imports.
    import calendar
    epoch_seconds = calendar.timegm(ts.utctimetuple())
    local = datetime(*time.localtime(epoch_seconds)[:6])
    return local.replace(microsecond=ts.microsecond)
2010-12-19 11:46:42 +00:00
2010-12-19 11:46:42 +00:00
def read_set(path):
    """Read set from disk (as int32s)

    The file at ``path`` is read in binary mode and decoded as a sequence
    of little-endian 32 bit signed integers.  ``struct.error`` is raised
    when the file size is not a multiple of four bytes.
    """
    with open(path, 'rb') as fd:
        data = fd.read()
    # Floor division keeps the count an integer regardless of the
    # interpreter's division semantics.
    count = len(data) // 4
    return set(struct.unpack('<%di' % count, data))
def write_set(s, path):
    """Write set to disk (as int32s)

    Every member of ``s`` is stored as a little-endian 32 bit signed
    integer; the file at ``path`` is opened in binary write mode.
    """
    with open(path, 'wb') as fd:
        layout = '<%di' % len(s)
        packed = struct.pack(layout, *s)
        fd.write(packed)
2010-10-31 19:12:32 +00:00
def encode_long(v):
    """Encode the non-negative integer ``v`` as a variable length string.

    Seven bits are emitted per character, least significant group first.
    Every character except the last has bit 0x80 set to signal that more
    follow.
    """
    pieces = []
    while v > 0x7f:
        pieces.append(chr(0x80 | (v & 0x7f)))
        v >>= 7
    pieces.append(chr(v))
    return ''.join(pieces)
def decode_long(bytes):
    """Decode a variable length integer from the sequence ``bytes``.

    Each character contributes seven bits, least significant group first;
    a character without bit 0x80 terminates the number.  Returns ``None``
    when the input ends before a terminating character is seen.
    """
    total = 0
    for position, char in enumerate(bytes):
        octet = ord(char)
        shift = 7 * position
        if not octet & 0x80:
            return total + (octet << shift)
        total += (octet & 0x7f) << shift
    return None
2010-11-02 21:47:39 +00:00
def exclude_path(path, patterns):
    """Used by create and extract sub-commands to determine
    if an item should be processed or not

    The first pattern whose ``match(path)`` succeeds decides: the path is
    excluded exactly when that pattern is an ``ExcludePattern``.  With no
    patterns, or no matching pattern, the path is not excluded.
    """
    if not patterns:
        return False
    for candidate in patterns:
        if candidate.match(path):
            return isinstance(candidate, ExcludePattern)
    return False
class IncludePattern(object):
    """--include PATTERN

    A path matches when it equals the pattern, when it lies below the
    directory named by the pattern, or when its final component matches
    the pattern as a case-sensitive shell glob.

    >>> py = IncludePattern('*.py')
    >>> foo = IncludePattern('/foo')
    >>> py.match('/foo/foo.py')
    True
    >>> py.match('/bar/foo.java')
    False
    >>> foo.match('/foo/foo.py')
    True
    >>> foo.match('/bar/foo.java')
    False
    >>> foo.match('/foobar/foo.py')
    False
    >>> foo.match('/foo')
    True
    """

    def __init__(self, pattern):
        self.pattern = pattern
        # dirpattern always ends with a separator so that '/foo' does not
        # match '/foobar/...'.
        if pattern.endswith(os.path.sep):
            self.dirpattern = pattern
        else:
            self.dirpattern = pattern + os.path.sep

    def match(self, path):
        """Return True when ``path`` is selected by this pattern."""
        parent, basename = os.path.split(path)
        if path == self.pattern:
            return True
        if (parent + os.path.sep).startswith(self.dirpattern):
            return True
        return fnmatchcase(basename, self.pattern)

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)
class ExcludePattern(IncludePattern):
    """Pattern whose matches mark a path as excluded.

    Matching is inherited unchanged from IncludePattern; the class only
    serves as a marker that exclude_path() checks with isinstance() on the
    first pattern that matches a path.
    """
def walk_path(path, skip_inodes=None):
    """Yield ``(path, lstat_result)`` for ``path`` and everything below it.

    Directories are yielded before their entries and symbolic links are
    not followed (``os.lstat`` is used).  An item whose
    ``(st_ino, st_dev)`` pair is contained in ``skip_inodes`` is skipped
    together with everything beneath it.
    """
    st = os.lstat(path)
    if skip_inodes:
        if (st.st_ino, st.st_dev) in skip_inodes:
            return
    yield path, st
    if not stat.S_ISDIR(st.st_mode):
        return
    for entry in os.listdir(path):
        child = os.path.join(path, entry)
        for item in walk_path(child, skip_inodes):
            yield item
2010-10-15 18:46:17 +00:00
2010-10-27 17:30:21 +00:00
def format_time(t):
    """Format datetime suitable for fixed length list output

    Timestamps less than 365 days older than now show the time of day;
    older ones show the year instead.
    """
    age_in_days = (datetime.now() - t).days
    layout = '%b %d %H:%M' if age_in_days < 365 else '%b %d %Y'
    return t.strftime(layout)
2010-10-27 17:30:21 +00:00
def format_timedelta(td):
    """Format timedelta in a human friendly format

    Days, hours and minutes are only mentioned when non-zero; seconds are
    always shown with two decimals.

    >>> from datetime import datetime
    >>> t0 = datetime(2001, 1, 1, 10, 20, 3, 0)
    >>> t1 = datetime(2001, 1, 1, 12, 20, 4, 100000)
    >>> format_timedelta(t1 - t0)
    '2 hours 1.10 seconds'
    """
    # Since td.total_seconds() requires python 2.7
    total = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / float(10**6)
    parts = []
    if td.days:
        parts.append('%d days' % td.days)
    hours = int(total / 3600) % 24
    if hours:
        parts.append('%d hours' % hours)
    minutes = int(total / 60) % 60
    if minutes:
        parts.append('%d minutes' % minutes)
    parts.append('%.2f seconds' % (total % 60))
    return ' '.join(parts)
2010-10-27 17:30:21 +00:00
def format_file_mode(mod):
    """Format file mode bits for list output

    Returns the nine character ``rwxrwxrwx`` style string for the owner,
    group and other permission bits of ``mod``; higher bits (such as the
    file type) are ignored.
    """
    def triplet(bits):
        return ''.join(flag if bits & mask else '-'
                       for mask, flag in ((4, 'r'), (2, 'w'), (1, 'x')))
    # Shifts instead of "/": plain division only works on integers while
    # "/" means floor division.
    return '%s%s%s' % (triplet(mod >> 6), triplet(mod >> 3), triplet(mod))
def format_file_size(v):
    """Format file size into a human friendly format

    Sizes strictly above 1024 bytes are scaled to kB, MB or GB (powers of
    1024) with two decimals; smaller values are returned as plain
    ``str(v)``.
    """
    scales = (
        ('%.2f GB', 1024 * 1024 * 1024),
        ('%.2f MB', 1024 * 1024),
        ('%.2f kB', 1024),
    )
    for template, factor in scales:
        if v > factor:
            return template % (v / float(factor))
    return str(v)
2010-10-23 19:38:42 +00:00
2010-10-27 17:30:21 +00:00
class IntegrityError(Exception):
    """Exception type for integrity failures.

    Nothing in this part of the file raises it.
    NOTE(review): presumably raised elsewhere when stored data fails
    verification -- confirm against the callers.
    """
2010-10-23 19:38:42 +00:00
def memoize(function):
    """Decorator caching results of ``function`` by positional arguments.

    The cache is a plain dict keyed on the argument tuple: it never
    expires, and all arguments must be hashable.  Keyword arguments are
    not supported.  The returned wrapper keeps the name and docstring of
    ``function``.
    """
    # Local import: functools is not among the module-level imports.
    import functools
    cache = {}

    @functools.wraps(function)
    def decorated_function(*args):
        try:
            return cache[args]
        except KeyError:
            val = function(*args)
            cache[args] = val
            return val
    return decorated_function
2010-10-15 18:46:17 +00:00
@memoize
def uid2user(uid):
    """Return the user name for numeric id ``uid``, or None if unknown."""
    try:
        entry = pwd.getpwuid(uid)
    except KeyError:
        return None
    return entry.pw_name
@memoize
def user2uid(user):
    """Return the numeric id of user name ``user``, or None if unknown."""
    try:
        entry = pwd.getpwnam(user)
    except KeyError:
        return None
    return entry.pw_uid
@memoize
def gid2group(gid):
    """Return the group name for numeric id ``gid``, or None if unknown."""
    try:
        entry = grp.getgrgid(gid)
    except KeyError:
        return None
    return entry.gr_name
2010-10-19 19:12:12 +00:00
@memoize
def group2gid(group):
    """Return the numeric id of group name ``group``, or None if unknown."""
    try:
        entry = grp.getgrnam(group)
    except KeyError:
        return None
    return entry.gr_gid
2010-10-27 17:30:21 +00:00
2010-10-15 18:46:17 +00:00
class Location(object):
    """Object representing a store / archive location

    Three spellings are understood: ``ssh://[user@]host[:port]/path``,
    ``file:///path`` and the scp-like ``[user@]host:path`` (or a bare
    path).  Each may be followed by ``::archive``.  ``ValueError`` is
    raised when the text matches none of them.

    >>> Location('ssh://user@host:1234/some/path::archive')
    Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')
    >>> Location('file:///some/path::archive')
    Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')
    >>> Location('user@host:/some/path::archive')
    Location(proto='ssh', user='user', host='host', port=22, path='/some/path', archive='archive')
    >>> Location('/some/path::archive')
    Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')
    """
    proto = user = host = port = path = archive = None

    # Every expression is anchored with "$": re.match() only anchors the
    # start, so without it any prefix would match and trailing junk (for
    # example a stray ":something") would be dropped silently.
    ssh_re = re.compile(r'(?P<proto>ssh)://(?:(?P<user>[^@]+)@)?'
                        r'(?P<host>[^:/#]+)(?::(?P<port>\d+))?'
                        r'(?P<path>[^:]*)(?:::(?P<archive>.+))?$')
    file_re = re.compile(r'(?P<proto>file)://'
                         r'(?P<path>[^:]*)(?:::(?P<archive>.+))?$')
    scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                        r'(?P<path>[^:]*)(?:::(?P<archive>.+))?$')

    def __init__(self, text):
        if not self.parse(text):
            raise ValueError('Invalid location format: "%s"' % text)

    def parse(self, text):
        """Fill in the attributes from ``text``.

        Returns True on success and False when ``text`` matches none of
        the supported formats.
        """
        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.user = m.group('user')
            self.host = m.group('host')
            port = m.group('port')
            # Conditional expression instead of "x and int(x) or 22" so
            # the result depends on whether a port was given, not on the
            # truthiness of its numeric value.
            self.port = int(port) if port else 22
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.file_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.scp_re.match(text)
        if m:
            self.user = m.group('user')
            self.host = m.group('host')
            self.path = m.group('path')
            self.archive = m.group('archive')
            # A host part means scp-style remote access, otherwise the
            # text is a plain local path.
            self.proto = 'ssh' if self.host else 'file'
            if self.proto == 'ssh':
                self.port = 22
            return True
        return False

    def __str__(self):
        items = [
            'proto=%r' % self.proto,
            'user=%r' % self.user,
            'host=%r' % self.host,
            'port=%r' % self.port,
            'path=%r' % self.path,
            'archive=%r' % self.archive,
        ]
        return ', '.join(items)

    def to_key_filename(self):
        """Return the key file path for this location inside the keys
        directory; non-word characters of the path become underscores and
        remote locations are prefixed with the host name.
        """
        name = re.sub(r'[^\w]', '_', self.path).strip('_')
        if self.proto != 'file':
            name = self.host + '__' + name
        return os.path.join(get_keys_dir(), name)

    def __repr__(self):
        return "Location(%s)" % self
2010-10-15 18:46:17 +00:00
def location_validator(archive=None):
    """Build an argparse ``type=`` callable that parses a Location.

    ``archive=True`` requires the location to name an archive,
    ``archive=False`` forbids it, and any other value accepts both.
    The returned callable raises ``argparse.ArgumentTypeError`` on
    violations and on text that ``Location`` rejects.
    """
    def validator(text):
        try:
            loc = Location(text)
        except ValueError:
            raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text)
        if archive is True:
            if not loc.archive:
                raise argparse.ArgumentTypeError('"%s": No archive specified' % text)
        elif archive is False:
            if loc.archive:
                raise argparse.ArgumentTypeError('"%s" No archive can be specified' % text)
        return loc
    return validator