2011-06-23 20:47:51 +00:00
|
|
|
from __future__ import with_statement

import argparse
import functools
import grp
import os
import pwd
import re
import stat
import sys
import time
import urllib
from datetime import datetime, timedelta
from fnmatch import fnmatchcase
from operator import attrgetter

import msgpack
|
|
|
|
|
2011-10-27 20:17:47 +00:00
|
|
|
|
2011-09-04 21:02:47 +00:00
|
|
|
class Manifest(object):
    """Encrypted table of contents for a store.

    Holds the archive-name -> metadata mapping (``archives``) and free-form
    key/value settings (``config``).  The serialized manifest is
    msgpack-encoded, encrypted with *key* and stored under the fixed
    MANIFEST_ID, so it can be located without any prior lookup.
    """

    # Fixed all-zero 32-byte store id reserved for the manifest itself.
    MANIFEST_ID = '\0' * 32

    def __init__(self, store, key, dont_load=False):
        # dont_load=True is for callers creating a brand-new manifest
        # (there is nothing in the store to read yet).
        self.store = store
        self.key = key
        self.archives = {}
        self.config = {}
        if not dont_load:
            self.load()

    def load(self):
        """Fetch, decrypt and unpack the manifest from the store.

        Raises ValueError when the stored manifest is not version 1.
        """
        data = self.key.decrypt(None, self.store.get(self.MANIFEST_ID))
        # Remember the id of the loaded data; write() recomputes it so
        # callers can detect whether the manifest changed.
        self.id = self.key.id_hash(data)
        manifest = msgpack.unpackb(data)
        if not manifest.get('version') == 1:
            raise ValueError('Invalid manifest version')
        self.archives = manifest['archives']
        self.config = manifest['config']
        # Give the key a chance to pick up settings stored in the manifest.
        self.key.post_manifest_load(self.config)

    def write(self):
        """Pack, encrypt and store the manifest under MANIFEST_ID."""
        # Let the key update self.config before it is serialized.
        self.key.pre_manifest_write(self)
        data = msgpack.packb({
            'version': 1,
            'archives': self.archives,
            'config': self.config,
        })
        self.id = self.key.id_hash(data)
        self.store.put(self.MANIFEST_ID, self.key.encrypt(data))
|
|
|
|
|
2011-08-07 12:04:14 +00:00
|
|
|
|
2011-11-22 20:47:17 +00:00
|
|
|
def prune_split(archives, pattern, n, skip=None):
    """Select up to *n* archives to keep, one per strftime time bucket.

    :param archives: iterable of archive objects carrying a ``ts``
        (UTC datetime) attribute.
    :param pattern: strftime format used to bucket archives by local time
        (e.g. '%Y-%m-%d' for daily buckets).
    :param n: maximum number of archives to keep.
    :param skip: archives already kept by another pruning rule; a bucket
        whose first-seen entry is in *skip* is not counted again.
    :returns: list of archives to keep (the newest archive of each of the
        *n* most recent buckets).
    """
    # Avoid the shared-mutable-default-argument pitfall of ``skip=[]``.
    if skip is None:
        skip = []
    items = {}
    keep = []
    for a in archives:
        key = to_localtime(a.ts).strftime(pattern)
        items.setdefault(key, []).append(a)
    # Most recent buckets first (bucket keys sort chronologically).
    for key, values in sorted(items.items(), reverse=True):
        if n and values[0] not in skip:
            # Newest archive within the bucket wins.
            values.sort(key=attrgetter('ts'), reverse=True)
            keep.append(values[0])
            n -= 1
    return keep
|
2011-08-11 19:18:13 +00:00
|
|
|
|
|
|
|
|
2011-08-07 15:10:21 +00:00
|
|
|
class Statistics(object):
    """Accumulates backup size counters.

    Tracks original size (osize), compressed size (csize), unique
    (deduplicated) compressed size (usize) and number of files (nfiles).
    """

    def __init__(self):
        # All counters start at zero; nfiles is incremented by callers.
        self.osize = self.csize = self.usize = self.nfiles = 0

    def update(self, size, csize, unique):
        """Account for one chunk of *size* original / *csize* compressed
        bytes; *unique* chunks additionally count toward deduplicated size.
        """
        self.osize += size
        self.csize += csize
        if unique:
            self.usize += csize

    def print_(self):
        """Print a human readable summary of the counters."""
        # Parenthesized print works identically as a statement on Python 2
        # and as a function call on Python 3.
        print('Number of files: %d' % self.nfiles)
        print('Original size: %d (%s)' % (self.osize, format_file_size(self.osize)))
        # %d for consistency with the other counters (was %s).
        print('Compressed size: %d (%s)' % (self.csize, format_file_size(self.csize)))
        print('Unique data: %d (%s)' % (self.usize, format_file_size(self.usize)))
|
|
|
|
|
2011-08-07 12:04:14 +00:00
|
|
|
|
2011-08-07 10:37:17 +00:00
|
|
|
# OSX filenames are UTF-8 Only so any non-utf8 filenames are url encoded
if sys.platform == 'darwin':
    def encode_filename(name):
        # Python 2 semantics: *name* is presumably a byte string here
        # (os APIs return bytes) -- confirm against callers.
        try:
            # Valid UTF-8 names pass through as unicode.
            return name.decode('utf-8')
        except UnicodeDecodeError:
            # Non-UTF-8 byte sequences are made representable via
            # percent-encoding.
            return urllib.quote(name)
else:
    # Other platforms: filenames are used unchanged.
    encode_filename = str
|
2010-12-19 11:46:42 +00:00
|
|
|
|
2011-08-07 12:04:14 +00:00
|
|
|
|
2011-08-06 11:01:58 +00:00
|
|
|
def get_keys_dir():
    """Return the directory used to store keys.

    The DARC_KEYS_DIR environment variable overrides the default
    of ``~/.darc/keys``.
    """
    default = os.path.join(os.path.expanduser('~'), '.darc', 'keys')
    return os.environ.get('DARC_KEYS_DIR', default)
|
|
|
|
|
2011-08-07 12:04:14 +00:00
|
|
|
|
2011-08-06 11:01:58 +00:00
|
|
|
def get_cache_dir():
    """Return the directory used to store the local cache.

    The DARC_CACHE_DIR environment variable overrides the default of
    ``~/.darc/cache``.  (Docstring fixed: it was copy-pasted from
    get_keys_dir and claimed to cover keys too.)
    """
    return os.environ.get('DARC_CACHE_DIR',
                          os.path.join(os.path.expanduser('~'), '.darc', 'cache'))
|
|
|
|
|
|
|
|
|
2011-06-16 19:55:54 +00:00
|
|
|
def to_localtime(ts):
    """Convert a naive UTC datetime object to local time.

    NOTE(review): uses time.altzone (the DST offset) unconditionally,
    regardless of whether *ts* falls inside DST -- confirm that is the
    intended behavior before changing it.
    """
    offset = timedelta(seconds=time.altzone)
    return ts - offset
|
2010-12-19 11:46:42 +00:00
|
|
|
|
2011-08-07 12:04:14 +00:00
|
|
|
|
2012-10-17 09:40:23 +00:00
|
|
|
def adjust_patterns(patterns):
    """Ensure a non-empty pattern list ends with a catch-all exclude.

    Mutates *patterns* in place: when the final entry is not already an
    ExcludePattern, an ExcludePattern('*') is appended so that anything
    not matched by an earlier include rule is excluded.
    """
    if not patterns:
        return
    if isinstance(patterns[-1], ExcludePattern):
        return
    patterns.append(ExcludePattern('*'))
|
|
|
|
|
|
|
|
|
2010-11-02 21:47:39 +00:00
|
|
|
def exclude_path(path, patterns):
    """Used by create and extract sub-commands to decide whether an item
    should be processed.

    Returns True when the first pattern matching *path* is an exclude
    rule, False when it is an include rule or nothing matches at all.
    """
    if patterns:
        for pattern in patterns:
            if pattern.match(path):
                return isinstance(pattern, ExcludePattern)
    return False
|
|
|
|
|
|
|
|
|
|
|
|
class IncludePattern(object):
    """--include PATTERN

    A path matches when it equals the pattern exactly, lives below the
    pattern taken as a directory, or its basename matches the pattern
    via shell-style globbing.

    >>> py = IncludePattern('*.py')
    >>> foo = IncludePattern('/foo')
    >>> py.match('/foo/foo.py')
    True
    >>> py.match('/bar/foo.java')
    False
    >>> foo.match('/foo/foo.py')
    True
    >>> foo.match('/bar/foo.java')
    False
    >>> foo.match('/foobar/foo.py')
    False
    >>> foo.match('/foo')
    True
    """

    def __init__(self, pattern):
        self.pattern = self.dirpattern = pattern
        # The directory form always carries a trailing separator so
        # '/foo' matches '/foo/x' but never '/foobar/x'.
        if not pattern.endswith(os.path.sep):
            self.dirpattern += os.path.sep

    def match(self, path):
        head, tail = os.path.split(path)
        if path == self.pattern:
            return True
        if (head + os.path.sep).startswith(self.dirpattern):
            return True
        return fnmatchcase(tail, self.pattern)

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)
|
|
|
|
|
|
|
|
|
|
|
|
class ExcludePattern(IncludePattern):
    """--exclude PATTERN

    Matching behavior is inherited unchanged from IncludePattern; the
    subclass exists as a marker so exclude_path() can distinguish
    exclude rules from include rules via isinstance.
    """
|
|
|
|
|
|
|
|
|
2010-11-09 19:49:21 +00:00
|
|
|
def walk_path(path, skip_inodes=None):
    """Yield (path, lstat result) for *path* and, depth-first, for every
    entry below it.

    Entries whose (st_ino, st_dev) pair appears in *skip_inodes* are
    pruned together with their subtrees.  Symlinks are not followed
    (os.lstat is used).
    """
    st = os.lstat(path)
    if skip_inodes and (st.st_ino, st.st_dev) in skip_inodes:
        return
    yield path, st
    if stat.S_ISDIR(st.st_mode):
        for entry in os.listdir(path):
            child = os.path.join(path, entry)
            for result in walk_path(child, skip_inodes):
                yield result
|
2010-10-21 19:21:43 +00:00
|
|
|
|
2010-10-15 18:46:17 +00:00
|
|
|
|
2010-10-27 17:30:21 +00:00
|
|
|
def format_time(t):
    """Format datetime suitable for fixed length list output.

    Entries younger than a year show month/day plus time of day;
    older entries show month/day plus the year instead.
    """
    recent = (datetime.now() - t).days < 365
    if recent:
        return t.strftime('%b %d %H:%M')
    return t.strftime('%b %d %Y')
|
2010-10-27 17:30:21 +00:00
|
|
|
|
|
|
|
|
2011-08-07 15:10:21 +00:00
|
|
|
def format_timedelta(td):
    """Format timedelta in a human friendly format

    >>> from datetime import datetime
    >>> t0 = datetime(2001, 1, 1, 10, 20, 3, 0)
    >>> t1 = datetime(2001, 1, 1, 12, 20, 4, 100000)
    >>> format_timedelta(t1 - t0)
    '2 hours 1.10 seconds'
    """
    # Since td.total_seconds() requires python 2.7
    total = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / float(10 ** 6)
    seconds = total % 60
    minutes = int(total / 60) % 60
    hours = int(total / 3600) % 24
    # Collect the non-zero components from largest to smallest unit;
    # seconds are always included.
    parts = []
    if td.days:
        parts.append('%d days' % td.days)
    if hours:
        parts.append('%d hours' % hours)
    if minutes:
        parts.append('%d minutes' % minutes)
    parts.append('%.2f seconds' % seconds)
    return ' '.join(parts)
|
|
|
|
|
|
|
|
|
2010-10-27 17:30:21 +00:00
|
|
|
def format_file_mode(mod):
    """Format file mode bits for list output.

    Returns the 9-character permission string for *mod*,
    e.g. 0755 -> 'rwxr-xr-x'.
    """
    def x(v):
        # Map the low three bits of v to 'rwx'/'-' flags.
        return ''.join(v & m and s or '-'
                       for m, s in ((4, 'r'), (2, 'w'), (1, 'x')))
    # Floor division keeps the operand an int on both Python 2 and 3;
    # plain '/' would produce a float on Python 3 and break the bitwise AND.
    return '%s%s%s' % (x(mod // 64), x(mod // 8), x(mod))
|
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-27 17:30:21 +00:00
|
|
|
def format_file_size(v):
    """Format file size into a human friendly format.

    Sizes strictly above 1 GB / 1 MB / 1 kB use the matching unit with
    two decimals; anything else is printed as raw bytes.
    """
    for factor, suffix in ((1024 ** 3, 'GB'), (1024 ** 2, 'MB'), (1024, 'kB')):
        if v > factor:
            return '%.2f %s' % (v / float(factor), suffix)
    return '%d B' % v
|
2010-10-23 19:38:42 +00:00
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-27 17:30:21 +00:00
|
|
|
class IntegrityError(Exception):
    """Signals that stored data failed a consistency check.

    NOTE(review): no raise sites are visible in this file -- confirm
    against callers elsewhere in the project before relying on this
    description.
    """
|
2010-10-23 19:38:42 +00:00
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-19 20:03:47 +00:00
|
|
|
def memoize(function):
    """Decorator caching *function* results keyed by positional arguments.

    The cache is unbounded; intended for cheap, stable lookups such as
    uid/gid to name mappings.  functools.wraps preserves the wrapped
    function's __name__ and docstring (the original wrapper lost them).
    """
    cache = {}

    @functools.wraps(function)
    def decorated_function(*args):
        # EAFP: hit the cache first, compute and store on a miss.
        try:
            return cache[args]
        except KeyError:
            val = function(*args)
            cache[args] = val
            return val
    return decorated_function
|
2010-10-15 18:46:17 +00:00
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-19 20:03:47 +00:00
|
|
|
@memoize
def uid2user(uid):
    """Return the user name for *uid*, or None when the uid is unknown."""
    try:
        entry = pwd.getpwuid(uid)
    except KeyError:
        return None
    return entry.pw_name
|
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-19 20:03:47 +00:00
|
|
|
@memoize
def user2uid(user):
    """Return the uid for *user*.

    A falsy *user* (None, '') is returned unchanged; an unknown user
    name yields None.
    """
    if not user:
        return user
    try:
        return pwd.getpwnam(user).pw_uid
    except KeyError:
        return None
|
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-19 20:03:47 +00:00
|
|
|
@memoize
def gid2group(gid):
    """Return the group name for *gid*, or None when the gid is unknown."""
    try:
        entry = grp.getgrgid(gid)
    except KeyError:
        return None
    return entry.gr_name
|
2010-10-19 19:12:12 +00:00
|
|
|
|
2011-10-31 20:18:28 +00:00
|
|
|
|
2010-10-19 20:03:47 +00:00
|
|
|
@memoize
def group2gid(group):
    """Return the gid for *group*.

    A falsy *group* (None, '') is returned unchanged; an unknown group
    name yields None.
    """
    if not group:
        return group
    try:
        return grp.getgrnam(group).gr_gid
    except KeyError:
        return None
|
|
|
|
|
2010-10-27 17:30:21 +00:00
|
|
|
|
2010-10-15 18:46:17 +00:00
|
|
|
class Location(object):
    """Object representing a store / archive location

    >>> Location('ssh://user@host:1234/some/path::archive')
    Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')
    >>> Location('file:///some/path::archive')
    Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')
    >>> Location('user@host:/some/path::archive')
    Location(proto='ssh', user='user', host='host', port=22, path='/some/path', archive='archive')
    >>> Location('/some/path::archive')
    Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')
    """
    # Class-level defaults; parse() fills in whatever it can extract.
    proto = user = host = port = path = archive = None
    # ssh://[user@]host[:port]/path[::archive]
    ssh_re = re.compile(r'(?P<proto>ssh)://(?:(?P<user>[^@]+)@)?'
                        r'(?P<host>[^:/#]+)(?::(?P<port>\d+))?'
                        r'(?P<path>[^:]*)(?:::(?P<archive>.+))?')
    # file://path[::archive]
    file_re = re.compile(r'(?P<proto>file)://'
                         r'(?P<path>[^:]*)(?:::(?P<archive>.+))?')
    # scp-style [[user@]host:]path[::archive].  Also matches bare local
    # paths, so parse() must try it last.
    scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                        r'(?P<path>[^:]*)(?:::(?P<archive>.+))?')

    def __init__(self, text):
        # Keep the unparsed text around for error reporting.
        self.orig = text
        if not self.parse(text):
            raise ValueError

    def parse(self, text):
        """Try each location form in turn; return True on the first match.

        Order matters: scp_re is effectively a catch-all and has to be
        attempted after the explicit ssh:// and file:// forms.
        """
        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.user = m.group('user')
            self.host = m.group('host')
            # Default to the standard ssh port when none was given.
            self.port = m.group('port') and int(m.group('port')) or 22
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.file_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.path = m.group('path')
            self.archive = m.group('archive')
            return True
        m = self.scp_re.match(text)
        if m:
            self.user = m.group('user')
            self.host = m.group('host')
            self.path = m.group('path')
            self.archive = m.group('archive')
            # A host part means remote access over ssh; otherwise the
            # location is a local file path.
            self.proto = self.host and 'ssh' or 'file'
            if self.proto == 'ssh':
                self.port = 22
            return True
        return False

    def __str__(self):
        # Field-by-field repr-style rendering; also backs __repr__ and
        # the doctest output above.
        items = []
        items.append('proto=%r' % self.proto)
        items.append('user=%r' % self.user)
        items.append('host=%r' % self.host)
        items.append('port=%r' % self.port)
        items.append('path=%r' % self.path)
        items.append('archive=%r' % self.archive)
        return ', '.join(items)

    def to_key_filename(self):
        """Derive a filesystem-safe key file path for this location."""
        # NOTE(review): non-raw pattern string; '\w' happens to work, but
        # r'[^\w]' would be cleaner -- confirm before changing.
        name = re.sub('[^\w]', '_', self.path).strip('_')
        if self.proto != 'file':
            # Prefix with the host so keys for different remotes with the
            # same path do not collide.
            name = self.host + '__' + name
        return os.path.join(get_keys_dir(), name)

    def __repr__(self):
        return "Location(%s)" % self
|
2010-10-15 18:46:17 +00:00
|
|
|
|
|
|
|
|
|
|
|
def location_validator(archive=None):
    """Build an argparse type-checker for Location arguments.

    archive=True requires the location to name an archive,
    archive=False forbids it, and archive=None accepts either.
    """
    def validator(text):
        try:
            loc = Location(text)
        except ValueError:
            raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text)
        if archive is True and not loc.archive:
            raise argparse.ArgumentTypeError('"%s": No archive specified' % text)
        if archive is False and loc.archive:
            raise argparse.ArgumentTypeError('"%s" No archive can be specified' % text)
        return loc
    return validator
|
|
|
|
|
|
|
|
|
2011-09-04 21:02:47 +00:00
|
|
|
def read_msgpack(filename):
    """Deserialize and return the msgpack object stored in *filename*."""
    fd = open(filename, 'rb')
    try:
        return msgpack.unpack(fd)
    finally:
        fd.close()
|
|
|
|
|
2012-07-31 12:43:32 +00:00
|
|
|
|
2011-09-04 21:02:47 +00:00
|
|
|
def write_msgpack(filename, d):
    """Atomically serialize *d* to *filename*.

    Writes to a '.tmp' sibling, fsyncs it, then renames over the target
    so readers never observe a partially written file.
    """
    tmp = filename + '.tmp'
    with open(tmp, 'wb') as fd:
        msgpack.pack(d, fd)
        fd.flush()
        os.fsync(fd)
    os.rename(tmp, filename)
|