Switch to pure AES256 encryption and improved metadata storage

Jonas Borgström 2011-07-30 21:13:48 +02:00
parent d130754a10
commit b294ceba67
10 changed files with 401 additions and 449 deletions

View File

@@ -2,13 +2,3 @@
NS_CHUNK = 0
NS_ARCHIVE_METADATA = 1
NS_ARCHIVE_CHUNKS = 2
NS_ARCHIVE_ITEMS = 3
PACKET_ENCRYPT_READ = 2 ** 7
PACKET_ENCRYPT_CREATE = 2 ** 6
PACKET_CHUNK = 1 | PACKET_ENCRYPT_READ
PACKET_ARCHIVE_METADATA = 2 | PACKET_ENCRYPT_READ
PACKET_ARCHIVE_ITEMS = 3 | PACKET_ENCRYPT_READ
PACKET_ARCHIVE_CHUNKS = 1 | PACKET_ENCRYPT_CREATE
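
The PACKET_* framing constants go away because every stored object now shares a single encrypted envelope (see darc/key.py below); the store namespace alone says what an object is, and judging from the imports later in this diff only NS_CHUNK and NS_ARCHIVE_METADATA remain in use. A minimal sketch of a chunk fetch under the new layout, where store and key stand in for the Store and Key objects elsewhere in this commit:

NS_CHUNK = 0              # encrypted data chunks, keyed by HMAC id
NS_ARCHIVE_METADATA = 1   # encrypted archive metadata, keyed by name HMAC

def fetch_chunk(store, key, id):
    # hypothetical helper: the envelope is verified by HMAC inside
    # key.decrypt(); the chunk id is itself an HMAC of the plaintext
    data, hash = key.decrypt(store.get(NS_CHUNK, id))
    assert key.id_hash(data) == id
    return data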

View File

@@ -6,11 +6,10 @@ import os
import socket
import stat
import sys
from itertools import izip
from os.path import dirname
from xattr import xattr, XATTR_NOFOLLOW
from . import NS_ARCHIVE_METADATA, NS_ARCHIVE_ITEMS, NS_ARCHIVE_CHUNKS, NS_CHUNK, \
PACKET_ARCHIVE_METADATA, PACKET_ARCHIVE_ITEMS, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK
from . import NS_ARCHIVE_METADATA, NS_CHUNK
from ._speedups import chunkify
from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError
@@ -26,22 +25,24 @@ class Archive(object):
class DoesNotExist(Exception):
pass
def __init__(self, store, keychain, name=None):
self.keychain = keychain
def __init__(self, store, key, name=None, cache=None):
self.key = key
self.store = store
self.items = []
self.cache = cache
self.items = ''
self.items_refs = []
self.items_prefix = ''
self.items_ids = []
self.hard_links = {}
if name:
self.load(self.keychain.id_hash(name))
self.load(self.key.archive_hash(name))
def load(self, id):
self.id = id
try:
kind, data, self.hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id))
data, self.hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id))
except self.store.DoesNotExist:
raise self.DoesNotExist
assert kind == PACKET_ARCHIVE_METADATA
self.metadata = msgpack.unpackb(data)
assert self.metadata['version'] == 1
@@ -51,80 +52,90 @@ class Archive(object):
t, f = self.metadata['time'].split('.', 1)
return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f))
def get_chunks(self):
for id in self.metadata['chunks_ids']:
magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id))
assert magic == PACKET_ARCHIVE_CHUNKS
assert hash == id
chunks = msgpack.unpackb(data)
for chunk in chunks:
yield chunk
def get_items(self):
for id in self.metadata['items_ids']:
magic, data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, id))
assert magic == PACKET_ARCHIVE_ITEMS
assert items_hash == id
items = msgpack.unpackb(data)
for item in items:
unpacker = msgpack.Unpacker()
for id, size, csize in self.metadata['items']:
data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
assert self.key.id_hash(data) == id
unpacker.feed(data)
for item in unpacker:
yield item
def add_item(self, item):
self.items.append(item)
if len(self.items) > 100000:
def add_item(self, item, refs=None):
data = msgpack.packb(item)
prefix = dirname(item['path'])
if self.items_prefix and self.items_prefix != prefix:
self.flush_items()
if refs:
self.items_refs += refs
self.items += data
self.items_prefix = prefix
def flush_items(self):
data, hash = self.keychain.encrypt(PACKET_ARCHIVE_ITEMS, msgpack.packb(self.items))
self.store.put(NS_ARCHIVE_ITEMS, hash, data)
self.items_ids.append(hash)
self.items = []
def save_chunks(self, cache):
chunks = []
ids = []
def flush(chunks):
data, hash = self.keychain.encrypt(PACKET_ARCHIVE_CHUNKS, msgpack.packb(chunks))
self.store.put(NS_ARCHIVE_CHUNKS, hash, data)
ids.append(hash)
for id, (count, size) in cache.chunks.iteritems():
if count > 1000000:
chunks.append((id, size))
if len(chunks) > 100000:
flush(chunks)
chunks = []
flush(chunks)
return ids
if not self.items:
return
id = self.key.id_hash(self.items)
if self.cache.seen_chunk(id):
self.items_ids.append(self.cache.chunk_incref(id))
for id in self.items_refs:
self.cache.chunk_decref(id)
else:
self.items_ids.append(self.cache.add_chunk(id, self.items))
self.items = ''
self.items_refs = []
self.items_prefix = ''
def save(self, name, cache):
self.id = self.keychain.id_hash(name)
chunks_ids = self.save_chunks(cache)
self.id = self.key.archive_hash(name)
self.flush_items()
metadata = {
'version': 1,
'name': name,
'chunks_ids': chunks_ids,
'items_ids': self.items_ids,
'items': self.items_ids,
'cmdline': sys.argv,
'hostname': socket.gethostname(),
'username': getuser(),
'time': datetime.utcnow().isoformat(),
}
data, self.hash = self.keychain.encrypt(PACKET_ARCHIVE_METADATA, msgpack.packb(metadata))
data, self.hash = self.key.encrypt(msgpack.packb(metadata))
self.store.put(NS_ARCHIVE_METADATA, self.id, data)
self.store.commit()
cache.commit()
def stats(self, cache):
osize = csize = usize = 0
def get_chunks(self):
for item in self.get_items():
if stat.S_ISREG(item['mode']) and not 'source' in item:
osize += item['size']
for id, size in self.get_chunks():
csize += size
if cache.seen_chunk(id) == 1:
usize += size
return osize, csize, usize
try:
for chunk in item['chunks']:
yield chunk
except KeyError:
pass
def stats(self, cache):
# This function is a bit evil since it abuses the cache to calculate
# the stats. The cache transaction must be rolled back afterwards
unpacker = msgpack.Unpacker()
cache.begin_txn()
osize = zsize = usize = 0
for id, size, csize in self.metadata['items']:
osize += size
zsize += csize
unique = self.cache.seen_chunk(id) == 1
if unique:
usize += csize
data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
assert self.key.id_hash(data) == id
unpacker.feed(data)
for item in unpacker:
try:
for id, size, csize in item['chunks']:
osize += size
zsize += csize
if unique and self.cache.seen_chunk(id) == 1:
usize += csize
except KeyError:
pass
cache.rollback()
return osize, zsize, usize
def extract_item(self, item, dest=None, start_cb=None):
dest = dest or os.getcwdu()
@@ -163,14 +174,13 @@ class Archive(object):
if i==0:
start_cb(item)
assert not error
magic, data, hash = self.keychain.decrypt(chunk)
assert magic == PACKET_CHUNK
if self.keychain.id_hash(data) != id:
data, hash = self.key.decrypt(chunk)
if self.key.id_hash(data) != id:
raise IntegrityError('chunk hash did not match')
fd.write(data)
if last:
self.restore_attrs(path, item)
fd.close()
self.restore_attrs(path, item)
fd = open(path, 'wb')
n = len(item['chunks'])
@@ -179,7 +189,7 @@ class Archive(object):
self.restore_attrs(path, item)
fd.close()
else:
for i, id in enumerate(item['chunks']):
for i, (id, size, csize) in enumerate(item['chunks']):
self.store.get(NS_CHUNK, id, callback=extract_cb, callback_data=(id, i, i==n-1))
else:
@@ -206,16 +216,15 @@ class Archive(object):
pass
if not symlink:
# FIXME: We should really call futimes here (c extension required)
os.utime(path, (item['atime'], item['mtime']))
os.utime(path, (item['mtime'], item['mtime']))
def verify_file(self, item, start, result):
def verify_chunk(chunk, error, (id, i, last)):
if i == 0:
start(item)
assert not error
magic, data, hash = self.keychain.decrypt(chunk)
assert magic == PACKET_CHUNK
if self.keychain.id_hash(data) != id:
data, hash = self.key.decrypt(chunk)
if self.key.id_hash(data) != id:
result(item, False)
elif last:
result(item, True)
@@ -224,17 +233,24 @@ class Archive(object):
start(item)
result(item, True)
else:
for i, id in enumerate(item['chunks']):
for i, (id, size, csize) in enumerate(item['chunks']):
self.store.get(NS_CHUNK, id, callback=verify_chunk, callback_data=(id, i, i==n-1))
def delete(self, cache):
for id, size in self.get_chunks():
cache.chunk_decref(id)
unpacker = msgpack.Unpacker()
for id, size, csize in self.metadata['items']:
if self.cache.seen_chunk(id) == 1:
data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
assert self.key.id_hash(data) == id
unpacker.feed(data)
for item in unpacker:
try:
for chunk_id, size, csize in item['chunks']:
self.cache.chunk_decref(chunk_id)
except KeyError:
pass
self.cache.chunk_decref(id)
self.store.delete(NS_ARCHIVE_METADATA, self.id)
for id in self.metadata['chunks_ids']:
self.store.delete(NS_ARCHIVE_CHUNKS, id)
for id in self.metadata['items_ids']:
self.store.delete(NS_ARCHIVE_ITEMS, id)
self.store.commit()
cache.commit()
@@ -243,7 +259,7 @@ class Archive(object):
'mode': st.st_mode,
'uid': st.st_uid, 'user': uid2user(st.st_uid),
'gid': st.st_gid, 'group': gid2group(st.st_gid),
'atime': st.st_atime, 'mtime': st.st_mtime,
'mtime': st.st_mtime,
}
try:
xa = xattr(path, XATTR_NOFOLLOW)
@@ -287,34 +303,33 @@ class Archive(object):
return
else:
self.hard_links[st.st_ino, st.st_dev] = safe_path
path_hash = self.keychain.id_hash(path.encode('utf-8'))
ids, size = cache.file_known_and_unchanged(path_hash, st)
path_hash = self.key.id_hash(path.encode('utf-8'))
ids = cache.file_known_and_unchanged(path_hash, st)
chunks = None
if ids is not None:
# Make sure all ids are available
for id in ids:
if not cache.seen_chunk(id):
ids = None
break
else:
for id in ids:
cache.chunk_incref(id)
chunks = [cache.chunk_incref(id) for id in ids]
# Only chunkify the file if needed
if ids is None:
if chunks is None:
with open(path, 'rb') as fd:
size = 0
ids = []
chunks = []
for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE,
self.keychain.get_chunkify_seed()):
ids.append(cache.add_chunk(self.keychain.id_hash(chunk), chunk))
size += len(chunk)
self.key.chunk_seed):
chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk))
ids = [id for id, _, _ in chunks]
cache.memorize_file(path_hash, st, ids)
item = {'path': safe_path, 'chunks': ids, 'size': size}
item = {'path': safe_path, 'chunks': chunks}
item.update(self.stat_attrs(st, path))
self.add_item(item)
self.add_item(item, ids)
@staticmethod
def list_archives(store, keychain):
def list_archives(store, key):
for id in list(store.list(NS_ARCHIVE_METADATA)):
archive = Archive(store, keychain)
archive = Archive(store, key)
archive.load(id)
yield archive
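
Two mechanisms above deserve a note. Items are serialized one msgpack object at a time into self.items, flushed whenever the directory prefix changes, and each flushed blob goes through the ordinary chunk cache, so runs of identical items deduplicate across archives. Reading back relies on msgpack's streaming Unpacker, which tolerates item boundaries that do not line up with chunk boundaries. A small runnable sketch of that feed/iterate pattern, with a hypothetical two-blob input:

import msgpack

unpacker = msgpack.Unpacker()
for blob in (msgpack.packb({'path': 'a'}), msgpack.packb({'path': 'b'})):
    unpacker.feed(blob)       # feed arbitrary byte runs
    for item in unpacker:     # yields only completed objects
        print item['path']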

View File

@@ -8,7 +8,7 @@ import sys
from .archive import Archive
from .store import Store
from .cache import Cache
from .keychain import Keychain
from .key import Key
from .helpers import location_validator, format_file_size, format_time,\
format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime
from .remote import StoreServer, RemoteStore
@@ -44,18 +44,22 @@ class Archiver(object):
def do_serve(self, args):
return StoreServer().serve()
def do_init(self, args):
store = self.open_store(args.store, create=True)
key = Key.create(store)
def do_create(self, args):
store = self.open_store(args.archive, create=True)
keychain = Keychain(args.keychain)
store = self.open_store(args.archive)
key = Key(store)
try:
Archive(store, keychain, args.archive.archive)
Archive(store, key, args.archive.archive)
except Archive.DoesNotExist:
pass
else:
self.print_error('Archive already exists')
return self.exit_code
archive = Archive(store, keychain)
cache = Cache(store, keychain)
cache = Cache(store, key)
archive = Archive(store, key, cache=cache)
# Add darc cache dir to inode_skip list
skip_inodes = set()
try:
@@ -112,8 +116,8 @@ class Archiver(object):
def start_cb(item):
self.print_verbose(item['path'].decode('utf-8'))
store = self.open_store(args.archive)
keychain = Keychain(args.keychain)
archive = Archive(store, keychain, args.archive.archive)
key = Key(store)
archive = Archive(store, key, args.archive.archive)
dirs = []
for item in archive.get_items():
if exclude_path(item['path'], args.patterns):
@@ -131,22 +135,24 @@ class Archiver(object):
def do_delete(self, args):
store = self.open_store(args.archive)
keychain = Keychain(args.keychain)
archive = Archive(store, keychain, args.archive.archive)
cache = Cache(store, keychain)
key = Key(store)
cache = Cache(store, key)
archive = Archive(store, key, args.archive.archive, cache=cache)
archive.delete(cache)
return self.exit_code
def do_list(self, args):
store = self.open_store(args.src)
keychain = Keychain(args.keychain)
key = Key(store)
if args.src.archive:
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
archive = Archive(store, keychain, args.src.archive)
archive = Archive(store, key, args.src.archive)
for item in archive.get_items():
type = tmap.get(item['mode'] / 4096, '?')
mode = format_file_mode(item['mode'])
size = item.get('size', 0)
size = 0
if type == '-':
size = sum(size for _, size, _ in item['chunks'])
mtime = format_time(datetime.fromtimestamp(item['mtime']))
if 'source' in item:
if type == 'l':
@@ -160,14 +166,14 @@ class Archiver(object):
item['group'], size, mtime,
item['path'], extra)
else:
for archive in sorted(Archive.list_archives(store, keychain), key=attrgetter('ts')):
for archive in sorted(Archive.list_archives(store, key), key=attrgetter('ts')):
print '%-20s %s' % (archive.metadata['name'], to_localtime(archive.ts).strftime('%c'))
return self.exit_code
def do_verify(self, args):
store = self.open_store(args.archive)
keychain = Keychain(args.keychain)
archive = Archive(store, keychain, args.archive.archive)
key = Key(store)
archive = Archive(store, key, args.archive.archive)
def start_cb(item):
self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
def result_cb(item, success):
@@ -187,9 +193,9 @@ class Archiver(object):
def do_info(self, args):
store = self.open_store(args.archive)
keychain = Keychain(args.keychain)
archive = Archive(store, keychain, args.archive.archive)
cache = Cache(store, keychain)
key = Key(store)
cache = Cache(store, key)
archive = Archive(store, key, args.archive.archive, cache=cache)
osize, csize, usize = archive.stats(cache)
print 'Name:', archive.metadata['name']
print 'Hostname:', archive.metadata['hostname']
@@ -201,45 +207,28 @@ class Archiver(object):
print 'Unique data:', format_file_size(usize)
return self.exit_code
def do_init_keychain(self, args):
return Keychain.generate(args.keychain)
def do_export_restricted(self, args):
keychain = Keychain(args.keychain)
keychain.restrict(args.output)
return self.exit_code
def do_keychain_chpass(self, args):
return Keychain(args.keychain).chpass()
def run(self, args=None):
dot_path = os.path.join(os.path.expanduser('~'), '.darc')
if not os.path.exists(dot_path):
os.mkdir(dot_path)
default_keychain = os.path.join(os.path.expanduser('~'),
'.darc', 'keychain')
os.mkdir(os.path.join(dot_path, 'keys'))
os.mkdir(os.path.join(dot_path, 'cache'))
parser = argparse.ArgumentParser(description='DARC - Deduplicating Archiver')
parser.add_argument('-k', '--keychain', dest='keychain', type=str,
default=default_keychain,
help='Keychain to use')
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
default=False,
help='Verbose output')
subparsers = parser.add_subparsers(title='Available subcommands')
subparser = subparsers.add_parser('init-keychain')
subparser.set_defaults(func=self.do_init_keychain)
subparser = subparsers.add_parser('export-restricted')
subparser.add_argument('output', metavar='OUTPUT', type=str,
help='Keychain to create')
subparser.set_defaults(func=self.do_export_restricted)
subparser = subparsers.add_parser('change-password')
subparser.set_defaults(func=self.do_keychain_chpass)
subparser = subparsers.add_parser('serve')
subparser.set_defaults(func=self.do_serve)
subparser = subparsers.add_parser('init')
subparser.set_defaults(func=self.do_init)
subparser.add_argument('store', metavar='ARCHIVE',
type=location_validator(archive=False),
help='Store to create')
subparser = subparsers.add_parser('create')
subparser.set_defaults(func=self.do_create)
subparser.add_argument('-i', '--include', dest='patterns',

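In do_list, file size is no longer stored on the item; it is recomputed from the (id, size, csize) chunk triples. The type letter comes from the high four bits of st_mode: mode / 4096 is mode >> 12, and tmap's keys are Python 2 octal literals (010 is S_IFREG >> 12, and so on). A small sketch, assuming Python 2:

import stat

tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
mode = stat.S_IFREG | 0644            # a regular file
assert tmap.get(mode / 4096, '?') == '-'
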
View File

@@ -5,19 +5,19 @@ import msgpack
import os
import shutil
from . import NS_ARCHIVE_CHUNKS, NS_CHUNK, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK
from . import NS_CHUNK, NS_ARCHIVE_METADATA
from .helpers import error_callback
from .hashindex import NSIndex
from .hashindex import ChunkIndex
class Cache(object):
"""Client Side cache
"""
def __init__(self, store, keychain):
def __init__(self, store, key):
self.txn_active = False
self.store = store
self.keychain = keychain
self.key = key
self.path = os.path.join(Cache.cache_dir_path(), self.store.id.encode('hex'))
if not os.path.exists(self.path):
self.create()
@@ -25,6 +25,7 @@ class Cache(object):
assert self.id == store.id
if self.tid != store.tid:
self.sync()
self.commit()
@staticmethod
def cache_dir_path():
@@ -44,7 +45,7 @@ class Cache(object):
config.set('cache', 'tid', '0')
with open(os.path.join(self.path, 'config'), 'wb') as fd:
config.write(fd)
NSIndex.create(os.path.join(self.path, 'chunks'))
ChunkIndex.create(os.path.join(self.path, 'chunks'))
with open(os.path.join(self.path, 'files'), 'wb') as fd:
pass # empty file
@@ -60,7 +61,7 @@ class Cache(object):
raise Exception('%s does not look like a darc cache' % self.path)
self.id = self.config.get('cache', 'store_id').decode('hex')
self.tid = self.config.getint('cache', 'tid')
self.chunks = NSIndex(os.path.join(self.path, 'chunks'))
self.chunks = ChunkIndex(os.path.join(self.path, 'chunks'))
self.files = None
def _read_files(self):
@@ -96,9 +97,6 @@ class Cache(object):
with open(os.path.join(self.path, 'files'), 'wb') as fd:
for item in self.files.iteritems():
msgpack.pack(item, fd)
for id, (count, size) in self.chunks.iteritems():
if count > 1000000:
self.chunks[id] = count - 1000000, size
self.config.set('cache', 'tid', self.store.tid)
with open(os.path.join(self.path, 'config'), 'w') as fd:
self.config.write(fd)
@@ -129,48 +127,63 @@ class Cache(object):
self.begin_txn()
print 'Initializing cache...'
self.chunks.clear()
for id in self.store.list(NS_ARCHIVE_CHUNKS):
magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id))
assert magic == PACKET_ARCHIVE_CHUNKS
chunks = msgpack.unpackb(data)
for id, size in chunks:
unpacker = msgpack.Unpacker()
for id in self.store.list(NS_ARCHIVE_METADATA):
data, hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, id))
archive = msgpack.unpackb(data)
print 'Analyzing archive:', archive['name']
for id, size, csize in archive['items']:
data, hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
assert self.key.id_hash(data) == id
try:
count, size = self.chunks[id]
self.chunks[id] = count + 1, size
count, size, csize = self.chunks[id]
self.chunks[id] = count + 1, size, csize
except KeyError:
self.chunks[id] = 1, size
self.chunks[id] = 1, size, csize
unpacker.feed(data)
for item in unpacker:
try:
for id, size, csize in item['chunks']:
try:
count, size, csize = self.chunks[id]
self.chunks[id] = count + 1, size, csize
except KeyError:
self.chunks[id] = 1, size, csize
except KeyError:
pass
def add_chunk(self, id, data):
if not self.txn_active:
self.begin_txn()
if self.seen_chunk(id):
return self.chunk_incref(id)
data, hash = self.keychain.encrypt(PACKET_CHUNK, data)
size = len(data)
data, hash = self.key.encrypt(data)
csize = len(data)
self.store.put(NS_CHUNK, id, data, callback=error_callback)
self.chunks[id] = (1000001, csize)
return id
self.chunks[id] = (1, size, csize)
return id, size, csize
def seen_chunk(self, id):
return self.chunks.get(id, (0, 0))[0]
return self.chunks.get(id, (0, 0, 0))[0]
def chunk_incref(self, id):
if not self.txn_active:
self.begin_txn()
count, size = self.chunks[id]
if count < 1000000:
self.chunks[id] = (count + 1000001, size)
return id
count, size, csize = self.chunks[id]
self.chunks[id] = (count + 1, size, csize)
return id, size, csize
def chunk_decref(self, id):
if not self.txn_active:
self.begin_txn()
count, size = self.chunks[id]
count, size, csize = self.chunks[id]
if count == 1:
del self.chunks[id]
self.store.delete(NS_CHUNK, id, callback=error_callback)
else:
self.chunks[id] = (count - 1, size)
self.chunks[id] = (count - 1, size, csize)
def file_known_and_unchanged(self, path_hash, st):
if self.files is None:
@@ -180,9 +193,9 @@ class Cache(object):
and entry[2] == st.st_size and entry[1] == st.st_ino):
# reset entry age
self.files[path_hash] = (0,) + entry[1:]
return entry[4], entry[2]
return entry[4]
else:
return None, 0
return None
def memorize_file(self, path_hash, st, ids):
# Entry: Age, inode, size, mtime, chunk ids
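
The chunk index entry grows from (count, size) to (count, size, csize), and the old convention of adding 1,000,000 to a refcount to mark it as referenced inside the current transaction is dropped: counts are now plain integers, and sync() rebuilds them from archive metadata when the cache is stale. A hypothetical in-memory stand-in (not the real Cache) showing the resulting life cycle:

class RefcountDemo(object):
    def __init__(self):
        self.chunks = {}
    def add_chunk(self, id, size, csize):
        count = self.chunks.get(id, (0, size, csize))[0]
        self.chunks[id] = (count + 1, size, csize)
        return id, size, csize
    def decref(self, id):
        count, size, csize = self.chunks[id]
        if count == 1:
            del self.chunks[id]   # last reference: chunk is deleted
        else:
            self.chunks[id] = (count - 1, size, csize)

demo = RefcountDemo()
demo.add_chunk('id1', 100, 60)
demo.add_chunk('id1', 100, 60)    # a second archive reuses the chunk
demo.decref('id1')
demo.decref('id1')
assert 'id1' not in demo.chunks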

View File

@@ -113,6 +113,62 @@ cdef class NSKeyIterator:
return self.key[:32], (value[0], value[1])
cdef class ChunkIndex(IndexBase):
@classmethod
def create(cls, path, capacity=16):
index = hashindex_create(path, capacity, 32, 12)
hashindex_close(index)
return cls(path)
def __getitem__(self, key):
assert len(key) == 32
data = <int *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return data[0], data[1], data[2]
def __delitem__(self, key):
assert len(key) == 32
hashindex_delete(self.index, <char *>key)
def __setitem__(self, key, value):
assert len(key) == 32
cdef int[3] data
data[0] = value[0]
data[1] = value[1]
data[2] = value[2]
hashindex_set(self.index, <char *>key, data)
def __contains__(self, key):
assert len(key) == 32
data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL
def iteritems(self, marker=None, limit=0):
iter = ChunkKeyIterator()
iter.index = self.index
return iter
cdef class ChunkKeyIterator:
cdef HashIndex *index
cdef char *key
def __cinit__(self):
self.key = NULL
def __iter__(self):
return self
def __next__(self):
self.key = <char *>hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + 32)
return self.key[:32], (value[0], value[1], value[2])
cdef class BandIndex(IndexBase):
@classmethod

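hashindex_create(path, capacity, 32, 12) pins ChunkIndex records to a 32-byte key, the SHA-256-sized HMAC chunk id, plus a 12-byte value read and written as int[3]: refcount, size, csize. A layout sketch in struct terms, assuming the platform's native int is 32 bits as the Cython code does:

import struct

value = struct.pack('=iii', 1, 65536, 21000)   # count, size, csize
assert len(value) == 12                        # matches the value size of 12
count, size, csize = struct.unpack('=iii', value)
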
View File

@@ -70,17 +70,6 @@ def decode_long(bytes):
return v + (b << base)
def zero_pad(data, length):
"""Make sure data is `length` bytes long by prepending zero bytes
>>> zero_pad('foo', 5)
'\\x00\\x00foo'
>>> zero_pad('foo', 3)
'foo'
"""
return '\0' * (length - len(data)) + data
def exclude_path(path, patterns):
"""Used by create and extract sub-commands to determine
if an item should be processed or not

darc/key.py (new file, 160 lines)
View File

@@ -0,0 +1,160 @@
from __future__ import with_statement
from getpass import getpass
import hashlib
import os
import msgpack
import zlib
from pbkdf2 import pbkdf2
from Crypto.Cipher import AES
from Crypto.Hash import SHA256, HMAC
from Crypto.Util import Counter
from Crypto.Util.number import bytes_to_long, long_to_bytes
from Crypto.Random import get_random_bytes
from .helpers import IntegrityError
class Key(object):
FILE_ID = 'DARC KEY'
def __init__(self, store=None):
if store:
self.open(store)
def open(self, store):
self.path = os.path.join(os.path.expanduser('~'),
'.darc', 'keys', store.id.encode('hex'))
with open(self.path, 'rb') as fd:
lines = fd.readlines()
if not lines[0].startswith(self.FILE_ID):
raise ValueError('Not a DARC key file')
self.store_id = lines[0][len(self.FILE_ID):].strip().decode('hex')
cdata = (''.join(lines[1:])).decode('base64')
self.password = ''
data = self.decrypt_key_file(cdata, '')
while not data:
self.password = getpass('Key password: ')
if not self.password:
raise Exception('Key decryption failed')
data = self.decrypt_key_file(cdata, self.password)
if not data:
print 'Incorrect password'
key = msgpack.unpackb(data)
assert key['version'] == 1
self.store_id = key['store_id']
self.enc_key = key['enc_key']
self.enc_hmac_key = key['enc_hmac_key']
self.id_key = key['id_key']
self.archive_key = key['archive_key']
self.chunk_seed = key['chunk_seed']
self.counter = Counter.new(128, initial_value=bytes_to_long(os.urandom(16)), allow_wraparound=True)
def encrypt_key_file(self, data, password):
salt = get_random_bytes(32)
iterations = 2000
key = pbkdf2(password, salt, 32, iterations, hashlib.sha256)
hash = HMAC.new(key, data, SHA256).digest()
cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data)
d = {
'version': 1,
'salt': salt,
'iterations': iterations,
'algorithm': 'SHA256',
'hash': hash,
'data': cdata,
}
return msgpack.packb(d)
def decrypt_key_file(self, data, password):
d = msgpack.unpackb(data)
assert d['version'] == 1
assert d['algorithm'] == 'SHA256'
key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256)
data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
if HMAC.new(key, data, SHA256).digest() != d['hash']:
return None
return data
def save(self, path, password):
key = {
'version': 1,
'store_id': self.store_id,
'enc_key': self.enc_key,
'enc_hmac_key': self.enc_hmac_key,
'id_key': self.id_key,
'archive_key': self.archive_key,
'chunk_seed': self.chunk_seed,
}
data = self.encrypt_key_file(msgpack.packb(key), password)
with open(path, 'wb') as fd:
fd.write('%s %s\n' % (self.FILE_ID, self.store_id.encode('hex')))
fd.write(data.encode('base64'))
print 'Key chain "%s" created' % path
def chpass(self):
password, password2 = 1, 2
while password != password2:
password = getpass('New password: ')
password2 = getpass('New password again: ')
if password != password2:
print 'Passwords do not match'
self.save(self.path, password)
return 0
@staticmethod
def create(store):
path = os.path.join(os.path.expanduser('~'),
'.darc', 'keys', store.id.encode('hex'))
if os.path.exists(path):
print '%s already exists' % path
return 1
password, password2 = 1, 2
while password != password2:
password = getpass('Keychain password: ')
password2 = getpass('Keychain password again: ')
if password != password2:
print 'Passwords do not match'
key = Key()
key.store_id = store.id
# Chunk AES256 encryption key
key.enc_key = get_random_bytes(32)
# Chunk encryption HMAC key
key.enc_hmac_key = get_random_bytes(32)
# Chunk id HMAC key
key.id_key = get_random_bytes(32)
# Archive name HMAC key
key.archive_key = get_random_bytes(32)
# Chunkifier seed
key.chunk_seed = bytes_to_long(get_random_bytes(4)) & 0x7fffffff
key.save(path, password)
return 0
def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key
"""
return HMAC.new(self.id_key, data, SHA256).digest()
def archive_hash(self, data):
"""Return HMAC hash using the "archive" HMAC key
"""
return HMAC.new(self.archive_key, data, SHA256).digest()
def encrypt(self, data):
data = zlib.compress(data)
nonce = long_to_bytes(self.counter.next_value(), 16)
data = ''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, '',
counter=self.counter).encrypt(data)))
hash = HMAC.new(self.enc_hmac_key, data, SHA256).digest()
return ''.join(('\0', hash, data)), hash
def decrypt(self, data):
assert data[0] == '\0'
hash = data[1:33]
if HMAC.new(self.enc_hmac_key, data[33:], SHA256).digest() != hash:
raise IntegrityError('Encryption integrity error')
nonce = bytes_to_long(data[33:49])
counter = Counter.new(128, initial_value=nonce, allow_wraparound=True)
data = AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(data[49:])
return zlib.decompress(data), hash
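
Every encrypted object now shares one envelope: a zero type byte, a 32-byte HMAC-SHA256 over everything after it, a 16-byte big-endian CTR nonce, then AES-256-CTR ciphertext of the zlib-compressed payload. A runnable round-trip sketch with PyCrypto that mirrors encrypt()/decrypt() above, using throwaway keys rather than a real key file:

import zlib
from Crypto.Cipher import AES
from Crypto.Hash import HMAC, SHA256
from Crypto.Util import Counter
from Crypto.Util.number import bytes_to_long, long_to_bytes
from Crypto.Random import get_random_bytes

# '\0' | hmac[32] | nonce[16] | AES256-CTR(zlib(data))
enc_key, mac_key = get_random_bytes(32), get_random_bytes(32)
nonce = bytes_to_long(get_random_bytes(16))
ctr = Counter.new(128, initial_value=nonce, allow_wraparound=True)
body = long_to_bytes(nonce, 16) + AES.new(enc_key, AES.MODE_CTR,
                                          counter=ctr).encrypt(zlib.compress('payload'))
packet = '\0' + HMAC.new(mac_key, body, SHA256).digest() + body

# decrypt side: verify HMAC first, then rebuild the counter from the nonce
assert HMAC.new(mac_key, packet[33:], SHA256).digest() == packet[1:33]
ctr2 = Counter.new(128, initial_value=bytes_to_long(packet[33:49]),
                   allow_wraparound=True)
plain = zlib.decompress(AES.new(enc_key, AES.MODE_CTR,
                                counter=ctr2).decrypt(packet[49:]))
assert plain == 'payload'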

View File

@@ -1,189 +0,0 @@
from __future__ import with_statement
from getpass import getpass
import hashlib
import os
import msgpack
import zlib
from pbkdf2 import pbkdf2
from Crypto.Cipher import AES
from Crypto.Hash import SHA256, HMAC
from Crypto.PublicKey import RSA
from Crypto.Util import Counter
from Crypto.Util.number import bytes_to_long, long_to_bytes
from . import PACKET_ENCRYPT_READ, PACKET_ENCRYPT_CREATE
from .helpers import IntegrityError, zero_pad
from .oaep import OAEP
class Keychain(object):
FILE_ID = 'DARC KEYCHAIN'
CREATE = '\1'
READ = '\2'
def __init__(self, path=None):
self._key_cache = {}
self.read_key = os.urandom(32)
self.create_key = os.urandom(32)
self.counter = Counter.new(64, prefix='\0' * 8)
self.aes_id = self.rsa_read = self.rsa_create = None
self.path = path
if path:
self.open(path)
def get_chunkify_seed(self):
return bytes_to_long(self.aes_id[:4]) & 0x7fffffff
def open(self, path):
print 'Opening keychain "%s"' % path
with open(path, 'rb') as fd:
if fd.read(len(self.FILE_ID)) != self.FILE_ID:
raise ValueError('Not a keychain')
cdata = fd.read()
self.password = ''
data = self.decrypt_keychain(cdata, '')
while not data:
self.password = getpass('Keychain password: ')
if not self.password:
raise Exception('Keychain decryption failed')
data = self.decrypt_keychain(cdata, self.password)
if not data:
print 'Incorrect password'
chain = msgpack.unpackb(data)
assert chain['version'] == 1
self.aes_id = chain['aes_id']
self.rsa_read = RSA.importKey(chain['rsa_read'])
self.rsa_create = RSA.importKey(chain['rsa_create'])
self.read_encrypted = OAEP(256, hash=SHA256).encode(self.read_key, os.urandom(32))
self.read_encrypted = zero_pad(self.rsa_read.encrypt(self.read_encrypted, '')[0], 256)
self.create_encrypted = OAEP(256, hash=SHA256).encode(self.create_key, os.urandom(32))
self.create_encrypted = zero_pad(self.rsa_create.encrypt(self.create_encrypted, '')[0], 256)
def encrypt_keychain(self, data, password):
salt = os.urandom(32)
iterations = 2000
key = pbkdf2(password, salt, 32, iterations, hashlib.sha256)
hash = HMAC.new(key, data, SHA256).digest()
cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data)
d = {
'version': 1,
'salt': salt,
'iterations': iterations,
'algorithm': 'SHA256',
'hash': hash,
'data': cdata,
}
return msgpack.packb(d)
def decrypt_keychain(self, data, password):
d = msgpack.unpackb(data)
assert d['version'] == 1
assert d['algorithm'] == 'SHA256'
key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256)
data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
if HMAC.new(key, data, SHA256).digest() != d['hash']:
return None
return data
def save(self, path, password):
chain = {
'version': 1,
'aes_id': self.aes_id,
'rsa_read': self.rsa_read.exportKey('PEM'),
'rsa_create': self.rsa_create.exportKey('PEM'),
}
data = self.encrypt_keychain(msgpack.packb(chain), password)
with open(path, 'wb') as fd:
fd.write(self.FILE_ID)
fd.write(data)
print 'Key chain "%s" saved' % path
def restrict(self, path):
if os.path.exists(path):
print '%s already exists' % path
return 1
self.rsa_read = self.rsa_read.publickey()
self.save(path, self.password)
return 0
def chpass(self):
password, password2 = 1, 2
while password != password2:
password = getpass('New password: ')
password2 = getpass('New password again: ')
if password != password2:
print 'Passwords do not match'
self.save(self.path, password)
return 0
@staticmethod
def generate(path):
if os.path.exists(path):
print '%s already exists' % path
return 1
password, password2 = 1, 2
while password != password2:
password = getpass('Keychain password: ')
password2 = getpass('Keychain password again: ')
if password != password2:
print 'Passwords do not match'
chain = Keychain()
print 'Generating keychain'
chain.aes_id = os.urandom(32)
chain.rsa_read = RSA.generate(2048)
chain.rsa_create = RSA.generate(2048)
chain.save(path, password)
return 0
def id_hash(self, data):
"""Return HMAC hash using the "id" AES key
"""
return HMAC.new(self.aes_id, data, SHA256).digest()
def encrypt(self, magic, data):
"""Helper function used by `encrypt_read` and `encrypt_create`
"""
data = zlib.compress(data)
nonce = long_to_bytes(self.counter.next_value(), 8)
if magic & PACKET_ENCRYPT_READ:
data = ''.join((nonce, self.read_encrypted,
AES.new(self.read_key, AES.MODE_CTR, '',
counter=self.counter).encrypt(data)))
elif magic & PACKET_ENCRYPT_CREATE:
data = ''.join((nonce, self.create_encrypted,
AES.new(self.create_key, AES.MODE_CTR, '',
counter=self.counter).encrypt(data)))
hash = self.id_hash(data)
return ''.join((chr(magic), hash, data)), hash
def _decrypt_key(self, data, rsa_key):
"""Helper function used by `decrypt`
"""
try:
return self._key_cache[data]
except KeyError:
self._key_cache[data] = OAEP(256, hash=SHA256).decode(rsa_key.decrypt(data))
return self._key_cache[data]
def decrypt(self, data):
"""Decrypt `data` previously encrypted by `encrypt_create` or `encrypt_read`
"""
magic = ord(data[0])
hash = data[1:33]
if self.id_hash(data[33:]) != hash:
raise IntegrityError('Encryption integrity error')
nonce = bytes_to_long(data[33:41])
counter = Counter.new(64, prefix='\0' * 8, initial_value=nonce)
if magic & PACKET_ENCRYPT_READ:
key = self._decrypt_key(data[41:297], self.rsa_read)
elif magic & PACKET_ENCRYPT_CREATE:
key = self._decrypt_key(data[41:297], self.rsa_create)
else:
raise Exception('Unknown pack magic %d found' % magic)
data = AES.new(key, AES.MODE_CTR, counter=counter).decrypt(data[297:])
return magic, zlib.decompress(data), hash
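
For contrast with the new format: this deleted scheme framed each object as a magic byte, an HMAC, an 8-byte nonce and a 256-byte RSA-encrypted session key before the ciphertext, which is why decrypt() above slices data[41:297] and data[297:]. Dropping the RSA layer is what shrinks the header and removes the read/create key split:

# Old envelope (this deleted file):
#   magic[1] | hmac[32] | nonce[8] | rsa(session key)[256] | AES-CTR(zlib(data))
OLD_HEADER = 1 + 32 + 8 + 256    # 297 bytes, matches data[297:] above
# New envelope (darc/key.py):
#   '\0'[1]  | hmac[32] | nonce[16]                        | AES-CTR(zlib(data))
NEW_HEADER = 1 + 32 + 16         # 49 bytes, matches data[49:] in key.py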

View File

@@ -1,71 +0,0 @@
from Crypto.Util.number import long_to_bytes
from Crypto.Hash import SHA
from .helpers import IntegrityError
def _xor_bytes(a, b):
return ''.join(chr(ord(x[0]) ^ ord(x[1])) for x in zip(a, b))
def MGF1(seed, mask_len, hash=SHA):
"""MGF1 is a Mask Generation Function based on hash function
"""
T = ''.join(hash.new(seed + long_to_bytes(c, 4)).digest()
for c in range(1 + mask_len / hash.digest_size))
return T[:mask_len]
class OAEP(object):
"""Optimal Asymmetric Encryption Padding
"""
def __init__(self, k, hash=SHA, MGF=MGF1):
self.k = k
self.hash = hash
self.MGF = MGF
def encode(self, msg, seed, label=''):
# FIXME: length checks
if len(msg) > self.k - 2 * self.hash.digest_size - 2:
raise ValueError('message too long')
label_hash = self.hash.new(label).digest()
padding = '\0' * (self.k - len(msg) - 2 * self.hash.digest_size - 2)
datablock = '%s%s\1%s' % (label_hash, padding, msg)
datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash)
masked_db = _xor_bytes(datablock, datablock_mask)
seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash)
masked_seed = _xor_bytes(seed, seed_mask)
return '\0%s%s' % (masked_seed, masked_db)
def decode(self, ciphertext, label=''):
if len(ciphertext) < self.k:
ciphertext = ('\0' * (self.k - len(ciphertext))) + ciphertext
label_hash = self.hash.new(label).digest()
masked_seed = ciphertext[1:self.hash.digest_size + 1]
masked_db = ciphertext[-(self.k - self.hash.digest_size - 1):]
seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash)
seed = _xor_bytes(masked_seed, seed_mask)
datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash)
datablock = _xor_bytes(masked_db, datablock_mask)
label_hash2 = datablock[:self.hash.digest_size]
data = datablock[self.hash.digest_size:].lstrip('\0')
if (ciphertext[0] != '\0' or
label_hash != label_hash2 or
data[0] != '\1'):
raise IntegrityError('decryption error')
return data[1:]
def test():
from Crypto.Hash import SHA256
import os
import random
oaep = OAEP(256, SHA256)
for x in range(1000):
M = os.urandom(random.randint(0, 100))
EM = oaep.encode(M, os.urandom(32))
assert len(EM) == oaep.k
assert oaep.decode(EM) == M
if __name__ == '__main__':
test()

View File

@@ -32,7 +32,7 @@ class Store(object):
def __init__(self, path, create=False):
self.txn_active = False
if not os.path.exists(path) and create:
if create:
self.create(path)
self.open(path)