From 1ab4ebf2cc7c7c7552e22dd3a06a0ea724ad02c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Fri, 22 Oct 2010 00:10:51 +0200 Subject: [PATCH] Initial AES encryption support --- dedupestore/archive.py | 27 +++++++------- dedupestore/archiver.py | 6 +-- dedupestore/cache.py | 21 ++++------- dedupestore/crypt.py | 82 +++++++++++++++++++++++++++++++++++++++++ dedupestore/helpers.py | 18 --------- 5 files changed, 107 insertions(+), 47 deletions(-) create mode 100644 dedupestore/crypt.py diff --git a/dedupestore/archive.py b/dedupestore/archive.py index 4a06ec640..8215eda06 100644 --- a/dedupestore/archive.py +++ b/dedupestore/archive.py @@ -1,14 +1,13 @@ from datetime import datetime -import hashlib import logging import os import stat import sys -import zlib from .cache import NS_ARCHIVES, NS_CHUNKS, NS_CINDEX from .chunkifier import chunkify -from .helpers import uid2user, user2uid, gid2group, group2gid, pack, unpack +from .crypt import CryptoManager +from .helpers import uid2user, user2uid, gid2group, group2gid CHUNK_SIZE = 55001 @@ -16,29 +15,30 @@ class Archive(object): def __init__(self, store, name=None): + self.crypt = CryptoManager(store) self.store = store self.items = [] self.chunks = [] self.chunk_idx = {} self.hard_links = {} if name: - self.load(hashlib.sha256(name).digest()) + self.load(self.crypt.id_hash(name)) def load(self, id): self.id = id - archive = unpack(self.store.get(NS_ARCHIVES, self.id)) + archive = self.crypt.unpack_read(self.store.get(NS_ARCHIVES, self.id)) if archive['version'] != 1: raise Exception('Archive version %r not supported' % archive['version']) self.items = archive['items'] self.name = archive['name'] - cindex = unpack(self.store.get(NS_CINDEX, self.id)) + cindex = self.crypt.unpack_create(self.store.get(NS_CINDEX, self.id)) assert cindex['version'] == 1 self.chunks = cindex['chunks'] for i, chunk in enumerate(self.chunks): self.chunk_idx[i] = chunk[0] def save(self, name): - self.id = hashlib.sha256(name).digest() + self.id = self.crypt.id_hash(name) archive = { 'version': 1, 'name': name, @@ -46,14 +46,15 @@ def save(self, name): 'ts': datetime.utcnow().isoformat(), 'items': self.items, } - _, data = pack(archive) + data = self.crypt.pack_read(archive) self.store.put(NS_ARCHIVES, self.id, data) cindex = { 'version': 1, 'chunks': self.chunks, } - _, data = pack(cindex) + data = self.crypt.pack_create(cindex) self.store.put(NS_CINDEX, self.id, data) + self.crypt.store_key() self.store.commit() def add_chunk(self, id, size): @@ -117,7 +118,7 @@ def extract(self, dest=None): for chunk in item['chunks']: id = self.chunk_idx[chunk] try: - fd.write(unpack(self.store.get(NS_CHUNKS, id))) + fd.write(self.crypt.unpack_read(self.store.get(NS_CHUNKS, id))) except ValueError: raise Exception('Invalid chunk checksum') self.restore_stat(path, item) @@ -145,7 +146,7 @@ def verify(self): for chunk in item['chunks']: id = self.chunk_idx[chunk] try: - unpack(self.store.get(NS_CHUNKS, id)) + self.crypt.unpack_read(self.store.get(NS_CHUNKS, id)) except ValueError: logging.error('%s ... ERROR', item['path']) break @@ -242,12 +243,12 @@ def process_file(self, path, st, cache): }) def process_chunk(self, data, cache): - id = hashlib.sha256(data).digest() + id = self.crypt.id_hash(data) try: return self.chunk_idx[id] except KeyError: idx = len(self.chunks) - size = cache.add_chunk(id, data) + size = cache.add_chunk(id, data, self.crypt) self.chunks.append((id, size)) self.chunk_idx[id] = idx return idx diff --git a/dedupestore/archiver.py b/dedupestore/archiver.py index 25014412d..63c699d17 100644 --- a/dedupestore/archiver.py +++ b/dedupestore/archiver.py @@ -19,8 +19,8 @@ def exit_code_from_logger(self): def do_create(self, args): store = self.open_store(args.archive) - cache = Cache(store) archive = Archive(store) + cache = Cache(store, archive.crypt) archive.create(args.archive.archive, args.paths, cache) return self.exit_code_from_logger() @@ -32,8 +32,8 @@ def do_extract(self, args): def do_delete(self, args): store = self.open_store(args.archive) - cache = Cache(store) archive = Archive(store, args.archive.archive) + cache = Cache(store, archive.crypt) archive.delete(cache) return self.exit_code_from_logger() @@ -55,8 +55,8 @@ def do_verify(self, args): def do_info(self, args): store = self.open_store(args.archive) - cache = Cache(store) archive = Archive(store, args.archive.archive) + cache = Cache(store, archive.crypt) osize, csize, usize = archive.stats(cache) print 'Original size:', pretty_size(osize) print 'Compressed size:', pretty_size(csize) diff --git a/dedupestore/cache.py b/dedupestore/cache.py index da1916f12..d2a58b2d4 100644 --- a/dedupestore/cache.py +++ b/dedupestore/cache.py @@ -1,10 +1,6 @@ -import hashlib import logging import msgpack import os -import zlib - -from .helpers import pack, unpack NS_ARCHIVES = 'A' NS_CHUNKS = 'C' @@ -15,20 +11,19 @@ class Cache(object): """Client Side cache """ - def __init__(self, store): + def __init__(self, store, crypt): self.store = store self.path = os.path.join(os.path.expanduser('~'), '.dedupestore', 'cache', '%s.cache' % self.store.uuid) self.tid = -1 self.open() if self.tid != self.store.tid: - self.init() + self.init(crypt) def open(self): if not os.path.exists(self.path): return - data = open(self.path, 'rb').read() - cache = unpack(data) + cache = msgpack.unpackb(open(self.path, 'rb').read()) version = cache.get('version') if version != 1: logging.error('Unsupported cache version %r' % version) @@ -38,7 +33,7 @@ def open(self): self.chunkmap = cache['chunkmap'] self.tid = cache['tid'] - def init(self): + def init(self, crypt): """Initializes cache by fetching and reading all archive indicies """ logging.info('Initializing cache...') @@ -47,7 +42,7 @@ def init(self): if self.store.tid == 0: return for id in list(self.store.list(NS_CINDEX)): - cindex = unpack(self.store.get(NS_CINDEX, id)) + cindex = crypt.unpack_create(self.store.get(NS_CINDEX, id)) for id, size in cindex['chunks']: try: count, size = self.chunkmap[id] @@ -63,17 +58,17 @@ def save(self): 'chunkmap': self.chunkmap, 'tid': self.store.tid, } - _, data = pack(cache) + data = msgpack.packb(cache) cachedir = os.path.dirname(self.path) if not os.path.exists(cachedir): os.makedirs(cachedir) with open(self.path, 'wb') as fd: fd.write(data) - def add_chunk(self, id, data): + def add_chunk(self, id, data, crypt): if self.seen_chunk(id): return self.chunk_incref(id) - _, data = pack(data) + data = crypt.pack_read(data) csize = len(data) self.store.put(NS_CHUNKS, id, data) self.chunkmap[id] = (1, csize) diff --git a/dedupestore/crypt.py b/dedupestore/crypt.py new file mode 100644 index 000000000..981f2f6c4 --- /dev/null +++ b/dedupestore/crypt.py @@ -0,0 +1,82 @@ +import hashlib +import hmac +import msgpack +import os +import zlib + +from Crypto.Cipher import AES + + +class CryptoManager(object): + + KEY_CREATE = 1 + KEY_READ = 2 + KEY_ID = 3 + KEY_ARCHIVE = 4 + KEY_CINDEX = 5 + + def __init__(self, store): + self.key_cache = {} + self.store = store + self.tid = store.tid + self.id_key = '0' * 32 + self.read_key = os.urandom(32) + self.create_key = os.urandom(32) + + def get_key(self, tid): + try: + return self.key_cache[tid] + except KeyError: + keys = self.load_key(tid) + self.key_cache[tid] = keys + return keys + + def load_key(self, tid): + data = self.store.get('K', str(tid)) + id = data[:32] + if self.id_hash(data[32:]) != id: + raise Exception('Invalid key object found') + key = msgpack.unpackb(data[32:]) + return key['create'], key['read'] + + def store_key(self): + key = { + 'version': 1, + 'read': self.read_key, + 'create': self.create_key, + } + data = msgpack.packb(key) + id = self.id_hash(data) + self.store.put('K', str(self.tid), id + data) + + def id_hash(self, data): + return hmac.new(self.id_key, data, hashlib.sha256).digest() + + def pack(self, data, key): + data = zlib.compress(msgpack.packb(data)) + id = hmac.new(key, data, hashlib.sha256).digest() + data = AES.new(key, AES.MODE_CFB, id[:16]).encrypt(data) + return id + msgpack.packb((1, self.tid, data)) + + def pack_read(self, data): + return self.pack(data, self.read_key) + + def pack_create(self, data): + return self.pack(data, self.create_key) + + def unpack(self, data, key_idx): + id = data[:32] + version, tid, data = msgpack.unpackb(data[32:]) + assert version == 1 + key = self.get_key(tid)[key_idx] + data = AES.new(key, AES.MODE_CFB, id[:16]).decrypt(data) + if hmac.new(key, data, hashlib.sha256).digest() != id: + raise ValueError + return msgpack.unpackb(zlib.decompress(data)) + + def unpack_read(self, data): + return self.unpack(data, 1) + + def unpack_create(self, data): + return self.unpack(data, 0) + diff --git a/dedupestore/helpers.py b/dedupestore/helpers.py index 3ae98c0f3..8a4267c64 100644 --- a/dedupestore/helpers.py +++ b/dedupestore/helpers.py @@ -1,26 +1,8 @@ import argparse import grp -import hashlib import logging -import msgpack import pwd import re -import zlib - - -def pack(data): - data = zlib.compress(msgpack.packb(data)) - id = hashlib.sha256(data).digest() - tid = 0 - return id, msgpack.packb((1, tid, id, data)) - - -def unpack(data): - version, tid, id, data = msgpack.unpackb(data) - assert version == 1 - if hashlib.sha256(data).digest() != id: - raise ValueError - return msgpack.unpackb(zlib.decompress(data)) def memoize(function):