From 98b1b5e0ce79415b07adc5c184d09032548c04b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Fri, 15 Oct 2010 22:18:22 +0200 Subject: [PATCH] Switch from sha1 to sha256 --- dedupestore/archiver.py | 69 +++++++++++++++++++++++++---------------- dedupestore/cache.py | 37 ++++++++++++---------- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/dedupestore/archiver.py b/dedupestore/archiver.py index 952debd29..c028822c8 100644 --- a/dedupestore/archiver.py +++ b/dedupestore/archiver.py @@ -15,14 +15,34 @@ class Archive(object): - def __init__(self, store, name=None): + def __init__(self, store, cache, name=None): self.store = store + self.cache = cache self.items = [] self.chunks = [] self.chunk_idx = {} if name: self.open(name) + def open(self, name): + id = self.cache.archives[name] + data = self.store.get(NS_ARCHIVES, id) + if hashlib.sha256(data).digest() != id: + raise Exception('Archive hash did not match') + archive = cPickle.loads(zlib.decompress(data)) + self.items = archive['items'] + self.name = archive['name'] + self.chunks = archive['chunks'] + for i, (id, csize, osize) in enumerate(archive['chunks']): + self.chunk_idx[i] = id + + def save(self, name): + archive = {'name': name, 'items': self.items, 'chunks': self.chunks} + data = zlib.compress(cPickle.dumps(archive)) + self.id = hashlib.sha256(data).digest() + self.store.put(NS_ARCHIVES, self.id, data) + self.store.commit() + def add_chunk(self, id, csize, osize): try: return self.chunk_idx[id] @@ -32,19 +52,6 @@ def add_chunk(self, id, csize, osize): self.chunk_idx[id] = idx return idx - def open(self, name): - archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, name))) - self.items = archive['items'] - self.name = archive['name'] - self.chunks = archive['chunks'] - for i, (id, csize, osize) in enumerate(archive['chunks']): - self.chunk_idx[i] = id - - def save(self, name): - archive = {'name': name, 'items': self.items, 'chunks': self.chunks} - self.store.put(NS_ARCHIVES, name, zlib.compress(cPickle.dumps(archive))) - self.store.commit() - def stats(self, cache): total_osize = 0 total_csize = 0 @@ -84,7 +91,11 @@ def extract(self, dest=None): for chunk in item['chunks']: id = self.chunk_idx[chunk] data = self.store.get(NS_CHUNKS, id) - if hashlib.sha1(data).digest() != id: + cid = data[:32] + data = data[32:] + if hashlib.sha256(data).digest() != cid: + raise Exception('Invalid chunk checksum') + if hashlib.sha256(zlib.decompress(data)).digest() != id: raise Exception('Invalid chunk checksum') fd.write(zlib.decompress(data)) @@ -94,24 +105,30 @@ def verify(self): for chunk in item['chunks']: id = self.chunk_idx[chunk] data = self.store.get(NS_CHUNKS, id) - if hashlib.sha1(data).digest() != id: - logging.ERROR('%s ... ERROR', item['path']) + data = self.store.get(NS_CHUNKS, id) + cid = data[:32] + data = data[32:] + if (hashlib.sha256(data).digest() != cid or + hashlib.sha256(zlib.decompress(data)).digest() != id): + logging.error('%s ... ERROR', item['path']) break else: logging.info('%s ... OK', item['path']) def delete(self, cache): - self.store.delete(NS_ARCHIVES, self.name) + self.store.delete(NS_ARCHIVES, self.cache.archives[self.name]) for item in self.items: if item['type'] == 'FILE': for c in item['chunks']: id = self.chunk_idx[c] cache.chunk_decref(id) self.store.commit() - cache.archives.remove(self.name) + del cache.archives[self.name] cache.save() def create(self, name, paths, cache): + if name in cache.archives: + raise NameError('Archive already exists') for path in paths: for root, dirs, files in os.walk(path): for d in dirs: @@ -123,7 +140,7 @@ def create(self, name, paths, cache): if entry: self.items.append(entry) self.save(name) - cache.archives.append(name) + cache.archives[name] = self.id cache.save() def process_dir(self, path, cache): @@ -167,23 +184,23 @@ def open_store(self, location): def do_create(self, args): store, cache = self.open_store(args.archive) - archive = Archive(store) + archive = Archive(store, cache) archive.create(args.archive.archive, args.paths, cache) def do_extract(self, args): store, cache = self.open_store(args.archive) - archive = Archive(store, args.archive.archive) + archive = Archive(store, cache, args.archive.archive) archive.extract(args.dest) def do_delete(self, args): store, cache = self.open_store(args.archive) - archive = Archive(store, args.archive.archive) + archive = Archive(store, cache, args.archive.archive) archive.delete(cache) def do_list(self, args): store, cache = self.open_store(args.src) if args.src.archive: - archive = Archive(store, args.src.archive) + archive = Archive(store, cache, args.src.archive) archive.list() else: for archive in sorted(cache.archives): @@ -191,12 +208,12 @@ def do_list(self, args): def do_verify(self, args): store, cache = self.open_store(args.archive) - archive = Archive(store, args.archive.archive) + archive = Archive(store, cache, args.archive.archive) archive.verify() def do_info(self, args): store, cache = self.open_store(args.archive) - archive = Archive(store, args.archive.archive) + archive = Archive(store, cache, args.archive.archive) stats = archive.stats(cache) print 'Original size:', self.pretty_size(stats['osize']) print 'Compressed size:', self.pretty_size(stats['csize']) diff --git a/dedupestore/cache.py b/dedupestore/cache.py index 3eb9d0f42..1475db54c 100644 --- a/dedupestore/cache.py +++ b/dedupestore/cache.py @@ -24,55 +24,58 @@ def __init__(self, store): def open(self): if not os.path.exists(self.path): return - print 'Loading cache: ', self.path, '...' - data = cPickle.loads(zlib.decompress(open(self.path, 'rb').read())) + data = open(self.path, 'rb').read() + id = data[:32] + data = data[32:] + if hashlib.sha256(data).digest() != id: + raise Exception('Cache hash did not match') + data = cPickle.loads(zlib.decompress(data)) if data['uuid'] != self.store.uuid: - print >> sys.stderr, 'Cache UUID mismatch' - return + raise Exception('Cache UUID mismatch') self.chunkmap = data['chunkmap'] self.archives = data['archives'] self.tid = data['tid'] - print 'done' def init(self): """Initializes cache by fetching and reading all archive indicies """ self.chunkmap = {} - self.archives = [] + self.archives = {} self.tid = self.store.tid if self.store.tid == 0: return - print 'Recreating cache...' for id in list(self.store.list(NS_ARCHIVES)): - archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id))) - self.archives.append(archive['name']) + data = self.store.get(NS_ARCHIVES, id) + if hashlib.sha256(data).digest() != id: + raise Exception('Archive hash did not match') + archive = cPickle.loads(zlib.decompress(data)) + self.archives[archive['name']] = id for id, csize, osize in archive['chunks']: if self.seen_chunk(id): self.chunk_incref(id) else: self.init_chunk(id, csize, osize) - print 'done' def save(self): assert self.store.state == self.store.OPEN - print 'saving cache' data = {'uuid': self.store.uuid, 'chunkmap': self.chunkmap, 'tid': self.store.tid, 'archives': self.archives} - print 'Saving cache as:', self.path cachedir = os.path.dirname(self.path) if not os.path.exists(cachedir): os.makedirs(cachedir) with open(self.path, 'wb') as fd: - fd.write(zlib.compress(cPickle.dumps(data))) - print 'done' + data = zlib.compress(cPickle.dumps(data)) + id = hashlib.sha256(data).digest() + fd.write(id + data) def add_chunk(self, data): - osize = len(data) - data = zlib.compress(data) - id = hashlib.sha1(data).digest() + id = hashlib.sha256(data).digest() if self.seen_chunk(id): return self.chunk_incref(id) + osize = len(data) + data = zlib.compress(data) + data = hashlib.sha256(data).digest() + data csize = len(data) self.store.put(NS_CHUNKS, id, data) return self.init_chunk(id, csize, osize)