From 62a4a37544550a7339b209b4aed5608da6379452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Mon, 10 May 2010 22:35:46 +0200 Subject: [PATCH] An experimental sqlite based storage implementation. --- dedupestore/archiver.py | 9 +- dedupestore/cache.py | 12 +- dedupestore/{store.py => fsstore.py} | 7 +- dedupestore/sqlitestore.py | 157 +++++++++++++++++++++++++++ 4 files changed, 172 insertions(+), 13 deletions(-) rename dedupestore/{store.py => fsstore.py} (99%) create mode 100644 dedupestore/sqlitestore.py diff --git a/dedupestore/archiver.py b/dedupestore/archiver.py index c2a3c1cac..7e44c94df 100644 --- a/dedupestore/archiver.py +++ b/dedupestore/archiver.py @@ -5,8 +5,11 @@ import cPickle from optparse import OptionParser from chunkifier import chunkify -from cache import Cache -from store import Store, NS_ARCHIVES, NS_CHUNKS, CHUNK_SIZE +from cache import Cache, NS_ARCHIVES, NS_CHUNKS +from sqlitestore import SqliteStore + + +CHUNK_SIZE = 256 * 1024 class Archive(object): @@ -202,7 +205,7 @@ class Archiver(object): help="Display archive statistics", metavar="ARCHIVE") (options, args) = parser.parse_args() if options.store: - self.store = Store(options.store) + self.store = SqliteStore(options.store) else: parser.error('No store path specified') self.cache = Cache(self.store) diff --git a/dedupestore/cache.py b/dedupestore/cache.py index 4b91e7892..07339a41f 100644 --- a/dedupestore/cache.py +++ b/dedupestore/cache.py @@ -5,7 +5,9 @@ import sys import zlib from chunkifier import checksum -from store import Store, NS_ARCHIVES, NS_CHUNKS + +NS_ARCHIVES = 'ARCHIVES' +NS_CHUNKS = 'CHUNKS' class Cache(object): @@ -46,6 +48,8 @@ class Cache(object): return print 'Recreating cache...' for id in self.store.list(NS_ARCHIVES): + + archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id))) self.archives.append(archive['name']) for id, sum, csize, osize in archive['chunks']: @@ -56,7 +60,7 @@ class Cache(object): print 'done' def save(self): - assert self.store.state == Store.OPEN + assert self.store.state == self.store.OPEN print 'saving cache' data = {'uuid': self.store.uuid, 'chunkmap': self.chunkmap, 'summap': self.summap, @@ -81,7 +85,7 @@ class Cache(object): return self.init_chunk(id, sum, csize, osize) def init_chunk(self, id, sum, csize, osize): - self.chunkmap[id] = (1, sum, osize, csize) + self.chunkmap[id] = (1, sum, csize, osize) self.summap[sum] = self.summap.get(sum, 0) + 1 return id, sum, csize, osize @@ -91,7 +95,7 @@ class Cache(object): def chunk_incref(self, id): count, sum, csize, osize = self.chunkmap[id] - self.chunkmap[id] = (count + 1, sum, osize, csize) + self.chunkmap[id] = (count + 1, sum, csize, osize) self.summap[sum] += 1 return id, sum, csize, osize diff --git a/dedupestore/store.py b/dedupestore/fsstore.py similarity index 99% rename from dedupestore/store.py rename to dedupestore/fsstore.py index b9ccc001d..724459f0f 100644 --- a/dedupestore/store.py +++ b/dedupestore/fsstore.py @@ -7,11 +7,6 @@ import unittest import uuid -CHUNK_SIZE = 256 * 1024 -NS_ARCHIVES = 'ARCHIVES' -NS_CHUNKS = 'CHUNKS' - - class Store(object): """ """ @@ -33,7 +28,7 @@ class Store(object): if not os.path.exists(path): self.create(path) self.open(path) - + def create(self, path): os.mkdir(path) open(os.path.join(path, 'version'), 'wb').write(self.VERSION) diff --git a/dedupestore/sqlitestore.py b/dedupestore/sqlitestore.py new file mode 100644 index 000000000..08763225f --- /dev/null +++ b/dedupestore/sqlitestore.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +import os +import tempfile +import shutil +import unittest +import sqlite3 +import uuid + + +class SqliteStore(object): + """ + """ + + class DoesNotExist(KeyError): + """""" + + class AlreadyExists(KeyError): + """""" + + IDLE = 'Idle' + OPEN = 'Open' + ACTIVE = 'Active' + VERSION = 'DEDUPESTORE VERSION 1' + + def __init__(self, path): + if not os.path.exists(path): + self.create(path) + self.cnx = sqlite3.connect(path) + self.cursor = self.cnx.cursor() + self.uuid, self.tid = self.cursor.execute('SELECT uuid, tid FROM system').fetchone() + self.state = self.OPEN + + def create(self, path): + cnx = sqlite3.connect(path) + cnx.execute('PRAGMA auto_vacuum=full') + cnx.execute('CREATE TABLE objects(ns TEXT NOT NULL, id NOT NULL, data NOT NULL)') + cnx.execute('CREATE TABLE system(uuid NOT NULL, tid NOT NULL)') + cnx.execute('INSERT INTO system VALUES(?,?)', (uuid.uuid1().hex, 0)) + cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)') + + def close(self): + self.cnx.close() + + def commit(self): + """ + """ + self.cursor.execute('UPDATE system SET tid=tid+1') + import time + t = time.time() + self.cnx.commit() + print time.time() - t + self.tid += 1 + + def rollback(self): + """ + """ + self.cnx.rollback() + + def get(self, ns, id): + """ + """ + self.cursor.execute('SELECT data FROM objects WHERE ns=? and id=?', + (ns.encode('hex'), id.encode('hex'))) + row = self.cursor.fetchone() + if row: + return str(row[0]) + else: + raise self.DoesNotExist + + def put(self, ns, id, data): + """ + """ + try: + self.cursor.execute('INSERT INTO objects (ns, id, data) ' + 'VALUES(?, ?, ?)', + (ns.encode('hex'), id.encode('hex'), + sqlite3.Binary(data))) + except sqlite3.IntegrityError: + raise self.AlreadyExists + + def delete(self, ns, id): + """ + """ + self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?', + (ns.encode('hex'), id.encode('hex'))) + + def list(self, ns, prefix='', marker=None, max_keys=1000000): + """ + """ + condition = '' + if prefix: + condition += ' AND id LIKE :prefix' + if marker: + condition += ' AND id >= :marker' + args = dict(ns=ns.encode('hex'), prefix=prefix.encode('hex') + '%', + marker=marker and marker.encode('hex')) + for row in self.cursor.execute('SELECT id FROM objects WHERE ' + 'ns=:ns ' + condition + ' LIMIT ' + str(max_keys), + args): + yield row[0].decode('hex') + + +class SqliteStoreTestCase(unittest.TestCase): + + def setUp(self): + self.tmppath = tempfile.mkdtemp() + self.store = SqliteStore(os.path.join(self.tmppath, 'store')) + + def tearDown(self): + shutil.rmtree(self.tmppath) + + def test1(self): + self.assertEqual(self.store.tid, 0) + self.assertEqual(self.store.state, self.store.OPEN) + self.store.put('SOMENS', 'SOMEID', 'SOMEDATA') + self.assertRaises(self.store.AlreadyExists, lambda: self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')) + self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA') + self.store.rollback() + self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID')) + self.assertEqual(self.store.tid, 0) + + def test2(self): + self.assertEqual(self.store.tid, 0) + self.assertEqual(self.store.state, self.store.OPEN) + self.store.put('SOMENS', 'SOMEID', 'SOMEDATA') + self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA') + self.store.commit() + self.assertEqual(self.store.tid, 1) + self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA') + self.store.delete('SOMENS', 'SOMEID') + self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID')) + self.store.rollback() + self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA') + self.store.delete('SOMENS', 'SOMEID') + self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID')) + self.store.commit() + self.assertEqual(self.store.tid, 2) + self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID')) + + def test_list(self): + self.store.put('SOMENS', 'SOMEID12', 'SOMEDATA') + self.store.put('SOMENS', 'SOMEID', 'SOMEDATA') + self.store.put('SOMENS', 'SOMEID1', 'SOMEDATA') + self.store.put('SOMENS', 'SOMEID123', 'SOMEDATA') + self.store.commit() + self.assertEqual(list(self.store.list('SOMENS', max_keys=3)), + ['SOMEID', 'SOMEID1', 'SOMEID12']) + self.assertEqual(list(self.store.list('SOMENS', marker='SOMEID12')), + ['SOMEID12', 'SOMEID123']) + self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', max_keys=2)), + ['SOMEID1', 'SOMEID12']) + self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', marker='SOMEID12')), + ['SOMEID12', 'SOMEID123']) + + +if __name__ == '__main__': + unittest.main()