2010-05-26 19:41:55 +00:00
|
|
|
import os
|
|
|
|
import tempfile
|
|
|
|
import shutil
|
|
|
|
import unittest
|
|
|
|
import sqlite3
|
2010-10-15 17:56:38 +00:00
|
|
|
import fcntl
|
2010-05-26 19:41:55 +00:00
|
|
|
|
2010-10-19 19:08:42 +00:00
|
|
|
# Shorthand for the sqlite3 wrapper applied to the ns/id values bound into queries.
Binary = sqlite3.Binary
|
|
|
|
|
2010-05-26 19:41:55 +00:00
|
|
|
|
2010-10-26 19:25:25 +00:00
|
|
|
class Store(object):
    """Object store keeping payloads in band files and an index in SQLite.

    A store is a directory holding ``dedupestore.db`` (the ``objects`` index
    and the ``system`` option table), a ``lock`` file that is flock()ed
    exclusively while the store is open, and a ``bands`` tree of append-only
    data files.  Objects are addressed by a ``(ns, id)`` pair of byte
    strings.  Changes become permanent on :meth:`commit` and are discarded
    by :meth:`rollback`.
    """

    class DoesNotExist(KeyError):
        """Raised when the requested (ns, id) pair is not in the index."""

    class AlreadyExists(KeyError):
        """Raised by put() when the (ns, id) pair is already in the index."""

    # State labels; only OPEN is assigned within this class.
    IDLE = 'Idle'
    OPEN = 'Open'
    ACTIVE = 'Active'
    # Value written to the 'bandlimit' option of a newly created store.
    BAND_LIMIT = 1 * 1024 * 1024

    def __init__(self, path, create=False):
        """Open the store at *path*, creating it first when *create* is true."""
        self.read_fd = None
        self.write_fd = None
        if create:
            self.create(path)
        self.open(path)

    def get_option(self, key):
        """Return the value stored for *key* in the ``system`` table."""
        return self.cursor.execute('SELECT value FROM system WHERE key=?', (key,)) \
            .fetchone()[0]

    def set_option(self, key, value):
        """Update *key* in the ``system`` table; the row must already exist."""
        return self.cursor.execute('UPDATE system SET value=? WHERE key=?',
                                   (value, key))

    def open(self, path):
        """Lock and open the existing store directory *path*.

        Raises Exception when *path* is not a directory or does not contain
        the index database.
        """
        if not os.path.isdir(path):
            raise Exception('%s Does not look like a store' % path)
        db_path = os.path.join(path, 'dedupestore.db')
        if not os.path.exists(db_path):
            # The original message never interpolated the path.
            raise Exception('%s Does not look like a store2' % path)
        self.lock_fd = open(os.path.join(path, 'lock'), 'w')
        fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
        self.path = path
        self.cnx = sqlite3.connect(db_path)
        self.cnx.text_factory = str
        self.cursor = self.cnx.cursor()
        self._begin()

    def _begin(self):
        """Reset per-transaction state from the committed options.

        Closes any open band files, reloads the options from the database
        and removes band files numbered from ``nextband`` upwards, i.e.
        files written by a transaction that was never committed.
        """
        import binascii

        if self.read_fd:
            self.read_fd.close()
            self.read_fd = None
        if self.write_fd:
            self.write_fd.close()
            self.write_fd = None
        self.version = self.get_option('version')
        # unhexlify() gives the same result as str.decode('hex') on Python 2
        # and also works on Python 3.
        self.id = binascii.unhexlify(self.get_option('id'))
        self.tid = self.get_option('tid')
        self.nextband = self.get_option('nextband')
        self.bandlimit = self.get_option('bandlimit')
        assert self.version == 1
        self.state = self.OPEN
        self.read_band = None
        self.write_band = None
        self.to_delete = set()
        band = self.nextband
        while os.path.exists(self.band_filename(band)):
            os.unlink(self.band_filename(band))
            band += 1

    def create(self, path):
        """Initialise a new, empty store at *path*.

        *path* may be missing or an empty directory; anything else raises
        Exception.
        """
        import binascii

        if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
            raise Exception('Path "%s" already exists' % path)
        if not os.path.exists(path):
            os.mkdir(path)
        os.mkdir(os.path.join(path, 'bands'))
        cnx = sqlite3.connect(os.path.join(path, 'dedupestore.db'))
        try:
            cnx.execute('CREATE TABLE objects(ns BINARY NOT NULL, id BINARY NOT NULL, '
                        'band NOT NULL, offset NOT NULL, size NOT NULL)')
            cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
            cnx.execute('CREATE TABLE system(key UNIQUE NOT NULL, value)')
            # hexlify() + decode yields the same hex text as
            # str.encode('hex') on Python 2 and also works on Python 3.
            store_id = binascii.hexlify(os.urandom(32)).decode('ascii')
            cnx.executemany('INSERT INTO system VALUES(?, ?)',
                            (('id', store_id),
                             ('version', 1),
                             ('tid', 0),
                             ('nextband', 0),
                             ('bandlimit', self.BAND_LIMIT)))
            cnx.commit()
        finally:
            # open() makes its own connection, so this one must not linger.
            cnx.close()

    def close(self):
        """Roll back pending changes, close the database and drop the lock."""
        self.rollback()
        self.cnx.close()
        self.lock_fd.close()
        os.unlink(os.path.join(self.path, 'lock'))

    def commit(self):
        """Make the current transaction permanent and begin a new one."""
        # Attribute kept for compatibility; nothing in this class reads it.
        self.band = None
        self.delete_bands()
        self.tid += 1
        self._begin()

    def delete_bands(self):
        """Compact bands touched by delete(), then commit the index.

        Every object still indexed in a band listed in ``to_delete`` is
        copied into the current write band and re-indexed.  The ``tid`` and
        ``nextband`` options are then written, the database is committed,
        and only afterwards are the old band files removed.
        """
        if self.write_band in self.to_delete:
            # Survivors must not be appended to a band that is about to be
            # unlinked, so force the next store_data() to open a new band.
            self._close_write_band()
        for b in self.to_delete:
            objects = self.cursor.execute(
                'SELECT ns, id, offset, size '
                'FROM objects WHERE band=? ORDER BY offset',
                (b,)).fetchall()
            for o in objects:
                band, offset, size = self.store_data(self.retrieve_data(b, *o[2:]))
                self.cursor.execute('UPDATE objects SET band=?, offset=?, size=? '
                                    'WHERE ns=? AND id=?',
                                    (band, offset, size,
                                     Binary(o[0]), Binary(o[1])))
        self.set_option('tid', self.tid + 1)
        self.set_option('nextband', self.nextband)
        if self.write_fd:
            # Hand band data to the OS before the index that refers to it
            # is committed.
            self.write_fd.flush()
        self.cnx.commit()
        for b in self.to_delete:
            os.unlink(self.band_filename(b))

    def rollback(self):
        """Discard the current transaction and begin a new one."""
        self.cnx.rollback()
        self._begin()

    def get(self, ns, id):
        """Return the data stored under (*ns*, *id*).

        Raises DoesNotExist when the pair is not in the index.
        """
        self.cursor.execute('SELECT band, offset, size FROM objects WHERE ns=? and id=?',
                            (Binary(ns), Binary(id)))
        row = self.cursor.fetchone()
        if row:
            return self.retrieve_data(*row)
        else:
            raise self.DoesNotExist

    def band_filename(self, band):
        """Return the path of band number *band* (1000 bands per directory)."""
        # Floor division keeps the directory name an integer on Python 3 too.
        return os.path.join(self.path, 'bands', str(band // 1000), str(band))

    def retrieve_data(self, band, offset, size):
        """Read *size* bytes starting at *offset* from band file *band*."""
        if self.write_band == band:
            # Make buffered writes visible to the separate read handle.
            self.write_fd.flush()
        if self.read_band != band:
            if self.read_fd:
                self.read_fd.close()
                self.read_fd = None
            # Only record the band once the file is really open, so a failed
            # open() cannot leave read_band pointing at a closed handle.
            self.read_band = None
            self.read_fd = open(self.band_filename(band), 'rb')
            self.read_band = band
        self.read_fd.seek(offset)
        return self.read_fd.read(size)

    def _close_write_band(self):
        """Flush and close the current write band, if any."""
        if self.write_fd:
            self.write_fd.close()
            self.write_fd = None
        self.write_band = None

    def store_data(self, data):
        """Append *data* to the current write band.

        Returns the ``(band, offset, size)`` triple locating the data.  A
        new band file is started when none is open; the band is closed once
        a write takes it past ``bandlimit``.
        """
        if self.write_band is None:
            self.write_band = self.nextband
            self.nextband += 1
            directory = os.path.join(self.path, 'bands', str(self.write_band // 1000))
            if not os.path.exists(directory):
                os.mkdir(directory)
            assert not os.path.exists(self.band_filename(self.write_band))
            self.write_fd = open(self.band_filename(self.write_band), 'ab')
        band = self.write_band
        offset = self.write_fd.tell()
        self.write_fd.write(data)
        if offset + len(data) > self.bandlimit:
            # Close (and thereby flush) the full band.  Merely forgetting
            # write_band would leave this data buffered while
            # retrieve_data() no longer knows it has to flush.
            self._close_write_band()
        return band, offset, len(data)

    def put(self, ns, id, data):
        """Store *data* under (*ns*, *id*).

        Raises AlreadyExists when the pair is already in the index.
        """
        try:
            band, offset, size = self.store_data(data)
            self.cursor.execute('INSERT INTO objects (ns, id, band, offset, size) '
                                'VALUES(?, ?, ?, ?, ?)',
                                (Binary(ns), Binary(id), band, offset, size))
        except sqlite3.IntegrityError:
            raise self.AlreadyExists

    def delete(self, ns, id):
        """Remove (*ns*, *id*) from the index.

        The band that held the object is queued in ``to_delete`` and gets
        compacted by the next commit.  Raises DoesNotExist when the pair is
        not in the index.
        """
        self.cursor.execute('SELECT band FROM objects WHERE ns=? and id=?',
                            (Binary(ns), Binary(id)))
        row = self.cursor.fetchone()
        if not row:
            raise self.DoesNotExist
        self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
                            (Binary(ns), Binary(id)))
        self.to_delete.add(row[0])

    def list(self, ns, prefix='', marker=None, max_keys=1000000):
        """Yield the ids stored in namespace *ns* in ascending order.

        *prefix* restricts the result to ids starting with it (ids whose
        byte right after the prefix is 0xff are not matched), *marker* is an
        inclusive lower bound and *max_keys* caps the number of ids.
        """
        sql = 'SELECT id FROM objects WHERE ns=:ns'
        args = dict(ns=Binary(ns))
        if prefix:
            args['prefix'] = Binary(prefix)
            args['end'] = Binary(prefix + b'\xff')
            sql += ' AND id >= :prefix AND id < :end'
        if marker:
            sql += ' AND id >= :marker'
            args['marker'] = Binary(marker)
        # Explicit ordering instead of relying on the query plan, and a
        # bound parameter instead of splicing the limit into the SQL text.
        sql += ' ORDER BY id LIMIT :max_keys'
        args['max_keys'] = int(max_keys)
        # Fetch everything up front: the cursor is shared with get()/put(),
        # which would otherwise cut the listing short when called while the
        # caller is still iterating.
        rows = self.cursor.execute(sql, args).fetchall()
        for row in rows:
            yield bytes(row[0])
|
2010-05-26 19:41:55 +00:00
|
|
|
|
|
|
|
|
2010-10-26 19:25:25 +00:00
|
|
|
class StoreTestCase(unittest.TestCase):
    """Exercise put/get/delete/list and the commit/rollback cycle of Store.

    Namespaces, ids and payloads are written as bytes literals: on Python 2
    they are the same values as plain string literals, and on Python 3 they
    are the bytes-like objects sqlite3.Binary requires.
    """

    def setUp(self):
        """Create a fresh store inside a temporary directory."""
        self.tmppath = tempfile.mkdtemp()
        self.store = Store(os.path.join(self.tmppath, 'store'), create=True)

    def tearDown(self):
        """Close the store and remove the temporary directory."""
        try:
            # Release the sqlite connection and the flock()ed lock file
            # instead of leaving them open until garbage collection.
            self.store.close()
        finally:
            shutil.rmtree(self.tmppath)

    def test1(self):
        """A rolled-back put leaves no trace and does not advance tid."""
        self.assertEqual(self.store.tid, 0)
        self.assertEqual(self.store.state, self.store.OPEN)
        self.store.put(b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.assertRaises(self.store.AlreadyExists,
                          self.store.put, b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.assertEqual(self.store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        self.store.rollback()
        self.assertRaises(self.store.DoesNotExist,
                          self.store.get, b'SOMENS', b'SOMEID')
        self.assertEqual(self.store.tid, 0)

    def test2(self):
        """Commits persist puts and deletes; rollback restores a delete."""
        self.assertEqual(self.store.tid, 0)
        self.assertEqual(self.store.state, self.store.OPEN)
        self.store.put(b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.assertEqual(self.store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        self.store.commit()
        self.assertEqual(self.store.tid, 1)
        self.assertEqual(self.store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        self.store.delete(b'SOMENS', b'SOMEID')
        self.assertRaises(self.store.DoesNotExist,
                          self.store.get, b'SOMENS', b'SOMEID')
        self.store.rollback()
        self.assertEqual(self.store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        self.store.delete(b'SOMENS', b'SOMEID')
        self.assertRaises(self.store.DoesNotExist,
                          self.store.get, b'SOMENS', b'SOMEID')
        self.store.commit()
        self.assertEqual(self.store.tid, 2)
        self.assertRaises(self.store.DoesNotExist,
                          self.store.get, b'SOMENS', b'SOMEID')

    def test_list(self):
        """list() honours max_keys, marker and prefix."""
        self.store.put(b'SOMENS', b'SOMEID12', b'SOMEDATA')
        self.store.put(b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.store.put(b'SOMENS', b'SOMEID1', b'SOMEDATA')
        self.store.put(b'SOMENS', b'SOMEID123', b'SOMEDATA')
        self.store.commit()
        self.assertEqual(list(self.store.list(b'SOMENS', max_keys=3)),
                         [b'SOMEID', b'SOMEID1', b'SOMEID12'])
        self.assertEqual(list(self.store.list(b'SOMENS', marker=b'SOMEID12')),
                         [b'SOMEID12', b'SOMEID123'])
        self.assertEqual(list(self.store.list(b'SOMENS', prefix=b'SOMEID1', max_keys=2)),
                         [b'SOMEID1', b'SOMEID12'])
        self.assertEqual(list(self.store.list(b'SOMENS', prefix=b'SOMEID1', marker=b'SOMEID12')),
                         [b'SOMEID12', b'SOMEID123'])
|
|
|
|
|
|
|
|
|
2010-10-19 19:08:42 +00:00
|
|
|
def suite():
    """Build a test suite holding every test method of StoreTestCase."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(StoreTestCase)
|
2010-10-19 19:08:42 +00:00
|
|
|
|
2010-05-26 19:41:55 +00:00
|
|
|
# Run this module's test cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|