borg/dedupestore/store.py

271 lines
9.7 KiB
Python
Raw Normal View History

2010-05-26 19:41:55 +00:00
import os
import tempfile
import shutil
import unittest
import sqlite3
2010-10-15 17:56:38 +00:00
import fcntl
2010-05-26 19:41:55 +00:00
2010-10-19 19:08:42 +00:00
# Short alias: values wrapped in Binary() are bound to SQL statements as BLOBs.
Binary = sqlite3.Binary
2010-05-26 19:41:55 +00:00
2010-10-26 19:25:25 +00:00
class Store(object):
    """Transactional object store backed by band files and a SQLite index.

    Object payloads are appended to numbered "band" files stored as
    ``<path>/bands/<band // 1000>/<band>``.  The SQLite database
    ``<path>/dedupestore.db`` maps every ``(ns, id)`` pair to the
    ``(band, offset, size)`` of its payload and keeps a few store-wide
    options in the ``system`` table.

    ``ns``, ``id`` and ``data`` are expected to be byte strings; ``ns`` and
    ``id`` are wrapped in ``Binary`` before being handed to SQLite.

    Changes become permanent on ``commit()`` and are discarded by
    ``rollback()``.  An exclusive ``flock`` on ``<path>/lock`` is held while
    the store is open.
    """

    class DoesNotExist(KeyError):
        """Raised when the requested (ns, id) pair is not in the store."""

    class AlreadyExists(KeyError):
        """Raised by put() when the (ns, id) pair is already stored."""

    # State names; only OPEN is ever assigned by this class.
    IDLE = 'Idle'
    OPEN = 'Open'
    ACTIVE = 'Active'
    # Default value of the 'bandlimit' option written by create(): once the
    # end of a write reaches past this many bytes the band is closed.
    BAND_LIMIT = 1 * 1024 * 1024

    def __init__(self, path, create=False):
        """Open the store at *path*, creating it first if *create* is true."""
        self.read_fd = None
        self.write_fd = None
        if create:
            self.create(path)
        self.open(path)

    def get_option(self, key):
        """Return the value stored for *key* in the ``system`` table."""
        return self.cursor.execute('SELECT value FROM system WHERE key=?', (key,)) \
                          .fetchone()[0]

    def set_option(self, key, value):
        """Update the ``system`` row for *key*; returns the cursor used."""
        return self.cursor.execute('UPDATE system SET value=? WHERE key=?',
                                   (value, key))

    def open(self, path):
        """Lock and open an existing store and start a transaction.

        Raises ``Exception`` if *path* is not a directory or does not
        contain ``dedupestore.db``.  Blocks until the exclusive lock on
        ``<path>/lock`` is obtained.
        """
        if not os.path.isdir(path):
            raise Exception('%s Does not look like a store' % path)
        db_path = os.path.join(path, 'dedupestore.db')
        if not os.path.exists(db_path):
            # The original message was never formatted and printed a raw '%s'.
            raise Exception('%s Does not look like a store2' % path)
        self.lock_fd = open(os.path.join(path, 'lock'), 'w')
        fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
        self.path = path
        self.cnx = sqlite3.connect(db_path)
        self.cnx.text_factory = str
        self.cursor = self.cnx.cursor()
        self._begin()

    def _begin(self):
        """Reset per-transaction state from the committed database contents."""
        import binascii

        if self.read_fd:
            self.read_fd.close()
            self.read_fd = None
        if self.write_fd:
            self.write_fd.close()
            self.write_fd = None
        self.version = self.get_option('version')
        # The id is stored as hex text; unhexlify() accepts it on Python 2 and 3.
        self.id = binascii.unhexlify(self.get_option('id'))
        self.tid = self.get_option('tid')
        self.nextband = self.get_option('nextband')
        self.bandlimit = self.get_option('bandlimit')
        assert self.version == 1
        self.state = self.OPEN
        self.read_band = None
        self.write_band = None
        self.to_delete = set()
        # Band files numbered >= the committed 'nextband' were written by a
        # transaction that never committed; remove them.
        band = self.nextband
        while os.path.exists(self.band_filename(band)):
            os.unlink(self.band_filename(band))
            band += 1

    def create(self, path):
        """Create an empty store at *path*.

        *path* must not exist yet or must be an empty directory, otherwise
        ``Exception`` is raised.
        """
        import binascii

        if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
            raise Exception('Path "%s" already exists' % path)
        if not os.path.exists(path):
            os.mkdir(path)
        os.mkdir(os.path.join(path, 'bands'))
        cnx = sqlite3.connect(os.path.join(path, 'dedupestore.db'))
        try:
            cnx.execute('CREATE TABLE objects(ns BINARY NOT NULL, id BINARY NOT NULL, '
                        'band NOT NULL, offset NOT NULL, size NOT NULL)')
            cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
            cnx.execute('CREATE TABLE system(key UNIQUE NOT NULL, value)')
            store_id = binascii.hexlify(os.urandom(32)).decode('ascii')
            cnx.executemany('INSERT INTO system VALUES(?, ?)',
                            (('id', store_id),
                             ('version', 1),
                             ('tid', 0),
                             ('nextband', 0),
                             ('bandlimit', self.BAND_LIMIT)))
            cnx.commit()
        finally:
            # The setup connection is not used again; open() makes its own.
            cnx.close()

    def close(self):
        """Roll back uncommitted changes, close the database and unlock."""
        self.rollback()
        self.cnx.close()
        self.lock_fd.close()
        # NOTE(review): removing the lock file after releasing the lock lets
        # two other processes end up locking different inodes; kept as-is
        # because callers may rely on the file disappearing.
        os.unlink(os.path.join(self.path, 'lock'))

    def commit(self):
        """Make all changes since the last commit/rollback permanent."""
        self.delete_bands()
        self.tid += 1
        self._begin()

    def _close_write_band(self):
        """Flush and close the band currently open for writing, if any."""
        if self.write_fd:
            self.write_fd.close()
            self.write_fd = None
        self.write_band = None

    def delete_bands(self):
        """Compact and remove every band touched by delete(), then commit.

        Objects still stored in a band scheduled for deletion are copied to
        the current write band and their index rows updated.  The ``tid``
        and ``nextband`` options are written, the database transaction is
        committed and finally the old band files are unlinked.
        """
        if self.write_band in self.to_delete:
            # Survivors must not be re-appended to a band that is unlinked
            # below, so force store_data() to start a fresh band.
            self._close_write_band()
        for b in self.to_delete:
            objects = self.cursor.execute('SELECT ns, id, offset, size '
                                          'FROM objects WHERE band=? ORDER BY offset',
                                          (b,)).fetchall()
            for o in objects:
                band, offset, size = self.store_data(self.retrieve_data(b, *o[2:]))
                self.cursor.execute('UPDATE objects SET band=?, offset=?, size=? '
                                    'WHERE ns=? AND id=?', (band, offset, size,
                                                            Binary(o[0]), Binary(o[1])))
        self.set_option('tid', self.tid + 1)
        self.set_option('nextband', self.nextband)
        if self.write_fd:
            # Hand buffered payload bytes to the OS before the index that
            # references them is committed.
            self.write_fd.flush()
        self.cnx.commit()
        for b in self.to_delete:
            os.unlink(self.band_filename(b))

    def rollback(self):
        """Discard all changes since the last commit/rollback."""
        self.cnx.rollback()
        self._begin()

    def get(self, ns, id):
        """Return the data stored under (*ns*, *id*).

        Raises ``Store.DoesNotExist`` if there is no such object.
        """
        self.cursor.execute('SELECT band, offset, size FROM objects WHERE ns=? and id=?',
                            (Binary(ns), Binary(id)))
        row = self.cursor.fetchone()
        if row:
            return self.retrieve_data(*row)
        else:
            raise self.DoesNotExist

    def band_filename(self, band):
        """Return the path of band file number *band*."""
        # '//' keeps the directory name an integer on Python 2 and Python 3.
        return os.path.join(self.path, 'bands', str(band // 1000), str(band))

    def retrieve_data(self, band, offset, size):
        """Read *size* bytes at *offset* from band file *band*."""
        if self.write_band == band:
            # Make pending writes visible to the separate read handle.
            self.write_fd.flush()
        if self.read_band != band:
            # Open first so a failure leaves the previous handle usable.
            fd = open(self.band_filename(band), 'rb')
            if self.read_fd:
                self.read_fd.close()
            self.read_fd = fd
            self.read_band = band
        self.read_fd.seek(offset)
        return self.read_fd.read(size)

    def store_data(self, data):
        """Append *data* to the current write band.

        Opens a new band when none is open.  Returns ``(band, offset,
        size)`` describing where the data was written.
        """
        if self.write_band is None:
            band = self.nextband
            band_dir = os.path.join(self.path, 'bands', str(band // 1000))
            if not os.path.exists(band_dir):
                os.mkdir(band_dir)
            assert not os.path.exists(self.band_filename(band))
            if self.write_fd:
                self.write_fd.close()
            self.write_fd = open(self.band_filename(band), 'ab')
            # Only advance the counters once the file is actually open.
            self.write_band = band
            self.nextband = band + 1
        band = self.write_band
        offset = self.write_fd.tell()
        self.write_fd.write(data)
        if offset + len(data) > self.bandlimit:
            # Band is full.  Closing flushes it, so a later read of this band
            # sees the bytes even though it is no longer the write band.
            self._close_write_band()
        return band, offset, len(data)

    def put(self, ns, id, data):
        """Store *data* under (*ns*, *id*).

        Raises ``Store.AlreadyExists`` if the key is already present; the
        payload has been appended to the band by then and stays there
        unreferenced.
        """
        band, offset, size = self.store_data(data)
        try:
            self.cursor.execute('INSERT INTO objects (ns, id, band, offset, size) '
                                'VALUES(?, ?, ?, ?, ?)',
                                (Binary(ns), Binary(id), band, offset, size))
        except sqlite3.IntegrityError:
            raise self.AlreadyExists

    def delete(self, ns, id):
        """Remove (*ns*, *id*) and schedule its band for compaction.

        Raises ``Store.DoesNotExist`` if there is no such object.
        """
        self.cursor.execute('SELECT band FROM objects WHERE ns=? and id=?',
                            (Binary(ns), Binary(id)))
        row = self.cursor.fetchone()
        if not row:
            raise self.DoesNotExist
        self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
                            (Binary(ns), Binary(id)))
        self.to_delete.add(row[0])

    def list(self, ns, prefix='', marker=None, max_keys=1000000):
        """Yield the ids stored in namespace *ns* in ascending order.

        *prefix* restricts the result to ids starting with it, *marker* to
        ids greater than or equal to it, and at most *max_keys* ids are
        produced.
        """
        sql = 'SELECT id FROM objects WHERE ns=:ns'
        args = dict(ns=Binary(ns), limit=int(max_keys))
        if prefix:
            args['prefix'] = Binary(prefix)
            sql += ' AND id >= :prefix'
            # Exclusive upper bound: the prefix with its last non-0xff byte
            # incremented.  An all-0xff prefix has no upper bound.
            upper = bytearray(prefix)
            while upper and upper[-1] == 0xff:
                upper.pop()
            if upper:
                upper[-1] += 1
                args['end'] = Binary(bytes(upper))
                sql += ' AND id < :end'
        if marker:
            sql += ' AND id >= :marker'
            args['marker'] = Binary(marker)
        # Without ORDER BY the row order is up to the query planner.
        sql += ' ORDER BY id LIMIT :limit'
        for row in self.cursor.execute(sql, args):
            yield bytes(row[0])
2010-05-26 19:41:55 +00:00
2010-10-26 19:25:25 +00:00
class StoreTestCase(unittest.TestCase):
    """Exercise put/get/delete/list and commit/rollback on a fresh Store."""

    def setUp(self):
        # Every test gets its own store inside a private temporary directory.
        self.tmppath = tempfile.mkdtemp()
        self.store = Store(os.path.join(self.tmppath, 'store'), create=True)

    def tearDown(self):
        # Close the store first so its lock file handle and database
        # connection are released before the directory is removed.
        try:
            self.store.close()
        finally:
            shutil.rmtree(self.tmppath)

    def test1(self):
        """A put is visible until rollback and leaves tid untouched."""
        store = self.store
        self.assertEqual(store.tid, 0)
        self.assertEqual(store.state, store.OPEN)
        store.put(b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.assertRaises(store.AlreadyExists,
                          store.put, b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.assertEqual(store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        store.rollback()
        self.assertRaises(store.DoesNotExist, store.get, b'SOMENS', b'SOMEID')
        self.assertEqual(store.tid, 0)

    def test2(self):
        """Commit persists puts and deletes; rollback undoes a delete."""
        store = self.store
        self.assertEqual(store.tid, 0)
        self.assertEqual(store.state, store.OPEN)
        store.put(b'SOMENS', b'SOMEID', b'SOMEDATA')
        self.assertEqual(store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        store.commit()
        self.assertEqual(store.tid, 1)
        self.assertEqual(store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        store.delete(b'SOMENS', b'SOMEID')
        self.assertRaises(store.DoesNotExist, store.get, b'SOMENS', b'SOMEID')
        store.rollback()
        self.assertEqual(store.get(b'SOMENS', b'SOMEID'), b'SOMEDATA')
        store.delete(b'SOMENS', b'SOMEID')
        self.assertRaises(store.DoesNotExist, store.get, b'SOMENS', b'SOMEID')
        store.commit()
        self.assertEqual(store.tid, 2)
        self.assertRaises(store.DoesNotExist, store.get, b'SOMENS', b'SOMEID')

    def test_list(self):
        """list() honours max_keys, marker and prefix."""
        store = self.store
        for key in (b'SOMEID12', b'SOMEID', b'SOMEID1', b'SOMEID123'):
            store.put(b'SOMENS', key, b'SOMEDATA')
        store.commit()
        self.assertEqual(list(store.list(b'SOMENS', max_keys=3)),
                         [b'SOMEID', b'SOMEID1', b'SOMEID12'])
        self.assertEqual(list(store.list(b'SOMENS', marker=b'SOMEID12')),
                         [b'SOMEID12', b'SOMEID123'])
        self.assertEqual(list(store.list(b'SOMENS', prefix=b'SOMEID1', max_keys=2)),
                         [b'SOMEID1', b'SOMEID12'])
        self.assertEqual(list(store.list(b'SOMENS', prefix=b'SOMEID1', marker=b'SOMEID12')),
                         [b'SOMEID12', b'SOMEID123'])
2010-10-19 19:08:42 +00:00
def suite():
    """Return a test suite holding every test defined on StoreTestCase."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(StoreTestCase)
2010-10-19 19:08:42 +00:00
2010-05-26 19:41:55 +00:00
# Run this module's unit tests when it is executed as a script.
if __name__ == '__main__':
    unittest.main()