1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-30 19:21:17 +00:00

More bandstore optimizations

This commit is contained in:
Jonas Borgström 2010-10-19 21:08:42 +02:00
parent a1bbe57460
commit 5cb2d80cb5
4 changed files with 46 additions and 42 deletions

View file

@ -121,8 +121,6 @@ def extract(self, dest=None):
if hashlib.sha256(data).digest() != cid:
raise Exception('Invalid chunk checksum')
data = zlib.decompress(data)
# if hashlib.sha256(data).digest() != id:
# raise Exception('Invalid chunk checksum')
fd.write(data)
def verify(self):
@ -138,10 +136,6 @@ def verify(self):
if (hashlib.sha256(data).digest() != cid):
logging.error('%s ... ERROR', item['path'])
break
# if (hashlib.sha256(data).digest() != cid or
# hashlib.sha256(zlib.decompress(data)).digest() != id):
# logging.error('%s ... ERROR', item['path'])
# break
else:
logging.info('%s ... OK', item['path'])
@ -198,6 +192,7 @@ def process_file(self, path, st):
fd = open(path, 'rb')
except IOError, e:
logging.error(e)
return
with fd:
path = path.lstrip('/\\:')
logging.info(path)

View file

@ -1,4 +1,3 @@
#!/usr/bin/env python
import os
import tempfile
import shutil
@ -7,6 +6,8 @@
import uuid
import fcntl
Binary = sqlite3.Binary
class BandStore(object):
"""
@ -40,6 +41,7 @@ def open(self, path):
fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
self.path = path
self.cnx = sqlite3.connect(db_path)
self.cnx.text_factory = str
self.cursor = self.cnx.cursor()
self._begin()
@ -62,17 +64,14 @@ def _begin(self):
while os.path.exists(self.band_filename(band)):
os.unlink(self.band_filename(band))
band += 1
self.delete_bands()
def create(self, path):
os.mkdir(path)
os.mkdir(os.path.join(path, 'bands'))
cnx = sqlite3.connect(os.path.join(path, 'dedupestore.db'))
cnx.execute('CREATE TABLE objects(ns TEXT NOT NULL, id NOT NULL, '
cnx.execute('CREATE TABLE objects(ns BINARY NOT NULL, id BINARY NOT NULL, '
'band NOT NULL, offset NOT NULL, size NOT NULL)')
cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
cnx.execute('CREATE TABLE to_delete(band NOT NULL)')
cnx.execute('CREATE UNIQUE INDEX to_delete_pk ON to_delete(band)')
cnx.execute('CREATE TABLE system(uuid NOT NULL, tid NOT NULL, '
'nextband NOT NULL, version NOT NULL, bandlimit NOT NULL)')
cnx.execute('INSERT INTO system VALUES(?,?,?,?,?)',
@ -89,28 +88,24 @@ def commit(self):
"""
"""
self.band = None
self.cursor.executemany('INSERT INTO to_delete(band) VALUES(?)',
[[d] for d in self.to_delete])
self.cursor.execute('UPDATE system SET tid=tid+1, nextband=?',
(self.nextband,))
self.cnx.commit()
self.delete_bands()
self.tid += 1
self._begin()
def delete_bands(self):
self.cursor.execute('SELECT band FROM to_delete')
to_delete = [r[0] for r in self.cursor.fetchall()]
for b in to_delete:
for b in self.to_delete:
objects = self.cursor.execute('SELECT ns, id, offset, size '
'FROM objects WHERE band=? ORDER BY offset',
(b,)).fetchall()
for o in objects:
band, offset, size = self.store_data(self.retrieve_data(b, *o[2:]))
self.cursor.execute('UPDATE objects SET band=?, offset=?, size=? '
'WHERE ns=? AND id=?', (band, offset, size, o[0], o[1]))
self.cursor.execute('DELETE FROM to_delete WHERE band=?', (b,))
self.cursor.execute('UPDATE system SET nextband=?', (self.nextband,))
self.cnx.commit()
'WHERE ns=? AND id=?', (band, offset, size,
Binary(o[0]), Binary(o[1])))
self.cursor.execute('UPDATE system SET tid=tid+1, nextband=?',
(self.nextband,))
self.cnx.commit()
for b in self.to_delete:
os.unlink(self.band_filename(b))
def rollback(self):
@ -123,7 +118,7 @@ def get(self, ns, id):
"""
"""
self.cursor.execute('SELECT band, offset, size FROM objects WHERE ns=? and id=?',
(ns.encode('hex'), id.encode('hex')))
(Binary(ns), Binary(id)))
row = self.cursor.fetchone()
if row:
return self.retrieve_data(*row)
@ -134,6 +129,8 @@ def band_filename(self, band):
return os.path.join(self.path, 'bands', str(band / 1000), str(band))
def retrieve_data(self, band, offset, size):
if self.write_band == band:
self.write_fd.flush()
if self.read_band != band:
self.read_band = band
if self.read_fd:
@ -147,7 +144,9 @@ def store_data(self, data):
self.write_band = self.nextband
self.nextband += 1
if self.write_band % 1000 == 0:
os.mkdir(os.path.join(self.path, 'bands', str(self.write_band / 1000)))
path = os.path.join(self.path, 'bands', str(self.write_band / 1000))
if not os.path.exists(path):
os.mkdir(path)
assert not os.path.exists(self.band_filename(self.write_band))
self.write_fd = open(self.band_filename(self.write_band), 'ab')
band = self.write_band
@ -164,8 +163,7 @@ def put(self, ns, id, data):
band, offset, size = self.store_data(data)
self.cursor.execute('INSERT INTO objects (ns, id, band, offset, size) '
'VALUES(?, ?, ?, ?, ?)',
(ns.encode('hex'), id.encode('hex'),
band, offset, size))
(Binary(ns), Binary(id), band, offset, size))
except sqlite3.IntegrityError:
raise self.AlreadyExists
@ -173,28 +171,28 @@ def delete(self, ns, id):
"""
"""
self.cursor.execute('SELECT band FROM objects WHERE ns=? and id=?',
(ns.encode('hex'), id.encode('hex')))
(Binary(ns), Binary(id)))
row = self.cursor.fetchone()
if not row:
raise self.DoesNotExist
self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
(ns.encode('hex'), id.encode('hex')))
(Binary(ns), Binary(id)))
self.to_delete.add(row[0])
def list(self, ns, prefix='', marker=None, max_keys=1000000):
"""
"""
condition = ''
sql = 'SELECT id FROM objects WHERE ns=:ns'
args = dict(ns=Binary(ns))
if prefix:
condition += ' AND id LIKE :prefix'
args['prefix'] = Binary(prefix)
args['end'] = Binary(prefix + chr(255))
sql += ' AND id >= :prefix AND id < :end'
if marker:
condition += ' AND id >= :marker'
args = dict(ns=ns.encode('hex'), prefix=prefix.encode('hex') + '%',
marker=marker and marker.encode('hex'))
for row in self.cursor.execute('SELECT id FROM objects WHERE '
'ns=:ns ' + condition + ' LIMIT ' + str(max_keys),
args):
yield row[0].decode('hex')
sql += ' AND id >= :marker'
args['marker'] = Binary(marker)
for row in self.cursor.execute(sql + ' LIMIT ' + str(max_keys), args):
yield str(row[0])
class BandStoreTestCase(unittest.TestCase):
@ -250,5 +248,8 @@ def test_list(self):
['SOMEID12', 'SOMEID123'])
def suite():
    """Return a unittest suite loaded from ``BandStoreTestCase``."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(BandStoreTestCase)
if __name__ == '__main__':
unittest.main()

View file

@ -3,8 +3,8 @@
import zlib
import msgpack
NS_ARCHIVES = 'ARCHIVES'
NS_CHUNKS = 'CHUNKS'
NS_ARCHIVES = 'A'
NS_CHUNKS = 'C'
class Cache(object):

View file

@ -2,7 +2,9 @@
import shutil
import tempfile
import unittest
from archiver import Archiver
from .archiver import Archiver
from . import bandstore
class Test(unittest.TestCase):
@ -54,5 +56,11 @@ def test_symlinks(self):
self.assertEqual(os.readlink(os.path.join(dest, 'link')), '/tmp/somewhere')
def suite():
    """Return one suite combining this module's ``Test`` case with the
    suite exposed by the ``bandstore`` module."""
    # A distinct local name avoids shadowing this function inside its body.
    combined = unittest.TestSuite()
    loader = unittest.TestLoader()
    combined.addTest(loader.loadTestsFromTestCase(Test))
    combined.addTest(bandstore.suite())
    return combined
if __name__ == '__main__':
unittest.main()
unittest.TextTestRunner(verbosity=2).run(suite())