mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-03 05:35:58 +00:00
An experimental sqlite based storage implementation.
This commit is contained in:
parent
3d89bf7eac
commit
62a4a37544
4 changed files with 172 additions and 13 deletions
|
@ -5,8 +5,11 @@
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
|
|
||||||
from chunkifier import chunkify
|
from chunkifier import chunkify
|
||||||
from cache import Cache
|
from cache import Cache, NS_ARCHIVES, NS_CHUNKS
|
||||||
from store import Store, NS_ARCHIVES, NS_CHUNKS, CHUNK_SIZE
|
from sqlitestore import SqliteStore
|
||||||
|
|
||||||
|
|
||||||
|
CHUNK_SIZE = 256 * 1024
|
||||||
|
|
||||||
|
|
||||||
class Archive(object):
|
class Archive(object):
|
||||||
|
@ -202,7 +205,7 @@ def run(self):
|
||||||
help="Display archive statistics", metavar="ARCHIVE")
|
help="Display archive statistics", metavar="ARCHIVE")
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
if options.store:
|
if options.store:
|
||||||
self.store = Store(options.store)
|
self.store = SqliteStore(options.store)
|
||||||
else:
|
else:
|
||||||
parser.error('No store path specified')
|
parser.error('No store path specified')
|
||||||
self.cache = Cache(self.store)
|
self.cache = Cache(self.store)
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from chunkifier import checksum
|
from chunkifier import checksum
|
||||||
from store import Store, NS_ARCHIVES, NS_CHUNKS
|
|
||||||
|
NS_ARCHIVES = 'ARCHIVES'
|
||||||
|
NS_CHUNKS = 'CHUNKS'
|
||||||
|
|
||||||
|
|
||||||
class Cache(object):
|
class Cache(object):
|
||||||
|
@ -46,6 +48,8 @@ def init(self):
|
||||||
return
|
return
|
||||||
print 'Recreating cache...'
|
print 'Recreating cache...'
|
||||||
for id in self.store.list(NS_ARCHIVES):
|
for id in self.store.list(NS_ARCHIVES):
|
||||||
|
|
||||||
|
|
||||||
archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id)))
|
archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id)))
|
||||||
self.archives.append(archive['name'])
|
self.archives.append(archive['name'])
|
||||||
for id, sum, csize, osize in archive['chunks']:
|
for id, sum, csize, osize in archive['chunks']:
|
||||||
|
@ -56,7 +60,7 @@ def init(self):
|
||||||
print 'done'
|
print 'done'
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
assert self.store.state == Store.OPEN
|
assert self.store.state == self.store.OPEN
|
||||||
print 'saving cache'
|
print 'saving cache'
|
||||||
data = {'uuid': self.store.uuid,
|
data = {'uuid': self.store.uuid,
|
||||||
'chunkmap': self.chunkmap, 'summap': self.summap,
|
'chunkmap': self.chunkmap, 'summap': self.summap,
|
||||||
|
@ -81,7 +85,7 @@ def add_chunk(self, data):
|
||||||
return self.init_chunk(id, sum, csize, osize)
|
return self.init_chunk(id, sum, csize, osize)
|
||||||
|
|
||||||
def init_chunk(self, id, sum, csize, osize):
|
def init_chunk(self, id, sum, csize, osize):
|
||||||
self.chunkmap[id] = (1, sum, osize, csize)
|
self.chunkmap[id] = (1, sum, csize, osize)
|
||||||
self.summap[sum] = self.summap.get(sum, 0) + 1
|
self.summap[sum] = self.summap.get(sum, 0) + 1
|
||||||
return id, sum, csize, osize
|
return id, sum, csize, osize
|
||||||
|
|
||||||
|
@ -91,7 +95,7 @@ def seen_chunk(self, id):
|
||||||
|
|
||||||
def chunk_incref(self, id):
|
def chunk_incref(self, id):
|
||||||
count, sum, csize, osize = self.chunkmap[id]
|
count, sum, csize, osize = self.chunkmap[id]
|
||||||
self.chunkmap[id] = (count + 1, sum, osize, csize)
|
self.chunkmap[id] = (count + 1, sum, csize, osize)
|
||||||
self.summap[sum] += 1
|
self.summap[sum] += 1
|
||||||
return id, sum, csize, osize
|
return id, sum, csize, osize
|
||||||
|
|
||||||
|
|
|
@ -7,11 +7,6 @@
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
CHUNK_SIZE = 256 * 1024
|
|
||||||
NS_ARCHIVES = 'ARCHIVES'
|
|
||||||
NS_CHUNKS = 'CHUNKS'
|
|
||||||
|
|
||||||
|
|
||||||
class Store(object):
|
class Store(object):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
|
@ -33,7 +28,7 @@ def __init__(self, path):
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
self.create(path)
|
self.create(path)
|
||||||
self.open(path)
|
self.open(path)
|
||||||
|
|
||||||
def create(self, path):
|
def create(self, path):
|
||||||
os.mkdir(path)
|
os.mkdir(path)
|
||||||
open(os.path.join(path, 'version'), 'wb').write(self.VERSION)
|
open(os.path.join(path, 'version'), 'wb').write(self.VERSION)
|
157
dedupestore/sqlitestore.py
Normal file
157
dedupestore/sqlitestore.py
Normal file
|
@ -0,0 +1,157 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import unittest
|
||||||
|
import sqlite3
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
class SqliteStore(object):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
|
||||||
|
class DoesNotExist(KeyError):
|
||||||
|
""""""
|
||||||
|
|
||||||
|
class AlreadyExists(KeyError):
|
||||||
|
""""""
|
||||||
|
|
||||||
|
IDLE = 'Idle'
|
||||||
|
OPEN = 'Open'
|
||||||
|
ACTIVE = 'Active'
|
||||||
|
VERSION = 'DEDUPESTORE VERSION 1'
|
||||||
|
|
||||||
|
def __init__(self, path):
|
||||||
|
if not os.path.exists(path):
|
||||||
|
self.create(path)
|
||||||
|
self.cnx = sqlite3.connect(path)
|
||||||
|
self.cursor = self.cnx.cursor()
|
||||||
|
self.uuid, self.tid = self.cursor.execute('SELECT uuid, tid FROM system').fetchone()
|
||||||
|
self.state = self.OPEN
|
||||||
|
|
||||||
|
def create(self, path):
|
||||||
|
cnx = sqlite3.connect(path)
|
||||||
|
cnx.execute('PRAGMA auto_vacuum=full')
|
||||||
|
cnx.execute('CREATE TABLE objects(ns TEXT NOT NULL, id NOT NULL, data NOT NULL)')
|
||||||
|
cnx.execute('CREATE TABLE system(uuid NOT NULL, tid NOT NULL)')
|
||||||
|
cnx.execute('INSERT INTO system VALUES(?,?)', (uuid.uuid1().hex, 0))
|
||||||
|
cnx.execute('CREATE UNIQUE INDEX objects_pk ON objects(ns, id)')
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self.cnx.close()
|
||||||
|
|
||||||
|
def commit(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
self.cursor.execute('UPDATE system SET tid=tid+1')
|
||||||
|
import time
|
||||||
|
t = time.time()
|
||||||
|
self.cnx.commit()
|
||||||
|
print time.time() - t
|
||||||
|
self.tid += 1
|
||||||
|
|
||||||
|
def rollback(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
self.cnx.rollback()
|
||||||
|
|
||||||
|
def get(self, ns, id):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
self.cursor.execute('SELECT data FROM objects WHERE ns=? and id=?',
|
||||||
|
(ns.encode('hex'), id.encode('hex')))
|
||||||
|
row = self.cursor.fetchone()
|
||||||
|
if row:
|
||||||
|
return str(row[0])
|
||||||
|
else:
|
||||||
|
raise self.DoesNotExist
|
||||||
|
|
||||||
|
def put(self, ns, id, data):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
self.cursor.execute('INSERT INTO objects (ns, id, data) '
|
||||||
|
'VALUES(?, ?, ?)',
|
||||||
|
(ns.encode('hex'), id.encode('hex'),
|
||||||
|
sqlite3.Binary(data)))
|
||||||
|
except sqlite3.IntegrityError:
|
||||||
|
raise self.AlreadyExists
|
||||||
|
|
||||||
|
def delete(self, ns, id):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
self.cursor.execute('DELETE FROM objects WHERE ns=? AND id=?',
|
||||||
|
(ns.encode('hex'), id.encode('hex')))
|
||||||
|
|
||||||
|
def list(self, ns, prefix='', marker=None, max_keys=1000000):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
condition = ''
|
||||||
|
if prefix:
|
||||||
|
condition += ' AND id LIKE :prefix'
|
||||||
|
if marker:
|
||||||
|
condition += ' AND id >= :marker'
|
||||||
|
args = dict(ns=ns.encode('hex'), prefix=prefix.encode('hex') + '%',
|
||||||
|
marker=marker and marker.encode('hex'))
|
||||||
|
for row in self.cursor.execute('SELECT id FROM objects WHERE '
|
||||||
|
'ns=:ns ' + condition + ' LIMIT ' + str(max_keys),
|
||||||
|
args):
|
||||||
|
yield row[0].decode('hex')
|
||||||
|
|
||||||
|
|
||||||
|
class SqliteStoreTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.tmppath = tempfile.mkdtemp()
|
||||||
|
self.store = SqliteStore(os.path.join(self.tmppath, 'store'))
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
shutil.rmtree(self.tmppath)
|
||||||
|
|
||||||
|
def test1(self):
|
||||||
|
self.assertEqual(self.store.tid, 0)
|
||||||
|
self.assertEqual(self.store.state, self.store.OPEN)
|
||||||
|
self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
|
||||||
|
self.assertRaises(self.store.AlreadyExists, lambda: self.store.put('SOMENS', 'SOMEID', 'SOMEDATA'))
|
||||||
|
self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
|
||||||
|
self.store.rollback()
|
||||||
|
self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
|
||||||
|
self.assertEqual(self.store.tid, 0)
|
||||||
|
|
||||||
|
def test2(self):
|
||||||
|
self.assertEqual(self.store.tid, 0)
|
||||||
|
self.assertEqual(self.store.state, self.store.OPEN)
|
||||||
|
self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
|
||||||
|
self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
|
||||||
|
self.store.commit()
|
||||||
|
self.assertEqual(self.store.tid, 1)
|
||||||
|
self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
|
||||||
|
self.store.delete('SOMENS', 'SOMEID')
|
||||||
|
self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
|
||||||
|
self.store.rollback()
|
||||||
|
self.assertEqual(self.store.get('SOMENS', 'SOMEID'), 'SOMEDATA')
|
||||||
|
self.store.delete('SOMENS', 'SOMEID')
|
||||||
|
self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
|
||||||
|
self.store.commit()
|
||||||
|
self.assertEqual(self.store.tid, 2)
|
||||||
|
self.assertRaises(self.store.DoesNotExist, lambda: self.store.get('SOMENS', 'SOMEID'))
|
||||||
|
|
||||||
|
def test_list(self):
|
||||||
|
self.store.put('SOMENS', 'SOMEID12', 'SOMEDATA')
|
||||||
|
self.store.put('SOMENS', 'SOMEID', 'SOMEDATA')
|
||||||
|
self.store.put('SOMENS', 'SOMEID1', 'SOMEDATA')
|
||||||
|
self.store.put('SOMENS', 'SOMEID123', 'SOMEDATA')
|
||||||
|
self.store.commit()
|
||||||
|
self.assertEqual(list(self.store.list('SOMENS', max_keys=3)),
|
||||||
|
['SOMEID', 'SOMEID1', 'SOMEID12'])
|
||||||
|
self.assertEqual(list(self.store.list('SOMENS', marker='SOMEID12')),
|
||||||
|
['SOMEID12', 'SOMEID123'])
|
||||||
|
self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', max_keys=2)),
|
||||||
|
['SOMEID1', 'SOMEID12'])
|
||||||
|
self.assertEqual(list(self.store.list('SOMENS', prefix='SOMEID1', marker='SOMEID12')),
|
||||||
|
['SOMEID12', 'SOMEID123'])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
Loading…
Reference in a new issue