borg/darc/store.py

363 lines
13 KiB
Python
Raw Normal View History

2011-06-23 20:47:51 +00:00
from __future__ import with_statement
from ConfigParser import RawConfigParser
import errno
import fcntl
2010-05-26 19:41:55 +00:00
import os
import msgpack
2010-05-26 19:41:55 +00:00
import shutil
import struct
import tempfile
2010-05-26 19:41:55 +00:00
import unittest
2010-12-17 21:13:10 +00:00
from zlib import crc32
2010-05-26 19:41:55 +00:00
from .hashindex import NSIndex
from .helpers import IntegrityError, deferrable
2010-12-17 21:13:10 +00:00
from .lrucache import LRUCache
2010-10-19 19:08:42 +00:00
2010-05-26 19:41:55 +00:00
2010-10-26 19:25:25 +00:00
class Store(object):
    """Filesystem based transactional key value store

    On disk layout:
    dir/README
    dir/config
    dir/data/<X / SEGMENTS_PER_DIR>/<X>
    dir/segments
    dir/index

    While a transaction is in flight the directory additionally holds:
    dir/txn.tmp     snapshot being built, or transaction being discarded
    dir/txn.active  copy of config/index/segments taken by begin_txn()
    dir/txn.commit  the former txn.active once commit() has gone through
    dir/delete      segments to remove once the commit has gone through
    """
    DEFAULT_MAX_SEGMENT_SIZE = 5 * 1024 * 1024
    DEFAULT_SEGMENTS_PER_DIR = 10000

    class DoesNotExist(KeyError):
        """Requested key does not exist"""

    def __init__(self, path, create=False):
        """Open the store at `path`, creating it first if `create` is true
        """
        self.txn_active = False
        if create:
            self.create(path)
        self.open(path)

    def create(self, path):
        """Create a new empty store at `path`

        `path` must either not exist yet or be an empty directory,
        otherwise an Exception is raised.
        """
        if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
            raise Exception('Path "%s" already exists' % path)
        if not os.path.exists(path):
            os.mkdir(path)
        with open(os.path.join(path, 'README'), 'wb') as fd:
            fd.write('This is a DARC store')
        os.mkdir(os.path.join(path, 'data'))
        config = RawConfigParser()
        config.add_section('store')
        config.set('store', 'version', '1')
        config.set('store', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
        config.set('store', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
        config.set('store', 'next_segment', '0')
        config.add_section('meta')
        config.set('meta', 'manifest', '')
        config.set('meta', 'id', os.urandom(32).encode('hex'))
        NSIndex.create(os.path.join(path, 'index'))
        self.write_dict(os.path.join(path, 'segments'), {})
        with open(os.path.join(path, 'config'), 'w') as fd:
            config.write(fd)

    def open(self, path):
        """Lock the store at `path` and load its last committed state

        An exclusive flock is taken on the README file and kept until
        close().  Raises an Exception if `path` is not a directory.
        """
        self.path = path
        if not os.path.isdir(path):
            raise Exception('%s Does not look like a darc store' % path)
        self.lock_fd = open(os.path.join(path, 'README'), 'r+')
        fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
        self.rollback()

    def read_dict(self, filename):
        """Return the msgpack encoded object stored in `filename`
        """
        with open(filename, 'rb') as fd:
            return msgpack.unpackb(fd.read())

    def write_dict(self, filename, d):
        """Store `d` msgpack encoded in `filename`

        The data is written to `filename`.tmp first and then renamed into
        place.
        """
        with open(filename+'.tmp', 'wb') as fd:
            fd.write(msgpack.packb(d))
        os.rename(filename+'.tmp', filename)

    def _read_config(self):
        """Parse the store's config file and return the parser
        """
        config = RawConfigParser()
        config.read(os.path.join(self.path, 'config'))
        return config

    def _make_io(self, config):
        """Build a SegmentIO from the ``store`` section of `config`
        """
        next_segment = config.getint('store', 'next_segment')
        max_segment_size = config.getint('store', 'max_segment_size')
        segments_per_dir = config.getint('store', 'segments_per_dir')
        return SegmentIO(self.path, next_segment, max_segment_size, segments_per_dir)

    def delete_segments(self):
        """Remove the segment files listed in the ``delete`` file

        Each listed segment is also dropped from the ``segments`` file.
        Does nothing if there is no ``delete`` file.
        """
        delete_path = os.path.join(self.path, 'delete')
        if not os.path.exists(delete_path):
            return
        # rollback() calls this before it has assigned self.io when a store
        # is opened, so work with a SegmentIO built from the on-disk config
        # rather than relying on self.io being there.
        io = self._make_io(self._read_config())
        segments = self.read_dict(os.path.join(self.path, 'segments'))
        for segment in self.read_dict(delete_path):
            # Keep the pop() out of the assert: asserts are skipped under
            # ``python -O`` but the entry has to be dropped regardless.
            live_objects = segments.pop(segment, 0)
            assert live_objects == 0
            io.delete_segment(segment, missing_ok=True)
        self.write_dict(os.path.join(self.path, 'segments'), segments)

    def begin_txn(self):
        """Start a transaction by snapshotting config, index and segments

        The snapshot is assembled in ``txn.tmp`` and renamed to
        ``txn.active`` once it is complete.
        """
        txn_dir = os.path.join(self.path, 'txn.tmp')
        # Initialize transaction snapshot
        os.mkdir(txn_dir)
        shutil.copy(os.path.join(self.path, 'config'), txn_dir)
        shutil.copy(os.path.join(self.path, 'index'), txn_dir)
        shutil.copy(os.path.join(self.path, 'segments'), txn_dir)
        os.rename(os.path.join(self.path, 'txn.tmp'),
                  os.path.join(self.path, 'txn.active'))
        # Segments that lost at least one object during this transaction
        self.compact = set()
        self.txn_active = True

    def close(self):
        """Discard any uncommitted changes and release the store lock
        """
        self.rollback()
        self.lock_fd.close()

    def commit(self, meta=None):
        """Commit transaction

        `meta` replaces the content of the config file's ``meta`` section;
        if it is omitted or empty the current `self.meta` is written back.
        """
        if not self.txn_active:
            # Nothing has been put or deleted since the last commit or
            # rollback.  Open a transaction anyway so that self.compact is
            # fresh and the txn.active directory renamed below exists.
            self.begin_txn()
        meta = meta or self.meta
        self.compact_segments()
        self.io.close()
        self.config.set('store', 'next_segment', self.io.segment + 1)
        self.config.remove_section('meta')
        self.config.add_section('meta')
        for k, v in meta.items():
            self.config.set('meta', k, v)
        with open(os.path.join(self.path, 'config'), 'w') as fd:
            self.config.write(fd)
        self.index.flush()
        self.write_dict(os.path.join(self.path, 'segments'), self.segments)
        # If we crash before this line, the transaction will be
        # rolled back by open()
        os.rename(os.path.join(self.path, 'txn.active'),
                  os.path.join(self.path, 'txn.commit'))
        # rollback() finishes the commit: it deletes the compacted segments,
        # removes the snapshot and reloads the state from disk
        self.rollback()

    def compact_segments(self):
        """Compact sparse segments by copying data into new segments

        The compacted segments are recorded in the ``delete`` file.
        """
        if not self.compact:
            return
        self.io.close_segment()

        def lookup(key):
            # `segment` is the loop variable below: an object is only
            # copied if the index still points at the segment being read
            return self.index.get(key, (-1, -1))[0] == segment
        segments = self.segments
        for segment in self.compact:
            if segments[segment] > 0:
                for key, data in self.io.iter_objects(segment, lookup):
                    new_segment, offset = self.io.write(key, data)
                    self.index[key] = new_segment, offset
                    segments.setdefault(new_segment, 0)
                    segments[new_segment] += 1
                    segments[segment] -= 1
        self.write_dict(os.path.join(self.path, 'delete'), tuple(self.compact))

    def rollback(self):
        """Bring the store back to its last committed state

        Finishes a transaction that already reached the ``txn.commit``
        stage, undoes one that did not get that far, and then reloads
        index, segment counts, config and segment I/O from disk.
        """
        # Commit any half committed transaction
        if os.path.exists(os.path.join(self.path, 'txn.commit')):
            self.delete_segments()
            os.rename(os.path.join(self.path, 'txn.commit'),
                      os.path.join(self.path, 'txn.tmp'))
        delete_path = os.path.join(self.path, 'delete')
        if os.path.exists(delete_path):
            os.unlink(delete_path)
        # Roll back active transaction
        txn_dir = os.path.join(self.path, 'txn.active')
        if os.path.exists(txn_dir):
            shutil.copy(os.path.join(txn_dir, 'config'), self.path)
            shutil.copy(os.path.join(txn_dir, 'index'), self.path)
            shutil.copy(os.path.join(txn_dir, 'segments'), self.path)
            os.rename(txn_dir, os.path.join(self.path, 'txn.tmp'))
        # Remove partially removed transaction
        if os.path.exists(os.path.join(self.path, 'txn.tmp')):
            shutil.rmtree(os.path.join(self.path, 'txn.tmp'))
        self.index = NSIndex(os.path.join(self.path, 'index'))
        self.segments = self.read_dict(os.path.join(self.path, 'segments'))
        self.config = self._read_config()
        if self.config.getint('store', 'version') != 1:
            raise Exception('%s Does not look like a darc store' % self.path)
        self.meta = dict(self.config.items('meta'))
        self.io = self._make_io(self.config)
        # Drop segment files written by the transaction that was discarded
        self.io.cleanup()
        self.txn_active = False

    @deferrable
    def get(self, id):
        """Return the data stored under `id`

        Raises DoesNotExist if `id` is not in the index.
        """
        try:
            segment, offset = self.index[id]
            return self.io.read(segment, offset, id)
        except KeyError:
            raise self.DoesNotExist

    @deferrable
    def put(self, id, data):
        """Store `data` under `id`, starting a transaction if necessary

        A previous object stored under the same `id` is superseded and its
        segment is marked for compaction.
        """
        if not self.txn_active:
            self.begin_txn()
        try:
            segment, _ = self.index[id]
            self.segments[segment] -= 1
            self.compact.add(segment)
        except KeyError:
            pass
        segment, offset = self.io.write(id, data)
        self.segments.setdefault(segment, 0)
        self.segments[segment] += 1
        self.index[id] = segment, offset

    @deferrable
    def delete(self, id):
        """Remove `id` from the store, starting a transaction if necessary

        Raises DoesNotExist if `id` is not in the index.
        """
        if not self.txn_active:
            self.begin_txn()
        try:
            segment, offset = self.index.pop(id)
            self.segments[segment] -= 1
            self.compact.add(segment)
        except KeyError:
            raise self.DoesNotExist

    def flush_rpc(self, *args):
        """Accept and ignore any arguments; does nothing
        """
        pass
2011-08-18 20:23:05 +00:00
class SegmentIO(object):
    """Reads and writes the segment files below ``<path>/data``

    A segment file starts with the 8 byte marker ``DSEGMENT`` followed by
    a sequence of entries.  Every entry is a header (`header_fmt`: total
    entry size including the header, a tag byte that is always 0, the
    crc32 of the payload and the 32 byte key) followed by the payload.
    """

    header_fmt = struct.Struct('<IBI32s')
    assert header_fmt.size == 41

    def __init__(self, path, next_segment, limit, segments_per_dir, capacity=100):
        """Set up segment I/O for the store at `path`

        `next_segment` is the number of the first segment to write to,
        `limit` the size after which write() moves on to a new segment,
        `segments_per_dir` the number of segments sharing one directory
        and `capacity` the size of the cache of open files.
        """
        self.path = path
        self.fds = LRUCache(capacity)
        self.segment = next_segment
        self.limit = limit
        self.segments_per_dir = segments_per_dir
        self.offset = 0

    def close(self):
        """Close all cached files; the object can not do I/O afterwards
        """
        # Iterate over a copy, entries are removed while looping
        for segment in list(self.fds.keys()):
            self.fds.pop(segment).close()
        self.fds = None  # Just to make sure we're disabled

    def cleanup(self):
        """Delete segment files left by aborted transactions

        Starting at the current segment number, files are removed until
        the first number for which no file exists.
        """
        segment = self.segment
        while True:
            filename = self.segment_filename(segment)
            if not os.path.exists(filename):
                break
            os.unlink(filename)
            segment += 1

    def segment_filename(self, segment):
        """Return the path of the file holding segment number `segment`
        """
        # Floor division: the directory is numbered by an integer
        dirnum = segment // self.segments_per_dir
        return os.path.join(self.path, 'data', str(dirnum), str(segment))

    def get_fd(self, segment, write=False):
        """Return an open file for `segment`, using the cache if possible

        With `write` set a file that is not cached yet is created (or
        truncated) for reading and writing, otherwise it is opened read
        only.
        """
        try:
            return self.fds[segment]
        except KeyError:
            filename = self.segment_filename(segment)
            if write:
                # Segment numbers can be skipped, so the first segment that
                # gets written into a directory is not necessarily a
                # multiple of segments_per_dir.  Check for the directory
                # every time a new segment file is created.
                dirname = os.path.dirname(filename)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)
            # Segments hold packed binary data, hence the binary modes
            fd = open(filename, 'w+b' if write else 'rb')
            self.fds[segment] = fd
            return fd

    def delete_segment(self, segment, missing_ok=False):
        """Delete the file of `segment`

        A file that does not exist raises OSError unless `missing_ok` is
        set.  Any other OSError is always passed on.
        """
        import sys
        try:
            os.unlink(self.segment_filename(segment))
        except OSError:
            # sys.exc_info() instead of binding the exception in the except
            # clause: this spelling is accepted by old and new interpreters
            e = sys.exc_info()[1]
            if not missing_ok or e.errno != errno.ENOENT:
                raise

    def read(self, segment, offset, id):
        """Return the payload of the entry at `offset` in `segment`

        Raises IntegrityError if the entry header is incomplete, is not
        tagged 0, does not carry the key `id`, or if the payload does not
        match the stored crc32.
        """
        fd = self.get_fd(segment)
        fd.seek(offset)
        header = fd.read(self.header_fmt.size)
        if len(header) != self.header_fmt.size:
            # unpack() would fail with a struct.error on a short read
            raise IntegrityError('Invalid segment entry header')
        size, magic, hash, id_ = self.header_fmt.unpack(header)
        if magic != 0 or id != id_:
            raise IntegrityError('Invalid segment entry header')
        data = fd.read(size - self.header_fmt.size)
        if crc32(data) & 0xffffffff != hash:
            raise IntegrityError('Segment checksum mismatch')
        return data

    def iter_objects(self, segment, lookup):
        """Iterate over the ``(key, data)`` pairs stored in `segment`

        Only entries for which ``lookup(key)`` is true are read and
        yielded, all others are skipped.  Raises IntegrityError if the
        file does not start with the segment marker, if an entry header is
        incomplete or not tagged 0, or if a payload does not match its
        crc32.
        """
        fd = self.get_fd(segment)
        fd.seek(0)
        if fd.read(8) != 'DSEGMENT':
            raise IntegrityError('Invalid segment header')
        offset = 8
        header = fd.read(self.header_fmt.size)
        while header:
            if len(header) != self.header_fmt.size:
                raise IntegrityError('Truncated segment entry header')
            size, magic, hash, key = self.header_fmt.unpack(header)
            if magic != 0:
                raise IntegrityError('Unknown segment entry header')
            # Start of the next entry
            offset += size
            if lookup(key):
                data = fd.read(size - self.header_fmt.size)
                if crc32(data) & 0xffffffff != hash:
                    raise IntegrityError('Segment checksum mismatch')
                yield key, data
            # Always reposition: the entry may have been skipped, and the
            # consumer may have used this file while we were suspended
            fd.seek(offset)
            header = fd.read(self.header_fmt.size)

    def write(self, id, data):
        """Append `data` under the key `id` to the current segment

        A new segment is started first if the current one is not empty and
        the entry would push it beyond the size limit.  Returns the
        ``(segment, offset)`` pair locating the entry.
        """
        size = len(data) + self.header_fmt.size
        if self.offset and self.offset + size > self.limit:
            self.close_segment()
        fd = self.get_fd(self.segment, write=True)
        fd.seek(self.offset)
        if self.offset == 0:
            # A fresh segment file starts with the segment marker
            fd.write('DSEGMENT')
            self.offset = 8
        offset = self.offset
        hash = crc32(data) & 0xffffffff
        fd.write(self.header_fmt.pack(size, 0, hash, id))
        fd.write(data)
        self.offset += size
        return self.segment, offset

    def close_segment(self):
        """Make the next write() start a new segment
        """
        self.segment += 1
        self.offset = 0
2010-05-26 19:41:55 +00:00
2010-10-26 19:25:25 +00:00
class StoreTestCase(unittest.TestCase):
    """Tests running Store against a temporary directory"""

    def setUp(self):
        self.tmppath = tempfile.mkdtemp()
        self.store = Store(os.path.join(self.tmppath, 'store'), create=True)

    def tearDown(self):
        shutil.rmtree(self.tmppath)

    def test1(self):
        """Test put, get, delete and commit, then reopen the store
        """
        for x in range(100):
            self.store.put('%-32d' % x, 'SOMEDATA')
        key50 = '%-32d' % 50
        self.assertEqual(self.store.get(key50), 'SOMEDATA')
        self.store.delete(key50)
        self.assertRaises(self.store.DoesNotExist, lambda: self.store.get(key50))
        self.store.commit()
        self.store.close()
        # The committed state has to be visible through a fresh instance
        store2 = Store(os.path.join(self.tmppath, 'store'))
        try:
            self.assertEqual(store2.get('%-32d' % 51), 'SOMEDATA')
            self.assertRaises(store2.DoesNotExist, lambda: store2.get(key50))
        finally:
            store2.close()

    def test2(self):
        """Test multiple sequential transactions
        """
        self.store.put('00000000000000000000000000000000', 'foo')
        self.store.put('00000000000000000000000000000001', 'foo')
        self.store.commit()
        self.store.delete('00000000000000000000000000000000')
        self.store.put('00000000000000000000000000000001', 'bar')
        self.store.commit()
        self.assertEqual(self.store.get('00000000000000000000000000000001'), 'bar')
2010-05-26 19:41:55 +00:00
2010-10-19 19:08:42 +00:00
def suite():
    """Return a test suite made of the test methods of StoreTestCase
    """
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(StoreTestCase)
2010-10-19 19:08:42 +00:00
2010-05-26 19:41:55 +00:00
# Run the tests of this module when the file is executed as a script
if __name__ == '__main__':
    unittest.main()