borg/darc/store.py

449 lines
16 KiB
Python
Raw Normal View History

2011-06-23 20:47:51 +00:00
from __future__ import with_statement
from ConfigParser import RawConfigParser
import fcntl
2010-05-26 19:41:55 +00:00
import os
import re
2010-05-26 19:41:55 +00:00
import shutil
import struct
import tempfile
2010-05-26 19:41:55 +00:00
import unittest
2010-12-17 21:13:10 +00:00
from zlib import crc32
2010-05-26 19:41:55 +00:00
from .hashindex import NSIndex
from .helpers import IntegrityError, deferrable, read_msgpack, write_msgpack
2010-12-17 21:13:10 +00:00
from .lrucache import LRUCache
2010-10-19 19:08:42 +00:00
MAX_OBJECT_SIZE = 20 * 1024 * 1024
TAG_PUT = 0
TAG_DELETE = 1
TAG_COMMIT = 2
2010-05-26 19:41:55 +00:00
2010-10-26 19:25:25 +00:00
class Store(object):
    """Filesystem based transactional key value store

    On disk layout:
    dir/README
    dir/config
    dir/data/<X / SEGMENTS_PER_DIR>/<X>
    dir/index.X
    dir/hints.X

    Writes go to append-only segment files (see LoggedIO); a transaction
    becomes durable when a commit tag is written, the in-memory key ->
    (segment, offset) index is flushed to ``index.<head>`` and sparse
    segments have been compacted.
    """
    DEFAULT_MAX_SEGMENT_SIZE = 5 * 1024 * 1024
    DEFAULT_SEGMENTS_PER_DIR = 10000

    class DoesNotExist(KeyError):
        """Requested key does not exist"""

    def __init__(self, path, create=False):
        if create:
            self.create(path)
        self.open(path)

    def create(self, path):
        """Create a new empty store at `path`

        Raises an exception if `path` exists and is not an empty directory.
        """
        if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
            raise Exception('Path "%s" already exists' % path)
        if not os.path.exists(path):
            os.mkdir(path)
        with open(os.path.join(path, 'README'), 'wb') as fd:
            fd.write('This is a DARC store')
        os.mkdir(os.path.join(path, 'data'))
        config = RawConfigParser()
        config.add_section('store')
        config.set('store', 'version', '1')
        config.set('store', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
        config.set('store', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
        # Random store id, stored hex encoded in the config file
        config.set('store', 'id', os.urandom(32).encode('hex'))
        with open(os.path.join(path, 'config'), 'w') as fd:
            config.write(fd)

    def open(self, path):
        """Open an existing store, take an exclusive lock and load its state"""
        self.head = None
        self.path = path
        if not os.path.isdir(path):
            raise Exception('%s Does not look like a darc store' % path)
        # The README file doubles as the lock file
        self.lock_fd = open(os.path.join(path, 'README'), 'r+')
        fcntl.flock(self.lock_fd, fcntl.LOCK_EX)
        self.config = RawConfigParser()
        self.config.read(os.path.join(self.path, 'config'))
        if self.config.getint('store', 'version') != 1:
            # Bug fix: the format argument was missing, so the message
            # contained a literal '%s' instead of the store path
            raise Exception('%s Does not look like a darc store' % path)
        self.max_segment_size = self.config.getint('store', 'max_segment_size')
        self.segments_per_dir = self.config.getint('store', 'segments_per_dir')
        self.id = self.config.get('store', 'id').decode('hex')
        # rollback() (re)creates self.io and loads the current index
        self.rollback()

    def close(self):
        # Releases the flock taken in open()
        self.lock_fd.close()

    def commit(self, rollback=True):
        """Commit transaction
        """
        self.io.write_commit()
        self.compact_segments()
        self.write_index()
        self.rollback()

    def _available_indices(self, reverse=False):
        """Return the sorted list of index generation numbers on disk"""
        names = [int(name[6:]) for name in os.listdir(self.path) if re.match('index\.\d+', name)]
        names.sort(reverse=reverse)
        return names

    def open_index(self, head, read_only=False):
        """Load the index and hints for segment generation `head`

        With `read_only` the committed index file is opened in place;
        otherwise it is copied to ``index.tmp`` so the transaction can
        modify it without touching the committed generation.
        """
        if head is None:
            # Brand new store: start with an empty index and no hints
            self.index = NSIndex.create(os.path.join(self.path, 'index.tmp'))
            self.segments = {}
            self.compact = set()
        else:
            if read_only:
                self.index = NSIndex(os.path.join(self.path, 'index.%d') % head)
            else:
                shutil.copy(os.path.join(self.path, 'index.%d' % head),
                            os.path.join(self.path, 'index.tmp'))
                self.index = NSIndex(os.path.join(self.path, 'index.tmp'))
            hints = read_msgpack(os.path.join(self.path, 'hints.%d' % head))
            if hints['version'] != 1:
                raise ValueError('Unknown hints file version: %d' % hints['version'])
            # segments: live entry count per segment; compact: sparse segments
            self.segments = hints['segments']
            self.compact = set(hints['compact'])

    def write_index(self):
        """Flush the index and hints to disk under the new head generation"""
        hints = {'version': 1,
                 'segments': self.segments,
                 'compact': list(self.compact)}
        write_msgpack(os.path.join(self.path, 'hints.%d' % self.io.head), hints)
        self.index.flush()
        os.rename(os.path.join(self.path, 'index.tmp'),
                  os.path.join(self.path, 'index.%d' % self.io.head))
        # Remove old indices
        current = '.%d' % self.io.head
        for name in os.listdir(self.path):
            if not name.startswith('index.') and not name.startswith('hints.'):
                continue
            if name.endswith(current):
                continue
            os.unlink(os.path.join(self.path, name))

    def compact_segments(self):
        """Compact sparse segments by copying data into new segments
        """
        if not self.compact:
            return
        def lookup(tag, key):
            # An entry is live if the index still points at this segment
            return tag == TAG_PUT and self.index.get(key, (-1, -1))[0] == segment
        segments = self.segments
        for segment in self.compact:
            if segments[segment] > 0:
                for tag, key, data in self.io.iter_objects(segment, lookup, include_data=True):
                    # Re-append the live entry and repoint the index at it
                    new_segment, offset = self.io.write_put(key, data)
                    self.index[key] = new_segment, offset
                    segments.setdefault(new_segment, 0)
                    segments[new_segment] += 1
                    segments[segment] -= 1
                assert segments[segment] == 0
        self.io.write_commit()
        for segment in self.compact:
            # All entries have been copied out; the file can be deleted
            assert self.segments.pop(segment) == 0
            self.io.delete_segment(segment)
        self.compact = set()

    def recover(self, path):
        """Recover missing index by replaying logs"""
        start = None
        available = self._available_indices()
        if available:
            start = available[-1]
        self.open_index(start)
        for segment, filename in self.io._segment_names():
            # Segments up to `start` are already reflected in the index
            if start is not None and segment <= start:
                continue
            self.segments[segment] = 0
            for tag, key, offset in self.io.iter_objects(segment):
                if tag == TAG_PUT:
                    try:
                        # Overwrite: the old segment loses a live entry
                        s, _ = self.index[key]
                        self.compact.add(s)
                        self.segments[s] -= 1
                    except KeyError:
                        pass
                    self.index[key] = segment, offset
                    self.segments[segment] += 1
                elif tag == TAG_DELETE:
                    try:
                        s, _ = self.index.pop(key)
                        self.segments[s] -= 1
                        self.compact.add(s)
                        self.compact.add(segment)
                    except KeyError:
                        pass
            if self.segments[segment] == 0:
                self.compact.add(segment)
        if self.io.head is not None:
            self.write_index()

    def rollback(self):
        """Discard any uncommitted changes and reload the committed state
        """
        self._active_txn = False
        # Re-creating LoggedIO also deletes uncommitted segment files
        self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
        if self.io.head is not None and not os.path.exists(os.path.join(self.path, 'index.%d' % self.io.head)):
            self.recover(self.path)
        self.open_index(self.io.head, read_only=True)

    @deferrable
    def get(self, id):
        try:
            segment, offset = self.index[id]
            return self.io.read(segment, offset, id)
        except KeyError:
            raise self.DoesNotExist

    @deferrable
    def put(self, id, data):
        if not self._active_txn:
            # First write of this transaction: switch to a writable index
            self._active_txn = True
            self.open_index(self.io.head)
        try:
            # Overwriting an existing key: log a delete for the old entry
            segment, _ = self.index[id]
            self.segments[segment] -= 1
            self.compact.add(segment)
            self.compact.add(self.io.write_delete(id))
        except KeyError:
            pass
        segment, offset = self.io.write_put(id, data)
        self.segments.setdefault(segment, 0)
        self.segments[segment] += 1
        self.index[id] = segment, offset

    @deferrable
    def delete(self, id):
        if not self._active_txn:
            # First write of this transaction: switch to a writable index
            self._active_txn = True
            self.open_index(self.io.head)
        try:
            segment, offset = self.index.pop(id)
            self.segments[segment] -= 1
            self.compact.add(segment)
            self.compact.add(self.io.write_delete(id))
        except KeyError:
            raise self.DoesNotExist

    def flush_rpc(self, *args):
        # No-op: local stores have nothing to flush (RPC interface compat)
        pass

    def add_callback(self, cb, data):
        # Local stores complete "deferred" operations immediately
        cb(None, None, data)
class LoggedIO(object):
    """Append-only segment file I/O used by Store.

    Each segment file starts with the magic string 'DSEGMENT' followed by
    a sequence of entries.  Every entry is: crc32 (4 bytes, little endian),
    total entry size (4 bytes), tag byte (TAG_PUT / TAG_DELETE /
    TAG_COMMIT); PUT and DELETE entries then carry a 32 byte key, and PUT
    entries the payload.  The crc covers everything after the crc field
    itself.  A segment ending in a TAG_COMMIT entry is considered
    committed; trailing uncommitted segments are deleted by cleanup().
    """
    # Generic entry header: crc32, size, tag
    header_fmt = struct.Struct('<IIB')
    assert header_fmt.size == 9
    # PUT/DELETE entry header: crc32, size, tag, 32 byte key
    put_header_fmt = struct.Struct('<IIB32s')
    assert put_header_fmt.size == 41
    # Header without the crc field; packed first so the crc can be
    # computed over it
    header_no_crc_fmt = struct.Struct('<IB')
    assert header_no_crc_fmt.size == 5
    crc_fmt = struct.Struct('<I')
    assert crc_fmt.size == 4

    # Pre-computed on-disk bytes of a commit entry (crc + size=9 + tag)
    _commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
    COMMIT = crc_fmt.pack(crc32(_commit)) + _commit

    def __init__(self, path, limit, segments_per_dir, capacity=100):
        self.path = path
        # LRU cache of open read-only segment file objects
        self.fds = LRUCache(capacity)
        self.segment = None
        # Soft size limit per segment; rollover happens once exceeded
        self.limit = limit
        self.segments_per_dir = segments_per_dir
        self.offset = 0
        self._write_fd = None
        self.head = None
        # Remove segment files left behind by aborted transactions
        self.cleanup()

    def close(self):
        # Close every cached read fd, then the write fd (if any)
        for segment in self.fds.keys():
            self.fds.pop(segment).close()
        self.close_segment()
        self.fds = None # Just to make sure we're disabled

    def _segment_names(self, reverse=False):
        # Yield (segment number, path) pairs in numeric order; segments
        # are spread over numbered sub-directories of data/
        for dirpath, dirs, filenames in os.walk(os.path.join(self.path, 'data')):
            dirs.sort(lambda a, b: cmp(int(a), int(b)), reverse=reverse)
            filenames.sort(lambda a, b: cmp(int(a), int(b)), reverse=reverse)
            for filename in filenames:
                yield int(filename), os.path.join(dirpath, filename)

    def cleanup(self):
        """Delete segment files left by aborted transactions
        """
        self.head = None
        self.segment = 0
        # Walk newest-first: everything after the most recent committed
        # segment belongs to an aborted transaction
        for segment, filename in self._segment_names(reverse=True):
            if self.is_complete_segment(filename):
                self.head = segment
                self.segment = self.head + 1
                return
            else:
                os.unlink(filename)

    def is_complete_segment(self, filename):
        # A committed segment ends with the 9-byte COMMIT entry
        with open(filename, 'rb') as fd:
            fd.seek(-self.header_fmt.size, 2)
            return fd.read(self.header_fmt.size) == self.COMMIT

    def segment_filename(self, segment):
        # NOTE: Python 2 integer division groups segments_per_dir
        # segments per sub-directory
        return os.path.join(self.path, 'data', str(segment / self.segments_per_dir), str(segment))

    def get_write_fd(self):
        # Roll over to a new segment once the current one is over limit
        if self.offset and self.offset > self.limit:
            self.close_segment()
        if not self._write_fd:
            if self.segment % self.segments_per_dir == 0:
                # First segment of a new sub-directory
                dirname = os.path.join(self.path, 'data', str(self.segment / self.segments_per_dir))
                if not os.path.exists(dirname):
                    os.mkdir(dirname)
            self._write_fd = open(self.segment_filename(self.segment), 'ab')
            self._write_fd.write('DSEGMENT')
            self.offset = 8
        return self._write_fd

    def get_fd(self, segment):
        # Return a cached read fd for `segment`, opening it on miss
        try:
            return self.fds[segment]
        except KeyError:
            fd = open(self.segment_filename(segment), 'rb')
            self.fds[segment] = fd
            return fd

    def delete_segment(self, segment):
        # Best effort: ignore a segment file that is already gone
        try:
            os.unlink(self.segment_filename(segment))
        except OSError, e:
            pass

    def iter_objects(self, segment, lookup=None, include_data=False):
        """Iterate over the entries of `segment`, verifying checksums.

        Yields (tag, key, data) when `include_data` is true, otherwise
        (tag, key, offset).  `lookup(tag, key)` may be supplied to filter
        which entries are yielded.  Raises IntegrityError on any
        malformed or corrupt entry.
        """
        fd = self.get_fd(segment)
        fd.seek(0)
        if fd.read(8) != 'DSEGMENT':
            raise IntegrityError('Invalid segment header')
        offset = 8
        header = fd.read(self.header_fmt.size)
        while header:
            crc, size, tag = self.header_fmt.unpack(header)
            if size > MAX_OBJECT_SIZE:
                raise IntegrityError('Invalid segment object size')
            rest = fd.read(size - self.header_fmt.size)
            # crc covers the entry minus its leading 4 crc bytes
            if crc32(rest, crc32(buffer(header, 4))) & 0xffffffff != crc:
                raise IntegrityError('Segment checksum mismatch')
            if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT):
                raise IntegrityError('Invalid segment entry header')
            key = None
            if tag in (TAG_PUT, TAG_DELETE):
                key = rest[:32]
            if not lookup or lookup(tag, key):
                if include_data:
                    yield tag, key, rest[32:]
                else:
                    yield tag, key, offset
            offset += size
            header = fd.read(self.header_fmt.size)

    def read(self, segment, offset, id):
        """Read and verify the PUT entry for `id` at (segment, offset)"""
        if segment == self.segment:
            # Reading from the segment currently being written
            self._write_fd.flush()
        fd = self.get_fd(segment)
        fd.seek(offset)
        header = fd.read(self.put_header_fmt.size)
        crc, size, tag, key = self.put_header_fmt.unpack(header)
        if size > MAX_OBJECT_SIZE:
            raise IntegrityError('Invalid segment object size')
        data = fd.read(size - self.put_header_fmt.size)
        if crc32(data, crc32(buffer(header, 4))) & 0xffffffff != crc:
            raise IntegrityError('Segment checksum mismatch')
        if tag != TAG_PUT or id != key:
            raise IntegrityError('Invalid segment entry header')
        return data

    def write_put(self, id, data):
        """Append a PUT entry; return (segment, offset) of the entry"""
        size = len(data) + self.put_header_fmt.size
        fd = self.get_write_fd()
        offset = self.offset
        header = self.header_no_crc_fmt.pack(size, TAG_PUT)
        crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
        fd.write(''.join((crc, header, id, data)))
        self.offset += size
        return self.segment, offset

    def write_delete(self, id):
        """Append a DELETE entry; return the segment it was written to"""
        fd = self.get_write_fd()
        offset = self.offset
        header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
        crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
        fd.write(''.join((crc, header, id)))
        self.offset += self.put_header_fmt.size
        return self.segment

    def write_commit(self):
        """Append a COMMIT entry and close the segment, advancing head"""
        fd = self.get_write_fd()
        header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
        crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
        fd.write(''.join((crc, header)))
        self.head = self.segment
        self.close_segment()

    def close_segment(self):
        # fsync before close so committed data is durable on disk
        if self._write_fd:
            self.segment += 1
            self.offset = 0
            os.fsync(self._write_fd)
            self._write_fd.close()
            self._write_fd = None
2010-05-26 19:41:55 +00:00
2010-10-26 19:25:25 +00:00
class StoreTestCase(unittest.TestCase):
    """Basic integration tests for Store against a temporary directory"""

    def setUp(self):
        self.tmppath = tempfile.mkdtemp()
        self.store = Store(os.path.join(self.tmppath, 'store'), create=True)

    def tearDown(self):
        shutil.rmtree(self.tmppath)

    def test1(self):
        """Put/get/delete within one transaction, then verify persistence"""
        for x in range(100):
            self.store.put('%-32d' % x, 'SOMEDATA')
        key50 = '%-32d' % 50
        self.assertEqual(self.store.get(key50), 'SOMEDATA')
        self.store.delete(key50)
        self.assertRaises(self.store.DoesNotExist, lambda: self.store.get(key50))
        self.store.commit()
        self.store.close()
        # Re-open the store to verify the committed state on disk
        store2 = Store(os.path.join(self.tmppath, 'store'))
        self.assertRaises(store2.DoesNotExist, lambda: store2.get(key50))
        for x in range(100):
            if x == 50:
                continue
            # Bug fix: the original read from the closed self.store here,
            # so persistence was never actually verified through store2
            self.assertEqual(store2.get('%-32d' % x), 'SOMEDATA')

    def test2(self):
        """Test multiple sequential transactions
        """
        self.store.put('00000000000000000000000000000000', 'foo')
        self.store.put('00000000000000000000000000000001', 'foo')
        self.store.commit()
        self.store.delete('00000000000000000000000000000000')
        self.store.put('00000000000000000000000000000001', 'bar')
        self.store.commit()
        self.assertEqual(self.store.get('00000000000000000000000000000001'), 'bar')
2010-05-26 19:41:55 +00:00
2010-10-19 19:08:42 +00:00
def suite():
    """Build and return the unittest suite for this module."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(StoreTestCase)
2010-10-19 19:08:42 +00:00
2010-05-26 19:41:55 +00:00
# Allow running this module's tests directly: python store.py
if __name__ == '__main__':
    unittest.main()