Merge branch 'master' into move-to-src

Thomas Waldmann 2016-05-30 19:38:16 +02:00
commit 045e5a1203
16 changed files with 472 additions and 164 deletions

View File

@ -14,6 +14,9 @@ matrix:
- python: 3.5
os: linux
env: TOXENV=py35
- python: nightly
os: linux
env: TOXENV=py36
- python: 3.5
os: linux
env: TOXENV=flake8
@ -25,6 +28,8 @@ matrix:
os: osx
osx_image: xcode6.4
env: TOXENV=py35
allow_failures:
- python: nightly
install:
- ./.travis/install.sh

View File

@ -5,7 +5,12 @@ setup_logging()
from borg.testsuite import has_lchflags, no_lchlfags_because, has_llfuse
from borg.testsuite.platform import fakeroot_detected
from borg import xattr
from borg import xattr, constants
def pytest_configure(config):
# no fixture-based monkey-patching since star-imports are used for the constants module
constants.PBKDF2_ITERATIONS = 1
def pytest_report_header(config, startdir):

View File

@ -158,7 +158,7 @@ class build_usage(Command):
print('generating usage docs')
# allows us to build docs without the C modules fully loaded during help generation
from borg.archiver import Archiver
parser = Archiver().build_parser(prog='borg')
parser = Archiver(prog='borg').parser
choices = {}
for action in parser._actions:
if action.choices is not None:
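
A rough sketch of what the changed line hands the usage generator (only the parser walk mirrors the surrounding code; the print loop is illustrative):

from borg.archiver import Archiver

parser = Archiver(prog='borg').parser          # the parser is now built in Archiver.__init__
choices = {}
for action in parser._actions:
    if action.choices is not None:
        choices.update(action.choices)         # maps 'create', 'extract', ... to their sub-parsers
for command, subparser in sorted(choices.items()):
    print(command, '->', subparser.format_usage().strip())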

View File

@ -96,7 +96,7 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h)
static uint32_t
buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, const uint32_t *h)
{
uint32_t lenmod = len & 0x1f;
uint32_t lenmod = len & 0x1f; /* Note: replace by constant to get small speedup */
return BARREL_SHIFT(sum, 1) ^ BARREL_SHIFT(h[remove], lenmod) ^ h[add];
}
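
As a rough, pure-Python sketch of the rolling hash being tuned here (h stands for borg's seeded 256-entry uint32 table; a toy CRC-derived table is used below because the real one is built at runtime):

import zlib

def barrel_shift(v, n):
    # 32-bit rotate left; the shift amount is taken mod 32, like BARREL_SHIFT
    n &= 0x1f
    return ((v << n) & 0xffffffff) | (v >> (32 - n))

def buzhash(data, h):
    # hash of a full window: XOR of per-byte table values, each rotated by its
    # distance from the window end (mod 32)
    total = 0
    w = len(data)
    for i, byte in enumerate(data):
        total ^= barrel_shift(h[byte], (w - 1 - i) & 0x1f)
    return total

def buzhash_update(total, remove, add, window_size, h):
    # slide the window by one byte: rotate, drop the oldest byte, mix in the new one
    lenmod = window_size & 0x1f
    return barrel_shift(total, 1) ^ barrel_shift(h[remove], lenmod) ^ h[add]

h = [zlib.crc32(bytes([i])) & 0xffffffff for i in range(256)]   # toy table, not borg's seeded one
data = b"The quick brown fox jumps over the lazy dog"
w = 16
s = buzhash(data[:w], h)
for i in range(len(data) - w):
    s = buzhash_update(s, data[i], data[i + w], w, h)
    assert s == buzhash(data[i + 1:i + 1 + w], h)               # rolling update == recompute
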
@ -196,10 +196,10 @@ chunker_fill(Chunker *c)
// We rollback the initial offset back to the start of the page,
// to avoid it not being truncated as a partial page request.
if (length > 0) {
// Linux kernels prior to 4.7 have a bug where they truncate
// last partial page of POSIX_FADV_DONTNEED request, so we need
// to page-align it ourselves. We'll need the rest of this page
// on the next read (assuming this was not EOF)
// All Linux kernels (at least up to and including 4.6(.0)) have a bug where
// they truncate last partial page of POSIX_FADV_DONTNEED request, so we need
// to page-align it ourselves. We'll need the rest of this page on the next
// read (assuming this was not EOF).
overshoot = (offset + length) & pagemask;
} else {
// For length == 0 we set overshoot 0, so the below
@ -249,11 +249,12 @@ chunker_process(Chunker *c)
PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
return NULL;
}
while(c->remaining <= window_size && !c->eof) {
while(c->remaining < min_size + window_size + 1 && !c->eof) { /* see assert in Chunker init */
if(!chunker_fill(c)) {
return NULL;
}
}
/* here we either are at eof ... */
if(c->eof) {
c->done = 1;
if(c->remaining) {
@ -268,8 +269,15 @@ chunker_process(Chunker *c)
return NULL;
}
}
/* ... or we have at least min_size + window_size + 1 bytes remaining.
* We do not want to "cut" a chunk smaller than min_size and the hash
* window starts at the potential cutting place.
*/
c->position += min_size;
c->remaining -= min_size;
n += min_size;
sum = buzhash(c->data + c->position, window_size, c->table);
while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) {
while(c->remaining > c->window_size && (sum & chunk_mask)) {
sum = buzhash_update(sum, c->data[c->position],
c->data[c->position + window_size],
window_size, c->table);
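
Building on the buzhash helpers and toy table h sketched above, a simplified illustration of the cut decision the new comments describe (not the exact C implementation; the parameters mirror the "10,13,11,4095" chunker params used by the recreate test further down):

import os

def chunkify(data, min_size, max_size, chunk_mask, window_size, table):
    # Never cut a chunk shorter than min_size; the hash window starts at the
    # potential cutting place; cut where the masked hash hits zero; force a cut
    # at max_size. Assumes min_size >= 1 and min_size + window_size + 1 <= max_size.
    start = 0
    while start < len(data):
        if len(data) - start < min_size + window_size + 1:
            yield data[start:]                                  # "eof": emit whatever is left
            return
        cut = start + min_size                                  # earliest allowed cutting place
        s = buzhash(data[cut:cut + window_size], table)
        while cut - start < max_size and cut + window_size < len(data) and s & chunk_mask:
            s = buzhash_update(s, data[cut], data[cut + window_size], window_size, table)
            cut += 1
        yield data[start:cut]
        start = cut

blob = os.urandom(1 << 18)
chunks = list(chunkify(blob, 1 << 10, 1 << 13, (1 << 11) - 1, 4095, h))
assert b''.join(chunks) == blob
assert all(len(c) >= 1 << 10 for c in chunks[:-1])              # only the last chunk may be short
assert all(len(c) <= 1 << 13 for c in chunks)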

View File

@ -371,7 +371,7 @@ Number of files: {0.stats.nfiles}'''.format(
"""
if dry_run or stdout:
if b'chunks' in item:
for data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
if stdout:
sys.stdout.buffer.write(data)
if stdout:

View File

@ -96,9 +96,10 @@ def with_archive(method):
class Archiver:
def __init__(self, lock_wait=None):
def __init__(self, lock_wait=None, prog=None):
self.exit_code = EXIT_SUCCESS
self.lock_wait = lock_wait
self.parser = self.build_parser(prog)
def print_error(self, msg, *args):
msg = args and msg % args or msg
@ -317,6 +318,7 @@ class Archiver:
status = None
# Ignore if nodump flag is set
if get_flags(path, st) & stat.UF_NODUMP:
self.print_file_status('x', path)
return
if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
if not dry_run:
@ -1117,7 +1119,7 @@ class Archiver:
self.print_warning(warning)
return args
def build_parser(self, args=None, prog=None):
def build_parser(self, prog=None):
common_parser = argparse.ArgumentParser(add_help=False, prog=prog)
common_group = common_parser.add_argument_group('Common options')
@ -2061,8 +2063,7 @@ class Archiver:
# We can't use argparse for "serve" since we don't want it to show up in "Available commands"
if args:
args = self.preprocess_args(args)
parser = self.build_parser(args)
args = parser.parse_args(args or ['-h'])
args = self.parser.parse_args(args or ['-h'])
update_excludes(args)
return args
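
A hedged sketch of the skip decision behind the new 'x' status line (nodump_status is a hypothetical helper; it assumes borg.platform re-exports get_flags, the same way archiver.py imports it):

import os
import stat
from borg.platform import get_flags           # per-platform backend, see platform_linux.pyx below

def nodump_status(path):
    # hypothetical helper: files carrying the nodump flag get status 'x' and are skipped
    st = os.lstat(path)
    if get_flags(path, st) & stat.UF_NODUMP:
        return 'x'
    return None

# the new has_lchflags test cases further down create such a file with:
#   platform.set_flags('input/file3', stat.UF_NODUMP)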

View File

@ -23,6 +23,8 @@ cdef class Chunker:
def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
min_size = 1 << chunk_min_exp
max_size = 1 << chunk_max_exp
# see chunker_process, first while loop condition, first term must be able to become True:
assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
hash_mask = (1 << hash_mask_bits) - 1
self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
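
This assert is also why the recreate test further down switches its --chunker-params from "10,12,11,4095" to "10,13,11,4095"; the arithmetic:

min_size = 1 << 10                            # chunk_min_exp = 10
window = 4095                                 # hash_window_size
assert window + min_size + 1 > 1 << 12        # 5120 > 4096: "10,12,11,4095" now fails the assert
assert window + min_size + 1 <= 1 << 13       # 5120 <= 8192: "10,13,11,4095" passes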

View File

@ -1,9 +1,12 @@
# -*- coding: utf-8 -*-
from collections import namedtuple
import locale
import os
cimport cython
from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
from libc.errno cimport errno
from cpython.exc cimport PyErr_SetFromErrnoWithFilename
API_VERSION = 2
@ -52,6 +55,7 @@ MAX_VALUE = _MAX_VALUE
assert _MAX_VALUE % 2 == 1
@cython.internal
cdef class IndexBase:
cdef HashIndex *index
@ -63,7 +67,10 @@ cdef class IndexBase:
path = os.fsencode(path)
self.index = hashindex_read(path)
if not self.index:
raise Exception('hashindex_read failed')
if errno:
PyErr_SetFromErrnoWithFilename(OSError, path)
return
raise RuntimeError('hashindex_read failed')
else:
self.index = hashindex_init(capacity, self.key_size, self.value_size)
if not self.index:
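
From the caller's side, the error contract introduced here looks roughly like this (load_index is a hypothetical helper; Repository.open_index in repository.py below relies on exactly this distinction):

import logging
from borg.hashindex import NSIndex

logger = logging.getLogger(__name__)

def load_index(path):
    # hypothetical helper showing the two failure modes of NSIndex.read()
    try:
        return NSIndex.read(path)              # path is bytes, as in Repository.open_index()
    except RuntimeError:
        # readable file, but not a valid hash index -> corruption, rebuild it
        logger.warning('index missing or corrupted, will rebuild')
        return None
    # OSError (errno set by hashindex_read) is left to the caller: a real I/O problem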

View File

@ -65,6 +65,18 @@ class ErrorWithTraceback(Error):
traceback = True
class InternalOSError(Error):
"""Error while accessing repository: [Errno {}] {}: {}"""
def __init__(self, os_error):
self.errno = os_error.errno
self.strerror = os_error.strerror
self.filename = os_error.filename
def get_message(self):
return self.__doc__.format(self.errno, self.strerror, self.filename)
class IntegrityError(ErrorWithTraceback):
"""Data integrity error"""

View File

@ -8,13 +8,13 @@ from .platform_base import SyncFile as BaseSyncFile
from .platform_posix import swidth
from libc cimport errno
from libc.stdint cimport int64_t
API_VERSION = 3
cdef extern from "sys/types.h":
int ACL_TYPE_ACCESS
int ACL_TYPE_DEFAULT
ctypedef off64_t
cdef extern from "sys/acl.h":
ctypedef struct _acl_t:
@ -31,7 +31,7 @@ cdef extern from "acl/libacl.h":
int acl_extended_file(const char *path)
cdef extern from "fcntl.h":
int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags)
int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags)
unsigned int SYNC_FILE_RANGE_WRITE
unsigned int SYNC_FILE_RANGE_WAIT_BEFORE
unsigned int SYNC_FILE_RANGE_WAIT_AFTER
@ -47,12 +47,9 @@ cdef extern from "linux/fs.h":
int FS_APPEND_FL
int FS_COMPR_FL
cdef extern from "stropts.h":
cdef extern from "sys/ioctl.h":
int ioctl(int fildes, int request, ...)
cdef extern from "errno.h":
int errno
cdef extern from "string.h":
char *strerror(int errnum)
@ -79,17 +76,20 @@ def set_flags(path, bsd_flags, fd=None):
fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
try:
if ioctl(fd, FS_IOC_SETFLAGS, &flags) == -1:
raise OSError(errno, strerror(errno).decode(), path)
error_number = errno.errno
if error_number != errno.EOPNOTSUPP:
raise OSError(error_number, strerror(error_number).decode(), path)
finally:
if open_fd:
os.close(fd)
def get_flags(path, st):
if stat.S_ISLNK(st.st_mode):
return 0
cdef int linux_flags
fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
try:
fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
except OSError:
return 0
try:
if ioctl(fd, FS_IOC_GETFLAGS, &linux_flags) == -1:
return 0

View File

@ -2,4 +2,9 @@ cdef extern from "wchar.h":
cdef int wcswidth(const Py_UNICODE *str, size_t n)
def swidth(s):
return wcswidth(s, len(s))
str_len = len(s)
terminal_width = wcswidth(s, str_len)
if terminal_width >= 0:
return terminal_width
else:
return str_len
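
The same fallback expressed in pure Python (the PyPI wcwidth package stands in here for the libc wcswidth() wrapped above; an illustration, not what borg ships):

from wcwidth import wcswidth                   # third-party stand-in for libc wcswidth()

def swidth(s):
    str_len = len(s)
    terminal_width = wcswidth(s)
    if terminal_width >= 0:
        return terminal_width
    return str_len                             # wcswidth() reports -1 for non-printable characters

assert swidth("borg") == 4
assert swidth("データ") == 6                    # double-width characters
assert swidth("a\x1bb") == 3                   # ESC is non-printable -> fall back to len()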

View File

@ -19,6 +19,8 @@ RPC_PROTOCOL_VERSION = 2
BUFSIZE = 10 * 1024 * 1024
MAX_INFLIGHT = 100
class ConnectionClosed(Error):
"""Connection closed by remote host"""
@ -246,7 +248,6 @@ class RemoteRepository:
calls = list(calls)
waiting_for = []
w_fds = [self.stdin_fd]
while wait or calls:
while waiting_for:
try:
@ -275,6 +276,10 @@ class RemoteRepository:
return
except KeyError:
break
if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
w_fds = [self.stdin_fd]
else:
w_fds = []
r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1)
if x:
raise Exception('FD exception occurred')
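
The point of recomputing w_fds on every iteration is, presumably, to stop select() from returning immediately on an always-writable pipe when there is nothing to send; a minimal sketch with simplified names (wait_for_io is hypothetical, 100 is MAX_INFLIGHT):

import select

MAX_INFLIGHT = 100

def wait_for_io(r_fds, stdin_fd, to_send, have_work, inflight, timeout=1):
    # hypothetical helper: only watch the write end when progress can be made on it
    if to_send or (have_work and inflight < MAX_INFLIGHT):
        w_fds = [stdin_fd]
    else:
        w_fds = []
    return select.select(r_fds, w_fds, [], timeout)
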
@ -298,20 +303,9 @@ class RemoteRepository:
raise ConnectionClosed()
data = data.decode('utf-8')
for line in data.splitlines(keepends=True):
if line.startswith('$LOG '):
_, level, msg = line.split(' ', 2)
level = getattr(logging, level, logging.CRITICAL) # str -> int
if msg.startswith('Remote:'):
# server format: '$LOG <level> Remote: <msg>'
logging.log(level, msg.rstrip())
else:
# server format '$LOG <level> <logname> Remote: <msg>'
logname, msg = msg.split(' ', 1)
logging.getLogger(logname).log(level, msg.rstrip())
else:
sys.stderr.write("Remote: " + line)
handle_remote_line(line)
if w:
while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < 100:
while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT:
if calls:
if is_preloaded:
if calls[0] in self.cache:
@ -338,8 +332,6 @@ class RemoteRepository:
# that the fd should be writable
if e.errno != errno.EAGAIN:
raise
if not self.to_send and not (calls or self.preload_ids):
w_fds = []
self.ignore_responses |= set(waiting_for)
def check(self, repair=False, save_space=False):
@ -394,6 +386,21 @@ class RemoteRepository:
self.preload_ids += ids
def handle_remote_line(line):
if line.startswith('$LOG '):
_, level, msg = line.split(' ', 2)
level = getattr(logging, level, logging.CRITICAL) # str -> int
if msg.startswith('Remote:'):
# server format: '$LOG <level> Remote: <msg>'
logging.log(level, msg.rstrip())
else:
# server format '$LOG <level> <logname> Remote: <msg>'
logname, msg = msg.split(' ', 1)
logging.getLogger(logname).log(level, msg.rstrip())
else:
sys.stderr.write("Remote: " + line)
class RepositoryNoCache:
"""A not caching Repository wrapper, passes through to repository.

View File

@ -9,11 +9,14 @@ logger = logging.getLogger(__name__)
import os
import shutil
import struct
from collections import defaultdict
from functools import partial
from zlib import crc32
import msgpack
from .constants import * # NOQA
from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError, Location, ProgressIndicatorPercent, \
bin_to_hex
from .hashindex import NSIndex
from .locking import UpgradableLock, LockError, LockErrorT
from .lrucache import LRUCache
@ -26,17 +29,55 @@ TAG_PUT = 0
TAG_DELETE = 1
TAG_COMMIT = 2
FreeSpace = partial(defaultdict, int)
class Repository:
"""Filesystem based transactional key value store
"""
Filesystem based transactional key value store
Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files
called segments. Each segment is a series of log entries. The segment number together with the offset of each
entry relative to its segment start establishes an ordering of the log entries. This is the "definition" of
time for the purposes of the log.
Log entries are either PUT, DELETE or COMMIT.
A COMMIT is always the final log entry in a segment and marks all data from the beginning of the log until the
segment ending with the COMMIT as committed and consistent. The segment number of a segment ending with a COMMIT
is called the transaction ID of that commit, and a segment ending with a COMMIT is called committed.
When reading from a repository it is first checked whether the last segment is committed. If it is not, then
all segments after the last committed segment are deleted; they contain log entries whose consistency is not
established by a COMMIT.
Note that the COMMIT can't establish consistency by itself, but only manages to do so with proper support from
the platform (including the hardware). See platform_base.SyncFile for details.
A PUT inserts a key-value pair. The value is stored in the log entry, hence the repository implements
full data logging, meaning that all data is consistent, not just metadata (which is common in file systems).
A DELETE marks a key as deleted.
For a given key only the last entry regarding the key, which is called current (all other entries are called
superseded), is relevant: If there is no entry or the last entry is a DELETE then the key does not exist.
Otherwise the last PUT defines the value of the key.
By superseding a PUT (with either another PUT or a DELETE) the log entry becomes obsolete. A segment containing
such obsolete entries is called sparse, while a segment containing no such entries is called compact.
Sparse segments can be compacted and thereby disk space freed. This destroys the transaction for which the
superseded entries were current.
On disk layout:
dir/README
dir/config
dir/data/<X // SEGMENTS_PER_DIR>/<X>
dir/index.X
dir/hints.X
"""
class DoesNotExist(Error):
"""Repository {} does not exist."""
@ -138,7 +179,7 @@ class Repository:
else:
return None
def get_transaction_id(self):
def check_transaction(self):
index_transaction_id = self.get_index_transaction_id()
segments_transaction_id = self.io.get_segments_transaction_id()
if index_transaction_id is not None and segments_transaction_id is None:
@ -151,6 +192,9 @@ class Repository:
else:
replay_from = index_transaction_id
self.replay_segments(replay_from, segments_transaction_id)
def get_transaction_id(self):
self.check_transaction()
return self.get_index_transaction_id()
def break_lock(self):
@ -191,10 +235,27 @@ class Repository:
self.write_index()
self.rollback()
def open_index(self, transaction_id):
def open_index(self, transaction_id, auto_recover=True):
if transaction_id is None:
return NSIndex()
return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
index_path = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')
try:
return NSIndex.read(index_path)
except RuntimeError as error:
assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble
logger.warning('Repository index missing or corrupted, trying to recover')
try:
os.unlink(index_path)
except OSError as e:
raise InternalOSError(e) from None
if not auto_recover:
raise
self.prepare_txn(self.get_transaction_id())
# don't leave an open transaction around
self.commit()
return self.open_index(self.get_transaction_id())
except OSError as e:
raise InternalOSError(e) from None
def prepare_txn(self, transaction_id, do_cleanup=True):
self._active_txn = True
@ -207,24 +268,51 @@ class Repository:
self._active_txn = False
raise
if not self.index or transaction_id is None:
self.index = self.open_index(transaction_id)
try:
self.index = self.open_index(transaction_id, False)
except RuntimeError:
self.check_transaction()
self.index = self.open_index(transaction_id, False)
if transaction_id is None:
self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x]
self.compact = set() # XXX bad name: segments_needing_compaction = self.compact
self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x]
else:
if do_cleanup:
self.io.cleanup(transaction_id)
with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd:
hints = msgpack.unpack(fd)
if hints[b'version'] != 1:
raise ValueError('Unknown hints file version: %d' % hints['version'])
self.segments = hints[b'segments']
self.compact = set(hints[b'compact'])
hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
index_path = os.path.join(self.path, 'index.%d' % transaction_id)
try:
with open(hints_path, 'rb') as fd:
hints = msgpack.unpack(fd)
except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e:
logger.warning('Repository hints file missing or corrupted, trying to recover')
if not isinstance(e, FileNotFoundError):
os.unlink(hints_path)
# index must exist at this point
os.unlink(index_path)
self.check_transaction()
self.prepare_txn(transaction_id)
return
except OSError as os_error:
raise InternalOSError(os_error) from None
if hints[b'version'] == 1:
logger.debug('Upgrading from v1 hints.%d', transaction_id)
self.segments = hints[b'segments']
self.compact = FreeSpace()
for segment in sorted(hints[b'compact']):
logger.debug('Rebuilding sparse info for segment %d', segment)
self._rebuild_sparse(segment)
logger.debug('Upgrade to v2 hints complete')
elif hints[b'version'] != 2:
raise ValueError('Unknown hints file version: %d' % hints[b'version'])
else:
self.segments = hints[b'segments']
self.compact = FreeSpace(hints[b'compact'])
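
For reference, the shapes of the two hints layouts handled above, with made-up numbers; v2 replaces the bare set of sparse segments with freeable bytes per segment, and FreeSpace (defined at the top of this file) makes unknown segments default to 0:

from collections import defaultdict
from functools import partial

FreeSpace = partial(defaultdict, int)          # as defined at the top of this file

hints_v1 = {b'version': 1,
            b'segments': {0: 3, 1: 1},         # usage count per segment
            b'compact': [0]}                   # v1: just "these segments are sparse"
hints_v2 = {b'version': 2,
            b'segments': {0: 3, 1: 1},
            b'compact': {0: 12345}}            # v2: freeable bytes per sparse segment

compact = FreeSpace(hints_v2[b'compact'])
compact[7] += 100                              # unknown segments start at 0 freeable bytes
assert compact[0] == 12345 and compact[7] == 100
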
def write_index(self):
hints = {b'version': 1,
hints = {b'version': 2,
b'segments': self.segments,
b'compact': list(self.compact)}
b'compact': self.compact}
transaction_id = self.io.get_segments_transaction_id()
hints_file = os.path.join(self.path, 'hints.%d' % transaction_id)
with open(hints_file + '.tmp', 'wb') as fd:
@ -238,10 +326,10 @@ class Repository:
if self.append_only:
with open(os.path.join(self.path, 'transactions'), 'a') as log:
print('transaction %d, UTC time %s' % (transaction_id, datetime.utcnow().isoformat()), file=log)
# Remove old indices
# Remove old auxiliary files
current = '.%d' % transaction_id
for name in os.listdir(self.path):
if not name.startswith('index.') and not name.startswith('hints.'):
if not name.startswith(('index.', 'hints.')):
continue
if name.endswith(current):
continue
@ -267,32 +355,40 @@ class Repository:
for segment in unused:
assert self.segments.pop(segment) == 0
self.io.delete_segment(segment)
del self.compact[segment]
unused = []
for segment in sorted(self.compact):
if self.io.segment_exists(segment):
for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
for segment, freeable_space in sorted(self.compact.items()):
if not self.io.segment_exists(segment):
del self.compact[segment]
continue
segment_size = self.io.segment_size(segment)
if segment_size > 0.2 * self.max_segment_size and freeable_space < 0.15 * segment_size:
logger.debug('not compacting segment %d for later (only %d bytes are sparse)',
segment, freeable_space)
continue
segments.setdefault(segment, 0)
for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
try:
new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
except LoggedIO.SegmentFull:
complete_xfer()
new_segment, offset = self.io.write_put(key, data)
self.index[key] = new_segment, offset
segments.setdefault(new_segment, 0)
segments[new_segment] += 1
segments[segment] -= 1
elif tag == TAG_DELETE:
if index_transaction_id is None or segment > index_transaction_id:
try:
new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
self.io.write_delete(key, raise_full=save_space)
except LoggedIO.SegmentFull:
complete_xfer()
new_segment, offset = self.io.write_put(key, data)
self.index[key] = new_segment, offset
segments.setdefault(new_segment, 0)
segments[new_segment] += 1
segments[segment] -= 1
elif tag == TAG_DELETE:
if index_transaction_id is None or segment > index_transaction_id:
try:
self.io.write_delete(key, raise_full=save_space)
except LoggedIO.SegmentFull:
complete_xfer()
self.io.write_delete(key)
assert segments[segment] == 0
unused.append(segment)
self.io.write_delete(key)
assert segments[segment] == 0
unused.append(segment)
complete_xfer()
self.compact = set()
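
A worked example of the new skip heuristic, with made-up sizes:

max_segment_size = 5 * 1024 * 1024             # illustrative value only
segment_size = 4 * 1024 * 1024                 # a mostly full segment
freeable_space = 200 * 1024                    # only ~200 KiB of it is sparse

skip = segment_size > 0.2 * max_segment_size and freeable_space < 0.15 * segment_size
assert skip                                    # 200 KiB < 0.15 * 4 MiB (~614 KiB): left for later
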
def replay_segments(self, index_transaction_id, segments_transaction_id):
self.prepare_txn(index_transaction_id, do_cleanup=False)
@ -315,11 +411,12 @@ class Repository:
def _update_index(self, segment, objects, report=None):
"""some code shared between replay_segments and check"""
self.segments[segment] = 0
for tag, key, offset in objects:
for tag, key, offset, size in objects:
if tag == TAG_PUT:
try:
# If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space
s, _ = self.index[key]
self.compact.add(s)
self.compact[s] += size
self.segments[s] -= 1
except KeyError:
pass
@ -327,12 +424,17 @@ class Repository:
self.segments[segment] += 1
elif tag == TAG_DELETE:
try:
s, _ = self.index.pop(key)
self.segments[s] -= 1
self.compact.add(s)
# if the deleted PUT is not in the index, there is nothing to clean up
s, offset = self.index.pop(key)
except KeyError:
pass
self.compact.add(segment)
else:
if self.io.segment_exists(s):
# the old index is not necessarily valid for this transaction (e.g. compaction); if the segment
# is already gone, then it was already compacted.
self.segments[s] -= 1
size = self.io.read(s, offset, key, read_data=False)
self.compact[s] += size
elif tag == TAG_COMMIT:
continue
else:
@ -342,7 +444,22 @@ class Repository:
else:
report(msg)
if self.segments[segment] == 0:
self.compact.add(segment)
self.compact[segment] += self.io.segment_size(segment)
def _rebuild_sparse(self, segment):
"""Rebuild sparse bytes count for a single segment relative to the current index."""
self.compact[segment] = 0
if self.segments[segment] == 0:
self.compact[segment] += self.io.segment_size(segment)
return
for tag, key, offset, size in self.io.iter_objects(segment, read_data=False):
if tag == TAG_PUT:
if self.index.get(key, (-1, -1)) != (segment, offset):
# This PUT is superseded later
self.compact[segment] += size
elif tag == TAG_DELETE:
# The outcome of the DELETE has been recorded in the PUT branch already
self.compact[segment] += size
def check(self, repair=False, save_space=False):
"""Check repository consistency
@ -457,14 +574,16 @@ class Repository:
if not self._active_txn:
self.prepare_txn(self.get_transaction_id())
try:
segment, _ = self.index[id]
self.segments[segment] -= 1
self.compact.add(segment)
segment = self.io.write_delete(id)
self.segments.setdefault(segment, 0)
self.compact.add(segment)
segment, offset = self.index[id]
except KeyError:
pass
else:
self.segments[segment] -= 1
size = self.io.read(segment, offset, id, read_data=False)
self.compact[segment] += size
segment, size = self.io.write_delete(id)
self.compact[segment] += size
self.segments.setdefault(segment, 0)
segment, offset = self.io.write_put(id, data)
self.segments.setdefault(segment, 0)
self.segments[segment] += 1
@ -478,9 +597,10 @@ class Repository:
except KeyError:
raise self.ObjectNotFound(id, self.path) from None
self.segments[segment] -= 1
self.compact.add(segment)
segment = self.io.write_delete(id)
self.compact.add(segment)
size = self.io.read(segment, offset, id, read_data=False)
self.compact[segment] += size
segment, size = self.io.write_delete(id)
self.compact[segment] += size
self.segments.setdefault(segment, 0)
def preload(self, ids):
@ -578,7 +698,7 @@ class LoggedIO:
seen_commit = False
while True:
try:
tag, key, offset = next(iterator)
tag, key, offset, _ = next(iterator)
except IntegrityError:
return False
except StopIteration:
@ -635,7 +755,18 @@ class LoggedIO:
def segment_exists(self, segment):
return os.path.exists(self.segment_filename(segment))
def iter_objects(self, segment, include_data=False):
def segment_size(self, segment):
return os.path.getsize(self.segment_filename(segment))
def iter_objects(self, segment, include_data=False, read_data=True):
"""
Return object iterator for *segment*.
If read_data is False then include_data must be False as well.
Integrity checks are skipped: all data obtained from the iterator must be considered informational.
The iterator returns four-tuples of (tag, key, offset, data|size).
"""
fd = self.get_fd(segment)
fd.seek(0)
if fd.read(MAGIC_LEN) != MAGIC:
@ -644,11 +775,12 @@ class LoggedIO:
header = fd.read(self.header_fmt.size)
while header:
size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset,
(TAG_PUT, TAG_DELETE, TAG_COMMIT))
(TAG_PUT, TAG_DELETE, TAG_COMMIT),
read_data=read_data)
if include_data:
yield tag, key, offset, data
else:
yield tag, key, offset
yield tag, key, offset, size
offset += size
header = fd.read(self.header_fmt.size)
@ -672,19 +804,25 @@ class LoggedIO:
fd.write(data[:size])
data = data[size:]
def read(self, segment, offset, id):
def read(self, segment, offset, id, read_data=True):
"""
Read entry from *segment* at *offset* with *id*.
If read_data is False the size of the entry is returned instead and integrity checks are skipped.
The return value should thus be considered informational.
"""
if segment == self.segment and self._write_fd:
self._write_fd.sync()
fd = self.get_fd(segment)
fd.seek(offset)
header = fd.read(self.put_header_fmt.size)
size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ))
size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ), read_data)
if id != key:
raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format(
segment, offset))
return data
return data if read_data else size
def _read(self, fd, fmt, header, segment, offset, acceptable_tags):
def _read(self, fd, fmt, header, segment, offset, acceptable_tags, read_data=True):
# some code shared by read() and iter_objects()
try:
hdr_tuple = fmt.unpack(header)
@ -702,18 +840,32 @@ class LoggedIO:
raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format(
segment, offset))
length = size - fmt.size
data = fd.read(length)
if len(data) != length:
raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
segment, offset, length, len(data)))
if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
segment, offset))
if read_data:
data = fd.read(length)
if len(data) != length:
raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
segment, offset, length, len(data)))
if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
segment, offset))
if key is None and tag in (TAG_PUT, TAG_DELETE):
key, data = data[:32], data[32:]
else:
if key is None and tag in (TAG_PUT, TAG_DELETE):
key = fd.read(32)
length -= 32
if len(key) != 32:
raise IntegrityError('Segment entry key short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
segment, offset, 32, len(key)))
oldpos = fd.tell()
seeked = fd.seek(length, os.SEEK_CUR) - oldpos
data = None
if seeked != length:
raise IntegrityError('Segment entry data short seek [segment {}, offset {}]: expected {}, got {} bytes'.format(
segment, offset, length, seeked))
if tag not in acceptable_tags:
raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format(
segment, offset))
if key is None and tag in (TAG_PUT, TAG_DELETE):
key, data = data[:32], data[32:]
return size, tag, key, data
def write_put(self, id, data, raise_full=False):
@ -732,11 +884,11 @@ class LoggedIO:
crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
fd.write(b''.join((crc, header, id)))
self.offset += self.put_header_fmt.size
return self.segment
return self.segment, self.put_header_fmt.size
def write_commit(self):
fd = self.get_write_fd(no_new=True)
fd.sync()
self.close_segment()
fd = self.get_write_fd()
header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
fd.write(b''.join((crc, header)))

View File

@ -5,6 +5,7 @@ import inspect
from io import StringIO
import logging
import random
import socket
import stat
import subprocess
import sys
@ -357,6 +358,16 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# the interesting parts of info_output2 and info_output should be same
self.assert_equal(filter(info_output), filter(info_output2))
def test_unix_socket(self):
self.cmd('init', self.repository_location)
sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
sock.bind(os.path.join(self.input_path, 'unix-socket'))
self.cmd('create', self.repository_location + '::test', 'input')
sock.close()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert not os.path.exists('input/unix-socket')
def test_symlink_extract(self):
self.create_test_files()
self.cmd('init', self.repository_location)
@ -916,7 +927,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.assert_in('borgbackup version', output) # implied output even without --info given
self.assert_not_in('Starting repository check', output) # --info not given for root logger
name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[1]
with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd:
fd.seek(100)
fd.write(b'XXXX')
@ -986,14 +997,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.create_regular_file('file1', size=1024 * 80)
os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago
self.create_regular_file('file2', size=1024 * 80)
if has_lchflags:
self.create_regular_file('file3', size=1024 * 80)
platform.set_flags(os.path.join(self.input_path, 'file3'), stat.UF_NODUMP)
self.cmd('init', self.repository_location)
output = self.cmd('create', '--list', self.repository_location + '::test', 'input')
self.assert_in("A input/file1", output)
self.assert_in("A input/file2", output)
if has_lchflags:
self.assert_in("x input/file3", output)
# should find second file as excluded
output = self.cmd('create', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2')
self.assert_in("U input/file1", output)
self.assert_in("x input/file2", output)
if has_lchflags:
self.assert_in("x input/file3", output)
def test_create_topical(self):
now = time.time()
@ -1244,6 +1262,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
try:
self.cmd('mount', self.repository_location, mountpoint, fork=True)
self.wait_for_mount(mountpoint)
if has_lchflags:
# remove the file we did not backup, so input and output become equal
os.remove(os.path.join('input', 'flagfile'))
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
finally:
@ -1265,6 +1286,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
try:
self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
self.wait_for_mount(mountpoint)
if has_lchflags:
# remove the file we did not backup, so input and output become equal
os.remove(os.path.join('input', 'flagfile'))
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
finally:
if sys.platform.startswith('linux'):
@ -1491,9 +1515,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::test', 'input')
archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
self.cmd('recreate', '-pv', '--chunker-params', '10,13,11,4095', self.repository_location)
assert 'test.recreate' in self.cmd('list', self.repository_location)
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,13,11,4095', self.repository_location)
assert 'Found test.recreate, will resume' in output
assert 'Copied 1 chunks from a partially processed item' in output
archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')

View File

@ -7,10 +7,10 @@ import tempfile
from unittest.mock import patch
from ..hashindex import NSIndex
from ..helpers import Location, IntegrityError
from ..helpers import Location, IntegrityError, InternalOSError
from ..locking import UpgradableLock, LockFailed
from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint
from ..repository import Repository, LoggedIO
from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line
from ..repository import Repository, LoggedIO, MAGIC
from . import BaseTestCase
@ -125,6 +125,46 @@ class RepositoryTestCase(RepositoryTestCaseBase):
self.assert_equal(len(self.repository.list(limit=50)), 50)
class LocalRepositoryTestCase(RepositoryTestCaseBase):
# test case that doesn't work with remote repositories
def _assert_sparse(self):
# The superseded 123456... PUT
assert self.repository.compact[0] == 41 + 9
# The DELETE issued by the superseding PUT (or issued directly)
assert self.repository.compact[2] == 41
self.repository._rebuild_sparse(0)
assert self.repository.compact[0] == 41 + 9
def test_sparse1(self):
self.repository.put(b'00000000000000000000000000000000', b'foo')
self.repository.put(b'00000000000000000000000000000001', b'123456789')
self.repository.commit()
self.repository.put(b'00000000000000000000000000000001', b'bar')
self._assert_sparse()
def test_sparse2(self):
self.repository.put(b'00000000000000000000000000000000', b'foo')
self.repository.put(b'00000000000000000000000000000001', b'123456789')
self.repository.commit()
self.repository.delete(b'00000000000000000000000000000001')
self._assert_sparse()
def test_sparse_delete(self):
self.repository.put(b'00000000000000000000000000000000', b'1245')
self.repository.delete(b'00000000000000000000000000000000')
self.repository.io._write_fd.sync()
# The on-line tracking works on a per-object basis...
assert self.repository.compact[0] == 41 + 41 + 4
self.repository._rebuild_sparse(0)
# ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic)
assert self.repository.compact[0] == 41 + 41 + 4 + len(MAGIC)
self.repository.commit()
assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()]
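
Where the constants in these assertions come from: assuming the LoggedIO header layouts (little-endian crc32, size, tag, and for PUT/DELETE a 32-byte key), every PUT or DELETE entry costs 41 header bytes plus its data, and a bare header is 9 bytes:

import struct

put_header = struct.Struct('<IIB32s')          # crc32, entry size, tag, 32-byte key
header = struct.Struct('<IIB')                 # crc32, entry size, tag (COMMIT entries)
assert put_header.size == 41 and header.size == 9

assert 41 + len(b'123456789') == 50            # compact[0] above: the superseded PUT
assert 41 + len(b'1245') + 41 == 86            # test_sparse_delete: PUT entry + its DELETE entry
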
class RepositoryCommitTestCase(RepositoryTestCaseBase):
def add_keys(self):
@ -216,18 +256,69 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
self.repository.commit()
self.repository.append_only = False
assert segments_in_repository() == 1
assert segments_in_repository() == 2
self.repository.put(b'00000000000000000000000000000000', b'foo')
self.repository.commit()
# normal: compact squashes the data together, only one segment
assert segments_in_repository() == 1
assert segments_in_repository() == 4
self.repository.append_only = True
assert segments_in_repository() == 1
assert segments_in_repository() == 4
self.repository.put(b'00000000000000000000000000000000', b'foo')
self.repository.commit()
# append only: does not compact, only new segments written
assert segments_in_repository() == 2
assert segments_in_repository() == 6
class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
def setUp(self):
super().setUp()
self.repository.put(b'00000000000000000000000000000000', b'foo')
self.repository.commit()
self.repository.close()
def do_commit(self):
with self.repository:
self.repository.put(b'00000000000000000000000000000000', b'fox')
self.repository.commit()
def test_corrupted_hints(self):
with open(os.path.join(self.repository.path, 'hints.1'), 'ab') as fd:
fd.write(b'123456789')
self.do_commit()
def test_deleted_hints(self):
os.unlink(os.path.join(self.repository.path, 'hints.1'))
self.do_commit()
def test_deleted_index(self):
os.unlink(os.path.join(self.repository.path, 'index.1'))
self.do_commit()
def test_unreadable_hints(self):
hints = os.path.join(self.repository.path, 'hints.1')
os.unlink(hints)
os.mkdir(hints)
with self.assert_raises(InternalOSError):
self.do_commit()
def test_index(self):
with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd:
fd.write(b'123456789')
self.do_commit()
def test_index_outside_transaction(self):
with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd:
fd.write(b'123456789')
with self.repository:
assert len(self.repository) == 1
def test_unreadable_index(self):
index = os.path.join(self.repository.path, 'index.1')
os.unlink(index)
os.mkdir(index)
with self.assert_raises(InternalOSError):
self.do_commit()
class RepositoryCheckTestCase(RepositoryTestCaseBase):
@ -296,20 +387,20 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
self.add_objects([[1, 2, 3], [4, 5, 6]])
self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
self.check(status=True)
self.delete_segment(1)
self.delete_segment(2)
self.repository.rollback()
self.check(repair=True, status=True)
self.assert_equal(set([1, 2, 3]), self.list_objects())
def test_repair_missing_commit_segment(self):
self.add_objects([[1, 2, 3], [4, 5, 6]])
self.delete_segment(1)
self.delete_segment(3)
self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4))
self.assert_equal(set([1, 2, 3]), self.list_objects())
def test_repair_corrupted_commit_segment(self):
self.add_objects([[1, 2, 3], [4, 5, 6]])
with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
with open(os.path.join(self.tmppath, 'repository', 'data', '0', '3'), 'r+b') as fd:
fd.seek(-1, os.SEEK_END)
fd.write(b'X')
self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4))
@ -319,15 +410,15 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
def test_repair_no_commits(self):
self.add_objects([[1, 2, 3]])
with open(os.path.join(self.tmppath, 'repository', 'data', '0', '0'), 'r+b') as fd:
with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
fd.seek(-1, os.SEEK_END)
fd.write(b'X')
self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
self.check(status=False)
self.check(status=False)
self.assert_equal(self.list_indices(), ['index.0'])
self.check(repair=True, status=True)
self.assert_equal(self.list_indices(), ['index.1'])
self.check(repair=True, status=True)
self.assert_equal(self.list_indices(), ['index.3'])
self.check(status=True)
self.get_objects(3)
self.assert_equal(set([1, 2, 3]), self.list_objects())
@ -341,10 +432,10 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
def test_repair_index_too_new(self):
self.add_objects([[1, 2, 3], [4, 5, 6]])
self.assert_equal(self.list_indices(), ['index.1'])
self.assert_equal(self.list_indices(), ['index.3'])
self.rename_index('index.100')
self.check(status=True)
self.assert_equal(self.list_indices(), ['index.1'])
self.assert_equal(self.list_indices(), ['index.3'])
self.get_objects(4)
self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
@ -400,63 +491,51 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
pass
class RemoteRepositoryLoggingStub(RemoteRepository):
""" run a remote command that just prints a logging-formatted message to
stderr, and stub out enough of RemoteRepository to avoid the resulting
exceptions """
def __init__(self, *args, **kw):
self.msg = kw.pop('msg')
super().__init__(*args, **kw)
def borg_cmd(self, cmd, testing):
return [sys.executable, '-c', 'import sys; print("{}", file=sys.stderr)'.format(self.msg), ]
def __del__(self):
# clean up from exception without triggering assert
if self.p:
self.close()
class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase):
class RemoteLoggerTestCase(BaseTestCase):
def setUp(self):
self.location = Location('__testsuite__:/doesntexist/repo')
self.stream = io.StringIO()
self.handler = logging.StreamHandler(self.stream)
logging.getLogger().handlers[:] = [self.handler]
logging.getLogger('borg.repository').handlers[:] = []
logging.getLogger('borg.repository.foo').handlers[:] = []
# capture stderr
sys.stderr.flush()
self.old_stderr = sys.stderr
self.stderr = sys.stderr = io.StringIO()
def tearDown(self):
pass
sys.stderr = self.old_stderr
def create_repository(self, msg):
try:
RemoteRepositoryLoggingStub(self.location, msg=msg)
except ConnectionClosedWithHint:
# stub is dumb, so this exception expected
pass
def test_stderr_messages(self):
handle_remote_line("unstructured stderr message")
self.assert_equal(self.stream.getvalue(), '')
# stderr messages don't get an implicit newline
self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr message')
def test_old_format_messages(self):
self.handler.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
self.create_repository("$LOG INFO Remote: old format message")
handle_remote_line("$LOG INFO Remote: old format message")
self.assert_equal(self.stream.getvalue(), 'Remote: old format message\n')
self.assert_equal(self.stderr.getvalue(), '')
def test_new_format_messages(self):
self.handler.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
self.create_repository("$LOG INFO borg.repository Remote: new format message")
handle_remote_line("$LOG INFO borg.repository Remote: new format message")
self.assert_equal(self.stream.getvalue(), 'Remote: new format message\n')
self.assert_equal(self.stderr.getvalue(), '')
def test_remote_messages_screened(self):
# default borg config for root logger
self.handler.setLevel(logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
self.create_repository("$LOG INFO borg.repository Remote: new format info message")
handle_remote_line("$LOG INFO borg.repository Remote: new format info message")
self.assert_equal(self.stream.getvalue(), '')
self.assert_equal(self.stderr.getvalue(), '')
def test_info_to_correct_local_child(self):
logging.getLogger('borg.repository').setLevel(logging.INFO)
@ -474,7 +553,8 @@ class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase):
foo_handler.setLevel(logging.INFO)
logging.getLogger('borg.repository.foo').handlers[:] = [foo_handler]
self.create_repository("$LOG INFO borg.repository Remote: new format child message")
handle_remote_line("$LOG INFO borg.repository Remote: new format child message")
self.assert_equal(foo_stream.getvalue(), '')
self.assert_equal(child_stream.getvalue(), 'Remote: new format child message\n')
self.assert_equal(self.stream.getvalue(), '')
self.assert_equal(self.stderr.getvalue(), '')

View File

@ -2,7 +2,7 @@
# fakeroot -u tox --recreate
[tox]
envlist = py{34,35},flake8
envlist = py{34,35,36},flake8
[testenv]
# Change dir to avoid import problem for cython code. The directory does