Merge branch 'master' into move-to-src

Thomas Waldmann 2016-05-30 19:38:16 +02:00
commit 045e5a1203
16 changed files with 472 additions and 164 deletions

View File

@@ -14,6 +14,9 @@ matrix:
 - python: 3.5
 os: linux
 env: TOXENV=py35
+- python: nightly
+os: linux
+env: TOXENV=py36
 - python: 3.5
 os: linux
 env: TOXENV=flake8
@@ -25,6 +28,8 @@ matrix:
 os: osx
 osx_image: xcode6.4
 env: TOXENV=py35
+allow_failures:
+- python: nightly
 install:
 - ./.travis/install.sh

View File

@@ -5,7 +5,12 @@ setup_logging()
 from borg.testsuite import has_lchflags, no_lchlfags_because, has_llfuse
 from borg.testsuite.platform import fakeroot_detected
-from borg import xattr
+from borg import xattr, constants
+def pytest_configure(config):
+# no fixture-based monkey-patching since star-imports are used for the constants module
+constants.PBKDF2_ITERATIONS = 1
 def pytest_report_header(config, startdir):

View File

@@ -158,7 +158,7 @@ class build_usage(Command):
 print('generating usage docs')
 # allows us to build docs without the C modules fully loaded during help generation
 from borg.archiver import Archiver
-parser = Archiver().build_parser(prog='borg')
+parser = Archiver(prog='borg').parser
 choices = {}
 for action in parser._actions:
 if action.choices is not None:
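Because the parser is now built once in Archiver.__init__ (see the archiver.py hunk below), the docs build simply reuses it. A hedged sketch of the same inspection pattern, assuming the borg package from this tree is importable:

from borg.archiver import Archiver        # assumes this borg version is importable

parser = Archiver(prog='borg').parser      # same expression the docs build in setup.py now uses
choices = {}
for action in parser._actions:
    if action.choices is not None:
        choices.update(action.choices)     # the subparsers action maps command name -> subparser
print(sorted(choices))                     # prints the borg subcommand names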

View File

@@ -96,7 +96,7 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h)
 static uint32_t
 buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, const uint32_t *h)
 {
-uint32_t lenmod = len & 0x1f;
+uint32_t lenmod = len & 0x1f; /* Note: replace by constant to get small speedup */
 return BARREL_SHIFT(sum, 1) ^ BARREL_SHIFT(h[remove], lenmod) ^ h[add];
 }
@@ -196,10 +196,10 @@ chunker_fill(Chunker *c)
 // We rollback the initial offset back to the start of the page,
 // to avoid it not being truncated as a partial page request.
 if (length > 0) {
-// Linux kernels prior to 4.7 have a bug where they truncate
-// last partial page of POSIX_FADV_DONTNEED request, so we need
-// to page-align it ourselves. We'll need the rest of this page
-// on the next read (assuming this was not EOF)
+// All Linux kernels (at least up to and including 4.6(.0)) have a bug where
+// they truncate last partial page of POSIX_FADV_DONTNEED request, so we need
+// to page-align it ourselves. We'll need the rest of this page on the next
+// read (assuming this was not EOF).
 overshoot = (offset + length) & pagemask;
 } else {
 // For length == 0 we set overshoot 0, so the below
@@ -249,11 +249,12 @@ chunker_process(Chunker *c)
 PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
 return NULL;
 }
-while(c->remaining <= window_size && !c->eof) {
+while(c->remaining < min_size + window_size + 1 && !c->eof) { /* see assert in Chunker init */
 if(!chunker_fill(c)) {
 return NULL;
 }
 }
+/* here we either are at eof ... */
 if(c->eof) {
 c->done = 1;
 if(c->remaining) {
@@ -268,8 +269,15 @@ chunker_process(Chunker *c)
 return NULL;
 }
 }
+/* ... or we have at least min_size + window_size + 1 bytes remaining.
+ * We do not want to "cut" a chunk smaller than min_size and the hash
+ * window starts at the potential cutting place.
+ */
+c->position += min_size;
+c->remaining -= min_size;
+n += min_size;
 sum = buzhash(c->data + c->position, window_size, c->table);
-while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) {
+while(c->remaining > c->window_size && (sum & chunk_mask)) {
 sum = buzhash_update(sum, c->data[c->position],
 c->data[c->position + window_size],
 window_size, c->table);
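The net effect of these chunker changes: a chunk is never cut shorter than min_size, because the cutter now skips min_size bytes outright and only then starts looking for a hash match, with the window beginning at the candidate cut point. A rough Python sketch of that cut-point search (not the actual C implementation; it recomputes a CRC over the window instead of rolling a buzhash, purely for illustration):

from zlib import crc32

def first_cut(data, min_size, max_size, window_size, chunk_mask):
    """Return the length of the first chunk to cut from *data* (illustrative only)."""
    assert window_size + min_size + 1 <= max_size, "too small max_size"
    if len(data) < min_size + window_size + 1:
        return len(data)                      # at EOF the remainder becomes the last chunk
    n = min_size                              # never cut below min_size
    while n < max_size and len(data) - n > window_size:
        if crc32(data[n:n + window_size]) & chunk_mask == 0:
            break                             # window starting at the cut point matched the mask
        n += 1
    return n

chunks = []
data = bytes(range(256)) * 64
while data:
    n = first_cut(data, min_size=64, max_size=4096, window_size=16, chunk_mask=0x3f)
    chunks.append(data[:n])
    data = data[n:]
assert b''.join(chunks) == bytes(range(256)) * 64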

View File

@@ -371,7 +371,7 @@ Number of files: {0.stats.nfiles}'''.format(
 """
 if dry_run or stdout:
 if b'chunks' in item:
-for data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
+for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
 if stdout:
 sys.stdout.buffer.write(data)
 if stdout:

View File

@@ -96,9 +96,10 @@ def with_archive(method):
 class Archiver:
-def __init__(self, lock_wait=None):
+def __init__(self, lock_wait=None, prog=None):
 self.exit_code = EXIT_SUCCESS
 self.lock_wait = lock_wait
+self.parser = self.build_parser(prog)
 def print_error(self, msg, *args):
 msg = args and msg % args or msg
@@ -317,6 +318,7 @@ class Archiver:
 status = None
 # Ignore if nodump flag is set
 if get_flags(path, st) & stat.UF_NODUMP:
+self.print_file_status('x', path)
 return
 if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
 if not dry_run:
@@ -1117,7 +1119,7 @@ class Archiver:
 self.print_warning(warning)
 return args
-def build_parser(self, args=None, prog=None):
+def build_parser(self, prog=None):
 common_parser = argparse.ArgumentParser(add_help=False, prog=prog)
 common_group = common_parser.add_argument_group('Common options')
@@ -2061,8 +2063,7 @@ class Archiver:
 # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
 if args:
 args = self.preprocess_args(args)
-parser = self.build_parser(args)
-args = parser.parse_args(args or ['-h'])
+args = self.parser.parse_args(args or ['-h'])
 update_excludes(args)
 return args

View File

@@ -23,6 +23,8 @@ cdef class Chunker:
 def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
 min_size = 1 << chunk_min_exp
 max_size = 1 << chunk_max_exp
+# see chunker_process, first while loop condition, first term must be able to get True:
+assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
 hash_mask = (1 << hash_mask_bits) - 1
 self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
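This new assert is also why the recreate test further down switches its --chunker-params from 10,12,11,4095 to 10,13,11,4095: with chunk_max_exp=12 the constraint no longer holds. A quick check of the arithmetic (plain Python, for illustration):

def chunker_params_ok(chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size):
    min_size = 1 << chunk_min_exp
    max_size = 1 << chunk_max_exp
    # mirrors the new assert: a minimum-sized chunk plus the hash window must fit into max_size
    return hash_window_size + min_size + 1 <= max_size

assert not chunker_params_ok(10, 12, 11, 4095)   # 4095 + 1024 + 1 = 5120 > 4096
assert chunker_params_ok(10, 13, 11, 4095)       # 5120 <= 8192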

View File

@@ -1,9 +1,12 @@
 # -*- coding: utf-8 -*-
 from collections import namedtuple
+import locale
 import os
 cimport cython
 from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
+from libc.errno cimport errno
+from cpython.exc cimport PyErr_SetFromErrnoWithFilename
 API_VERSION = 2
@@ -52,6 +55,7 @@ MAX_VALUE = _MAX_VALUE
 assert _MAX_VALUE % 2 == 1
+@cython.internal
 cdef class IndexBase:
 cdef HashIndex *index
@@ -63,7 +67,10 @@ cdef class IndexBase:
 path = os.fsencode(path)
 self.index = hashindex_read(path)
 if not self.index:
-raise Exception('hashindex_read failed')
+if errno:
+PyErr_SetFromErrnoWithFilename(OSError, path)
+return
+raise RuntimeError('hashindex_read failed')
 else:
 self.index = hashindex_init(capacity, self.key_size, self.value_size)
 if not self.index:

View File

@@ -65,6 +65,18 @@ class ErrorWithTraceback(Error):
 traceback = True
+class InternalOSError(Error):
+"""Error while accessing repository: [Errno {}] {}: {}"""
+def __init__(self, os_error):
+self.errno = os_error.errno
+self.strerror = os_error.strerror
+self.filename = os_error.filename
+def get_message(self):
+return self.__doc__.format(self.errno, self.strerror, self.filename)
 class IntegrityError(ErrorWithTraceback):
 """Data integrity error"""

View File

@@ -8,13 +8,13 @@ from .platform_base import SyncFile as BaseSyncFile
 from .platform_posix import swidth
 from libc cimport errno
+from libc.stdint cimport int64_t
 API_VERSION = 3
 cdef extern from "sys/types.h":
 int ACL_TYPE_ACCESS
 int ACL_TYPE_DEFAULT
-ctypedef off64_t
 cdef extern from "sys/acl.h":
 ctypedef struct _acl_t:
@@ -31,7 +31,7 @@ cdef extern from "acl/libacl.h":
 int acl_extended_file(const char *path)
 cdef extern from "fcntl.h":
-int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags)
+int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags)
 unsigned int SYNC_FILE_RANGE_WRITE
 unsigned int SYNC_FILE_RANGE_WAIT_BEFORE
 unsigned int SYNC_FILE_RANGE_WAIT_AFTER
@@ -47,12 +47,9 @@ cdef extern from "linux/fs.h":
 int FS_APPEND_FL
 int FS_COMPR_FL
-cdef extern from "stropts.h":
+cdef extern from "sys/ioctl.h":
 int ioctl(int fildes, int request, ...)
-cdef extern from "errno.h":
-int errno
 cdef extern from "string.h":
 char *strerror(int errnum)
@@ -79,17 +76,20 @@ def set_flags(path, bsd_flags, fd=None):
 fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
 try:
 if ioctl(fd, FS_IOC_SETFLAGS, &flags) == -1:
-raise OSError(errno, strerror(errno).decode(), path)
+error_number = errno.errno
+if error_number != errno.EOPNOTSUPP:
+raise OSError(error_number, strerror(error_number).decode(), path)
 finally:
 if open_fd:
 os.close(fd)
 def get_flags(path, st):
+if stat.S_ISLNK(st.st_mode):
+return 0
 cdef int linux_flags
-fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
+try:
+fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
+except OSError:
+return 0
 try:
 if ioctl(fd, FS_IOC_GETFLAGS, &linux_flags) == -1:
 return 0

View File

@@ -2,4 +2,9 @@ cdef extern from "wchar.h":
 cdef int wcswidth(const Py_UNICODE *str, size_t n)
 def swidth(s):
-return wcswidth(s, len(s))
+str_len = len(s)
+terminal_width = wcswidth(s, str_len)
+if terminal_width >= 0:
+return terminal_width
+else:
+return str_len

View File

@@ -19,6 +19,8 @@ RPC_PROTOCOL_VERSION = 2
 BUFSIZE = 10 * 1024 * 1024
+MAX_INFLIGHT = 100
 class ConnectionClosed(Error):
 """Connection closed by remote host"""
@@ -246,7 +248,6 @@ class RemoteRepository:
 calls = list(calls)
 waiting_for = []
-w_fds = [self.stdin_fd]
 while wait or calls:
 while waiting_for:
 try:
@@ -275,6 +276,10 @@
 return
 except KeyError:
 break
+if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
+w_fds = [self.stdin_fd]
+else:
+w_fds = []
 r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1)
 if x:
 raise Exception('FD exception occurred')
@@ -298,20 +303,9 @@
 raise ConnectionClosed()
 data = data.decode('utf-8')
 for line in data.splitlines(keepends=True):
-if line.startswith('$LOG '):
-_, level, msg = line.split(' ', 2)
-level = getattr(logging, level, logging.CRITICAL)  # str -> int
-if msg.startswith('Remote:'):
-# server format: '$LOG <level> Remote: <msg>'
-logging.log(level, msg.rstrip())
-else:
-# server format '$LOG <level> <logname> Remote: <msg>'
-logname, msg = msg.split(' ', 1)
-logging.getLogger(logname).log(level, msg.rstrip())
-else:
-sys.stderr.write("Remote: " + line)
+handle_remote_line(line)
 if w:
-while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < 100:
+while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT:
 if calls:
 if is_preloaded:
 if calls[0] in self.cache:
@@ -338,8 +332,6 @@
 # that the fd should be writable
 if e.errno != errno.EAGAIN:
 raise
-if not self.to_send and not (calls or self.preload_ids):
-w_fds = []
 self.ignore_responses |= set(waiting_for)
 def check(self, repair=False, save_space=False):
@@ -394,6 +386,21 @@
 self.preload_ids += ids
+def handle_remote_line(line):
+if line.startswith('$LOG '):
+_, level, msg = line.split(' ', 2)
+level = getattr(logging, level, logging.CRITICAL)  # str -> int
+if msg.startswith('Remote:'):
+# server format: '$LOG <level> Remote: <msg>'
+logging.log(level, msg.rstrip())
+else:
+# server format '$LOG <level> <logname> Remote: <msg>'
+logname, msg = msg.split(' ', 1)
+logging.getLogger(logname).log(level, msg.rstrip())
+else:
+sys.stderr.write("Remote: " + line)
 class RepositoryNoCache:
 """A not caching Repository wrapper, passes through to repository.

View File

@@ -9,11 +9,14 @@ logger = logging.getLogger(__name__)
 import os
 import shutil
 import struct
+from collections import defaultdict
+from functools import partial
 from zlib import crc32
 import msgpack
 from .constants import * # NOQA
-from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
+from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError, Location, ProgressIndicatorPercent, \
+bin_to_hex
 from .hashindex import NSIndex
 from .locking import UpgradableLock, LockError, LockErrorT
 from .lrucache import LRUCache
@@ -26,17 +29,55 @@ TAG_PUT = 0
 TAG_DELETE = 1
 TAG_COMMIT = 2
+FreeSpace = partial(defaultdict, int)
 class Repository:
-"""Filesystem based transactional key value store
+"""
+Filesystem based transactional key value store
+Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files
+called segments. Each segment is a series of log entries. The segment number together with the offset of each
+entry relative to its segment start establishes an ordering of the log entries. This is the "definition" of
+time for the purposes of the log.
+Log entries are either PUT, DELETE or COMMIT.
+A COMMIT is always the final log entry in a segment and marks all data from the beginning of the log until the
+segment ending with the COMMIT as committed and consistent. The segment number of a segment ending with a COMMIT
+is called the transaction ID of that commit, and a segment ending with a COMMIT is called committed.
+When reading from a repository it is first checked whether the last segment is committed. If it is not, then
+all segments after the last committed segment are deleted; they contain log entries whose consistency is not
+established by a COMMIT.
+Note that the COMMIT can't establish consistency by itself, but only manages to do so with proper support from
+the platform (including the hardware). See platform_base.SyncFile for details.
+A PUT inserts a key-value pair. The value is stored in the log entry, hence the repository implements
+full data logging, meaning that all data is consistent, not just metadata (which is common in file systems).
+A DELETE marks a key as deleted.
+For a given key only the last entry regarding the key, which is called current (all other entries are called
+superseded), is relevant: If there is no entry or the last entry is a DELETE then the key does not exist.
+Otherwise the last PUT defines the value of the key.
+By superseding a PUT (with either another PUT or a DELETE) the log entry becomes obsolete. A segment containing
+such obsolete entries is called sparse, while a segment containing no such entries is called compact.
+Sparse segments can be compacted and thereby disk space freed. This destroys the transaction for which the
+superseded entries were current.
 On disk layout:
 dir/README
 dir/config
 dir/data/<X // SEGMENTS_PER_DIR>/<X>
 dir/index.X
 dir/hints.X
 """
 class DoesNotExist(Error):
 """Repository {} does not exist."""
@@ -138,7 +179,7 @@ class Repository:
 else:
 return None
-def get_transaction_id(self):
+def check_transaction(self):
 index_transaction_id = self.get_index_transaction_id()
 segments_transaction_id = self.io.get_segments_transaction_id()
 if index_transaction_id is not None and segments_transaction_id is None:
@@ -151,6 +192,9 @@ class Repository:
 else:
 replay_from = index_transaction_id
 self.replay_segments(replay_from, segments_transaction_id)
+def get_transaction_id(self):
+self.check_transaction()
 return self.get_index_transaction_id()
 def break_lock(self):
@@ -191,10 +235,27 @@
 self.write_index()
 self.rollback()
-def open_index(self, transaction_id):
+def open_index(self, transaction_id, auto_recover=True):
 if transaction_id is None:
 return NSIndex()
-return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
+index_path = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')
+try:
+return NSIndex.read(index_path)
+except RuntimeError as error:
+assert str(error) == 'hashindex_read failed'  # everything else means we're in *deep* trouble
+logger.warning('Repository index missing or corrupted, trying to recover')
+try:
+os.unlink(index_path)
+except OSError as e:
+raise InternalOSError(e) from None
+if not auto_recover:
+raise
+self.prepare_txn(self.get_transaction_id())
+# don't leave an open transaction around
+self.commit()
+return self.open_index(self.get_transaction_id())
+except OSError as e:
+raise InternalOSError(e) from None
 def prepare_txn(self, transaction_id, do_cleanup=True):
 self._active_txn = True
@@ -207,24 +268,51 @@
 self._active_txn = False
 raise
 if not self.index or transaction_id is None:
-self.index = self.open_index(transaction_id)
+try:
+self.index = self.open_index(transaction_id, False)
+except RuntimeError:
+self.check_transaction()
+self.index = self.open_index(transaction_id, False)
 if transaction_id is None:
 self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x]
-self.compact = set() # XXX bad name: segments_needing_compaction = self.compact
+self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x]
 else:
 if do_cleanup:
 self.io.cleanup(transaction_id)
-with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd:
-hints = msgpack.unpack(fd)
-if hints[b'version'] != 1:
-raise ValueError('Unknown hints file version: %d' % hints['version'])
-self.segments = hints[b'segments']
-self.compact = set(hints[b'compact'])
+hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
+index_path = os.path.join(self.path, 'index.%d' % transaction_id)
+try:
+with open(hints_path, 'rb') as fd:
+hints = msgpack.unpack(fd)
+except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e:
+logger.warning('Repository hints file missing or corrupted, trying to recover')
+if not isinstance(e, FileNotFoundError):
+os.unlink(hints_path)
+# index must exist at this point
+os.unlink(index_path)
+self.check_transaction()
+self.prepare_txn(transaction_id)
+return
+except OSError as os_error:
+raise InternalOSError(os_error) from None
+if hints[b'version'] == 1:
+logger.debug('Upgrading from v1 hints.%d', transaction_id)
+self.segments = hints[b'segments']
+self.compact = FreeSpace()
+for segment in sorted(hints[b'compact']):
+logger.debug('Rebuilding sparse info for segment %d', segment)
+self._rebuild_sparse(segment)
+logger.debug('Upgrade to v2 hints complete')
+elif hints[b'version'] != 2:
+raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+else:
+self.segments = hints[b'segments']
+self.compact = FreeSpace(hints[b'compact'])
 def write_index(self):
-hints = {b'version': 1,
+hints = {b'version': 2,
 b'segments': self.segments,
-b'compact': list(self.compact)}
+b'compact': self.compact}
 transaction_id = self.io.get_segments_transaction_id()
 hints_file = os.path.join(self.path, 'hints.%d' % transaction_id)
 with open(hints_file + '.tmp', 'wb') as fd:
@@ -238,10 +326,10 @@
 if self.append_only:
 with open(os.path.join(self.path, 'transactions'), 'a') as log:
 print('transaction %d, UTC time %s' % (transaction_id, datetime.utcnow().isoformat()), file=log)
-# Remove old indices
+# Remove old auxiliary files
 current = '.%d' % transaction_id
 for name in os.listdir(self.path):
-if not name.startswith('index.') and not name.startswith('hints.'):
+if not name.startswith(('index.', 'hints.')):
 continue
 if name.endswith(current):
 continue
@@ -267,32 +355,40 @@
 for segment in unused:
 assert self.segments.pop(segment) == 0
 self.io.delete_segment(segment)
+del self.compact[segment]
 unused = []
-for segment in sorted(self.compact):
-if self.io.segment_exists(segment):
-for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
-if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
-try:
-new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
-except LoggedIO.SegmentFull:
-complete_xfer()
-new_segment, offset = self.io.write_put(key, data)
-self.index[key] = new_segment, offset
-segments.setdefault(new_segment, 0)
-segments[new_segment] += 1
-segments[segment] -= 1
-elif tag == TAG_DELETE:
-if index_transaction_id is None or segment > index_transaction_id:
-try:
-self.io.write_delete(key, raise_full=save_space)
-except LoggedIO.SegmentFull:
-complete_xfer()
-self.io.write_delete(key)
-assert segments[segment] == 0
-unused.append(segment)
+for segment, freeable_space in sorted(self.compact.items()):
+if not self.io.segment_exists(segment):
+del self.compact[segment]
+continue
+segment_size = self.io.segment_size(segment)
+if segment_size > 0.2 * self.max_segment_size and freeable_space < 0.15 * segment_size:
+logger.debug('not compacting segment %d for later (only %d bytes are sparse)',
+segment, freeable_space)
+continue
+segments.setdefault(segment, 0)
+for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
+if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
+try:
+new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
+except LoggedIO.SegmentFull:
+complete_xfer()
+new_segment, offset = self.io.write_put(key, data)
+self.index[key] = new_segment, offset
+segments.setdefault(new_segment, 0)
+segments[new_segment] += 1
+segments[segment] -= 1
+elif tag == TAG_DELETE:
+if index_transaction_id is None or segment > index_transaction_id:
+try:
+self.io.write_delete(key, raise_full=save_space)
+except LoggedIO.SegmentFull:
+complete_xfer()
+self.io.write_delete(key)
+assert segments[segment] == 0
+unused.append(segment)
 complete_xfer()
-self.compact = set()
 def replay_segments(self, index_transaction_id, segments_transaction_id):
 self.prepare_txn(index_transaction_id, do_cleanup=False)
@@ -315,11 +411,12 @@
 def _update_index(self, segment, objects, report=None):
 """some code shared between replay_segments and check"""
 self.segments[segment] = 0
-for tag, key, offset in objects:
+for tag, key, offset, size in objects:
 if tag == TAG_PUT:
 try:
+# If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space
 s, _ = self.index[key]
-self.compact.add(s)
+self.compact[s] += size
 self.segments[s] -= 1
 except KeyError:
 pass
@@ -327,12 +424,17 @@
 self.segments[segment] += 1
 elif tag == TAG_DELETE:
 try:
-s, _ = self.index.pop(key)
-self.segments[s] -= 1
-self.compact.add(s)
+# if the deleted PUT is not in the index, there is nothing to clean up
+s, offset = self.index.pop(key)
 except KeyError:
 pass
-self.compact.add(segment)
+else:
+if self.io.segment_exists(s):
+# the old index is not necessarily valid for this transaction (e.g. compaction); if the segment
+# is already gone, then it was already compacted.
+self.segments[s] -= 1
+size = self.io.read(s, offset, key, read_data=False)
+self.compact[s] += size
 elif tag == TAG_COMMIT:
 continue
 else:
@@ -342,7 +444,22 @@
 else:
 report(msg)
 if self.segments[segment] == 0:
-self.compact.add(segment)
+self.compact[segment] += self.io.segment_size(segment)
+def _rebuild_sparse(self, segment):
+"""Rebuild sparse bytes count for a single segment relative to the current index."""
+self.compact[segment] = 0
+if self.segments[segment] == 0:
+self.compact[segment] += self.io.segment_size(segment)
+return
+for tag, key, offset, size in self.io.iter_objects(segment, read_data=False):
+if tag == TAG_PUT:
+if self.index.get(key, (-1, -1)) != (segment, offset):
+# This PUT is superseded later
+self.compact[segment] += size
+elif tag == TAG_DELETE:
+# The outcome of the DELETE has been recorded in the PUT branch already
+self.compact[segment] += size
 def check(self, repair=False, save_space=False):
 """Check repository consistency
@@ -457,14 +574,16 @@
 if not self._active_txn:
 self.prepare_txn(self.get_transaction_id())
 try:
-segment, _ = self.index[id]
-self.segments[segment] -= 1
-self.compact.add(segment)
-segment = self.io.write_delete(id)
-self.segments.setdefault(segment, 0)
-self.compact.add(segment)
+segment, offset = self.index[id]
 except KeyError:
 pass
+else:
+self.segments[segment] -= 1
+size = self.io.read(segment, offset, id, read_data=False)
+self.compact[segment] += size
+segment, size = self.io.write_delete(id)
+self.compact[segment] += size
+self.segments.setdefault(segment, 0)
 segment, offset = self.io.write_put(id, data)
 self.segments.setdefault(segment, 0)
 self.segments[segment] += 1
@@ -478,9 +597,10 @@
 except KeyError:
 raise self.ObjectNotFound(id, self.path) from None
 self.segments[segment] -= 1
-self.compact.add(segment)
-segment = self.io.write_delete(id)
-self.compact.add(segment)
+size = self.io.read(segment, offset, id, read_data=False)
+self.compact[segment] += size
+segment, size = self.io.write_delete(id)
+self.compact[segment] += size
 self.segments.setdefault(segment, 0)
 def preload(self, ids):
@@ -578,7 +698,7 @@ class LoggedIO:
 seen_commit = False
 while True:
 try:
-tag, key, offset = next(iterator)
+tag, key, offset, _ = next(iterator)
 except IntegrityError:
 return False
 except StopIteration:
@@ -635,7 +755,18 @@ class LoggedIO:
 def segment_exists(self, segment):
 return os.path.exists(self.segment_filename(segment))
-def iter_objects(self, segment, include_data=False):
+def segment_size(self, segment):
+return os.path.getsize(self.segment_filename(segment))
+def iter_objects(self, segment, include_data=False, read_data=True):
+"""
+Return object iterator for *segment*.
+If read_data is False then include_data must be False as well.
+Integrity checks are skipped: all data obtained from the iterator must be considered informational.
+The iterator returns four-tuples of (tag, key, offset, data|size).
+"""
 fd = self.get_fd(segment)
 fd.seek(0)
 if fd.read(MAGIC_LEN) != MAGIC:
@@ -644,11 +775,12 @@ class LoggedIO:
 header = fd.read(self.header_fmt.size)
 while header:
 size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset,
-(TAG_PUT, TAG_DELETE, TAG_COMMIT))
+(TAG_PUT, TAG_DELETE, TAG_COMMIT),
+read_data=read_data)
 if include_data:
 yield tag, key, offset, data
 else:
-yield tag, key, offset
+yield tag, key, offset, size
 offset += size
 header = fd.read(self.header_fmt.size)
@@ -672,19 +804,25 @@ class LoggedIO:
 fd.write(data[:size])
 data = data[size:]
-def read(self, segment, offset, id):
+def read(self, segment, offset, id, read_data=True):
+"""
+Read entry from *segment* at *offset* with *id*.
+If read_data is False the size of the entry is returned instead and integrity checks are skipped.
+The return value should thus be considered informational.
+"""
 if segment == self.segment and self._write_fd:
 self._write_fd.sync()
 fd = self.get_fd(segment)
 fd.seek(offset)
 header = fd.read(self.put_header_fmt.size)
-size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ))
+size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ), read_data)
 if id != key:
 raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format(
 segment, offset))
-return data
+return data if read_data else size
-def _read(self, fd, fmt, header, segment, offset, acceptable_tags):
+def _read(self, fd, fmt, header, segment, offset, acceptable_tags, read_data=True):
 # some code shared by read() and iter_objects()
 try:
 hdr_tuple = fmt.unpack(header)
@@ -702,18 +840,32 @@
 raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format(
 segment, offset))
 length = size - fmt.size
-data = fd.read(length)
-if len(data) != length:
-raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
-segment, offset, length, len(data)))
-if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
-raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
-segment, offset))
+if read_data:
+data = fd.read(length)
+if len(data) != length:
+raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
+segment, offset, length, len(data)))
+if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
+raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
+segment, offset))
+if key is None and tag in (TAG_PUT, TAG_DELETE):
+key, data = data[:32], data[32:]
+else:
+if key is None and tag in (TAG_PUT, TAG_DELETE):
+key = fd.read(32)
+length -= 32
+if len(key) != 32:
+raise IntegrityError('Segment entry key short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
+segment, offset, 32, len(key)))
+oldpos = fd.tell()
+seeked = fd.seek(length, os.SEEK_CUR) - oldpos
+data = None
+if seeked != length:
+raise IntegrityError('Segment entry data short seek [segment {}, offset {}]: expected {}, got {} bytes'.format(
+segment, offset, length, seeked))
 if tag not in acceptable_tags:
 raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format(
 segment, offset))
-if key is None and tag in (TAG_PUT, TAG_DELETE):
-key, data = data[:32], data[32:]
 return size, tag, key, data
 def write_put(self, id, data, raise_full=False):
@@ -732,11 +884,11 @@
 crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
 fd.write(b''.join((crc, header, id)))
 self.offset += self.put_header_fmt.size
-return self.segment
+return self.segment, self.put_header_fmt.size
 def write_commit(self):
-fd = self.get_write_fd(no_new=True)
-fd.sync()
+self.close_segment()
+fd = self.get_write_fd()
 header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
 crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
 fd.write(b''.join((crc, header)))
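The switch from a plain set of "segments needing compaction" to FreeSpace, a defaultdict of freeable bytes per segment, is what enables the new skip heuristic above: small or mostly-sparse segments get compacted, while large segments with little reclaimable space are deferred. A stand-alone sketch of the bookkeeping (illustrative values; 41 bytes is the PUT/DELETE entry header including the 32-byte key):

from collections import defaultdict
from functools import partial

FreeSpace = partial(defaultdict, int)       # missing segments simply count as 0 freeable bytes

compact = FreeSpace()
compact[0] += 41 + 9                        # a superseded 9-byte PUT in segment 0
compact[2] += 41                            # the DELETE entry itself becomes freeable, too

def worth_compacting(segment_size, freeable_space, max_segment_size):
    """Mirror of the new skip rule: defer big segments that are still mostly live."""
    return not (segment_size > 0.2 * max_segment_size and freeable_space < 0.15 * segment_size)

assert worth_compacting(1000, 900, max_segment_size=5 * 1024 * 1024)    # tiny segment: always compact
assert not worth_compacting(2 * 1024 * 1024, 1000, 5 * 1024 * 1024)     # big, nearly-live segment: skip for now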

View File

@@ -5,6 +5,7 @@ import inspect
 from io import StringIO
 import logging
 import random
+import socket
 import stat
 import subprocess
 import sys
@@ -357,6 +358,16 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 # the interesting parts of info_output2 and info_output should be same
 self.assert_equal(filter(info_output), filter(info_output2))
+def test_unix_socket(self):
+self.cmd('init', self.repository_location)
+sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+sock.bind(os.path.join(self.input_path, 'unix-socket'))
+self.cmd('create', self.repository_location + '::test', 'input')
+sock.close()
+with changedir('output'):
+self.cmd('extract', self.repository_location + '::test')
+assert not os.path.exists('input/unix-socket')
 def test_symlink_extract(self):
 self.create_test_files()
 self.cmd('init', self.repository_location)
@@ -916,7 +927,7 @@
 self.assert_in('borgbackup version', output) # implied output even without --info given
 self.assert_not_in('Starting repository check', output) # --info not given for root logger
-name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
+name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[1]
 with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd:
 fd.seek(100)
 fd.write(b'XXXX')
@@ -986,14 +997,21 @@
 self.create_regular_file('file1', size=1024 * 80)
 os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago
 self.create_regular_file('file2', size=1024 * 80)
+if has_lchflags:
+self.create_regular_file('file3', size=1024 * 80)
+platform.set_flags(os.path.join(self.input_path, 'file3'), stat.UF_NODUMP)
 self.cmd('init', self.repository_location)
 output = self.cmd('create', '--list', self.repository_location + '::test', 'input')
 self.assert_in("A input/file1", output)
 self.assert_in("A input/file2", output)
+if has_lchflags:
+self.assert_in("x input/file3", output)
 # should find second file as excluded
 output = self.cmd('create', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2')
 self.assert_in("U input/file1", output)
 self.assert_in("x input/file2", output)
+if has_lchflags:
+self.assert_in("x input/file3", output)
 def test_create_topical(self):
 now = time.time()
@@ -1244,6 +1262,9 @@
 try:
 self.cmd('mount', self.repository_location, mountpoint, fork=True)
 self.wait_for_mount(mountpoint)
+if has_lchflags:
+# remove the file we did not backup, so input and output become equal
+os.remove(os.path.join('input', 'flagfile'))
 self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
 self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
 finally:
@@ -1265,6 +1286,9 @@
 try:
 self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
 self.wait_for_mount(mountpoint)
+if has_lchflags:
+# remove the file we did not backup, so input and output become equal
+os.remove(os.path.join('input', 'flagfile'))
 self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
 finally:
 if sys.platform.startswith('linux'):
@@ -1491,9 +1515,9 @@
 self.cmd('create', self.repository_location + '::test', 'input')
 archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
 with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
-self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
+self.cmd('recreate', '-pv', '--chunker-params', '10,13,11,4095', self.repository_location)
 assert 'test.recreate' in self.cmd('list', self.repository_location)
-output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
+output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,13,11,4095', self.repository_location)
 assert 'Found test.recreate, will resume' in output
 assert 'Copied 1 chunks from a partially processed item' in output
 archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')

View File

@@ -7,10 +7,10 @@ import tempfile
 from unittest.mock import patch
 from ..hashindex import NSIndex
-from ..helpers import Location, IntegrityError
+from ..helpers import Location, IntegrityError, InternalOSError
 from ..locking import UpgradableLock, LockFailed
-from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint
-from ..repository import Repository, LoggedIO
+from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line
+from ..repository import Repository, LoggedIO, MAGIC
 from . import BaseTestCase
@@ -125,6 +125,46 @@ class RepositoryTestCase(RepositoryTestCaseBase):
 self.assert_equal(len(self.repository.list(limit=50)), 50)
+class LocalRepositoryTestCase(RepositoryTestCaseBase):
+# test case that doesn't work with remote repositories
+def _assert_sparse(self):
+# The superseded 123456... PUT
+assert self.repository.compact[0] == 41 + 9
+# The DELETE issued by the superseding PUT (or issued directly)
+assert self.repository.compact[2] == 41
+self.repository._rebuild_sparse(0)
+assert self.repository.compact[0] == 41 + 9
+def test_sparse1(self):
+self.repository.put(b'00000000000000000000000000000000', b'foo')
+self.repository.put(b'00000000000000000000000000000001', b'123456789')
+self.repository.commit()
+self.repository.put(b'00000000000000000000000000000001', b'bar')
+self._assert_sparse()
+def test_sparse2(self):
+self.repository.put(b'00000000000000000000000000000000', b'foo')
+self.repository.put(b'00000000000000000000000000000001', b'123456789')
+self.repository.commit()
+self.repository.delete(b'00000000000000000000000000000001')
+self._assert_sparse()
+def test_sparse_delete(self):
+self.repository.put(b'00000000000000000000000000000000', b'1245')
+self.repository.delete(b'00000000000000000000000000000000')
+self.repository.io._write_fd.sync()
+# The on-line tracking works on a per-object basis...
+assert self.repository.compact[0] == 41 + 41 + 4
+self.repository._rebuild_sparse(0)
+# ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic)
+assert self.repository.compact[0] == 41 + 41 + 4 + len(MAGIC)
+self.repository.commit()
+assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()]
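Where the numbers in these asserts come from, as a hedged sketch of the on-disk layout implied by LoggedIO's header formats (MAGIC is the 8-byte segment file signature):

crc, size, tag, key = 4, 4, 1, 32          # fields of a PUT/DELETE entry header
entry_header = crc + size + tag + key      # 41 bytes, which is what write_delete() now reports as its size
assert entry_header == 41

put_entry = entry_header + 4               # the PUT of b'1245' adds 4 bytes of payload
delete_entry = entry_header                # a DELETE stores no payload
assert put_entry + delete_entry == 41 + 41 + 4

magic_len = 8                              # once the whole segment is sparse, _rebuild_sparse() also counts the MAGIC
assert put_entry + delete_entry + magic_len == 41 + 41 + 4 + 8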
 class RepositoryCommitTestCase(RepositoryTestCaseBase):
 def add_keys(self):
@@ -216,18 +256,69 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
 self.repository.commit()
 self.repository.append_only = False
-assert segments_in_repository() == 1
+assert segments_in_repository() == 2
 self.repository.put(b'00000000000000000000000000000000', b'foo')
 self.repository.commit()
 # normal: compact squashes the data together, only one segment
-assert segments_in_repository() == 1
+assert segments_in_repository() == 4
 self.repository.append_only = True
-assert segments_in_repository() == 1
+assert segments_in_repository() == 4
 self.repository.put(b'00000000000000000000000000000000', b'foo')
 self.repository.commit()
 # append only: does not compact, only new segments written
-assert segments_in_repository() == 2
+assert segments_in_repository() == 6
+class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
+def setUp(self):
+super().setUp()
+self.repository.put(b'00000000000000000000000000000000', b'foo')
+self.repository.commit()
+self.repository.close()
+def do_commit(self):
+with self.repository:
+self.repository.put(b'00000000000000000000000000000000', b'fox')
+self.repository.commit()
+def test_corrupted_hints(self):
+with open(os.path.join(self.repository.path, 'hints.1'), 'ab') as fd:
+fd.write(b'123456789')
+self.do_commit()
+def test_deleted_hints(self):
+os.unlink(os.path.join(self.repository.path, 'hints.1'))
+self.do_commit()
+def test_deleted_index(self):
+os.unlink(os.path.join(self.repository.path, 'index.1'))
+self.do_commit()
+def test_unreadable_hints(self):
+hints = os.path.join(self.repository.path, 'hints.1')
+os.unlink(hints)
+os.mkdir(hints)
+with self.assert_raises(InternalOSError):
+self.do_commit()
+def test_index(self):
+with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd:
+fd.write(b'123456789')
+self.do_commit()
+def test_index_outside_transaction(self):
+with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd:
+fd.write(b'123456789')
+with self.repository:
+assert len(self.repository) == 1
+def test_unreadable_index(self):
+index = os.path.join(self.repository.path, 'index.1')
+os.unlink(index)
+os.mkdir(index)
+with self.assert_raises(InternalOSError):
+self.do_commit()
 class RepositoryCheckTestCase(RepositoryTestCaseBase):
@@ -296,20 +387,20 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
 self.add_objects([[1, 2, 3], [4, 5, 6]])
 self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
 self.check(status=True)
-self.delete_segment(1)
+self.delete_segment(2)
 self.repository.rollback()
 self.check(repair=True, status=True)
 self.assert_equal(set([1, 2, 3]), self.list_objects())
 def test_repair_missing_commit_segment(self):
 self.add_objects([[1, 2, 3], [4, 5, 6]])
-self.delete_segment(1)
+self.delete_segment(3)
 self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4))
 self.assert_equal(set([1, 2, 3]), self.list_objects())
 def test_repair_corrupted_commit_segment(self):
 self.add_objects([[1, 2, 3], [4, 5, 6]])
-with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
+with open(os.path.join(self.tmppath, 'repository', 'data', '0', '3'), 'r+b') as fd:
 fd.seek(-1, os.SEEK_END)
 fd.write(b'X')
 self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4))
@@ -319,15 +410,15 @@
 def test_repair_no_commits(self):
 self.add_objects([[1, 2, 3]])
-with open(os.path.join(self.tmppath, 'repository', 'data', '0', '0'), 'r+b') as fd:
+with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
 fd.seek(-1, os.SEEK_END)
 fd.write(b'X')
 self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
 self.check(status=False)
 self.check(status=False)
-self.assert_equal(self.list_indices(), ['index.0'])
-self.check(repair=True, status=True)
 self.assert_equal(self.list_indices(), ['index.1'])
+self.check(repair=True, status=True)
+self.assert_equal(self.list_indices(), ['index.3'])
 self.check(status=True)
 self.get_objects(3)
 self.assert_equal(set([1, 2, 3]), self.list_objects())
@@ -341,10 +432,10 @@
 def test_repair_index_too_new(self):
 self.add_objects([[1, 2, 3], [4, 5, 6]])
-self.assert_equal(self.list_indices(), ['index.1'])
+self.assert_equal(self.list_indices(), ['index.3'])
 self.rename_index('index.100')
 self.check(status=True)
-self.assert_equal(self.list_indices(), ['index.1'])
+self.assert_equal(self.list_indices(), ['index.3'])
 self.get_objects(4)
 self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
@@ -400,63 +491,51 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
 pass
-class RemoteRepositoryLoggingStub(RemoteRepository):
-""" run a remote command that just prints a logging-formatted message to
-stderr, and stub out enough of RemoteRepository to avoid the resulting
-exceptions """
-def __init__(self, *args, **kw):
-self.msg = kw.pop('msg')
-super().__init__(*args, **kw)
-def borg_cmd(self, cmd, testing):
-return [sys.executable, '-c', 'import sys; print("{}", file=sys.stderr)'.format(self.msg), ]
-def __del__(self):
-# clean up from exception without triggering assert
-if self.p:
-self.close()
-class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase):
+class RemoteLoggerTestCase(BaseTestCase):
 def setUp(self):
-self.location = Location('__testsuite__:/doesntexist/repo')
 self.stream = io.StringIO()
 self.handler = logging.StreamHandler(self.stream)
 logging.getLogger().handlers[:] = [self.handler]
 logging.getLogger('borg.repository').handlers[:] = []
 logging.getLogger('borg.repository.foo').handlers[:] = []
+# capture stderr
+sys.stderr.flush()
+self.old_stderr = sys.stderr
+self.stderr = sys.stderr = io.StringIO()
 def tearDown(self):
-pass
+sys.stderr = self.old_stderr
-def create_repository(self, msg):
-try:
-RemoteRepositoryLoggingStub(self.location, msg=msg)
-except ConnectionClosedWithHint:
-# stub is dumb, so this exception expected
-pass
+def test_stderr_messages(self):
+handle_remote_line("unstructured stderr message")
+self.assert_equal(self.stream.getvalue(), '')
+# stderr messages don't get an implicit newline
+self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr message')
 def test_old_format_messages(self):
 self.handler.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
-self.create_repository("$LOG INFO Remote: old format message")
+handle_remote_line("$LOG INFO Remote: old format message")
 self.assert_equal(self.stream.getvalue(), 'Remote: old format message\n')
+self.assert_equal(self.stderr.getvalue(), '')
 def test_new_format_messages(self):
 self.handler.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
-self.create_repository("$LOG INFO borg.repository Remote: new format message")
+handle_remote_line("$LOG INFO borg.repository Remote: new format message")
 self.assert_equal(self.stream.getvalue(), 'Remote: new format message\n')
+self.assert_equal(self.stderr.getvalue(), '')
 def test_remote_messages_screened(self):
 # default borg config for root logger
 self.handler.setLevel(logging.WARNING)
 logging.getLogger().setLevel(logging.WARNING)
-self.create_repository("$LOG INFO borg.repository Remote: new format info message")
+handle_remote_line("$LOG INFO borg.repository Remote: new format info message")
 self.assert_equal(self.stream.getvalue(), '')
+self.assert_equal(self.stderr.getvalue(), '')
 def test_info_to_correct_local_child(self):
 logging.getLogger('borg.repository').setLevel(logging.INFO)
@@ -474,7 +553,8 @@
 foo_handler.setLevel(logging.INFO)
 logging.getLogger('borg.repository.foo').handlers[:] = [foo_handler]
-self.create_repository("$LOG INFO borg.repository Remote: new format child message")
+handle_remote_line("$LOG INFO borg.repository Remote: new format child message")
 self.assert_equal(foo_stream.getvalue(), '')
 self.assert_equal(child_stream.getvalue(), 'Remote: new format child message\n')
 self.assert_equal(self.stream.getvalue(), '')
+self.assert_equal(self.stderr.getvalue(), '')

View File

@@ -2,7 +2,7 @@
 # fakeroot -u tox --recreate
 [tox]
-envlist = py{34,35},flake8
+envlist = py{34,35,36},flake8
 [testenv]
 # Change dir to avoid import problem for cython code. The directory does