diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 76726d25b..ccedf56a7 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -17,13 +17,20 @@ a good amount of free space on the filesystem that has your backup repository (and also on ~/.cache). A few GB should suffice for most hard-drive sized repositories. See also :ref:`cache-memory-usage`. +Borg doesn't use space reserved for root on repository disks (even when run as root). +On file systems which do not support this mechanism (e.g. XFS), we recommend +reserving some space in Borg itself just to be safe by adjusting the +``additional_free_space`` setting in the ``[repository]`` section of a repository's +``config`` file. A good starting point is ``2G``. + If |project_name| runs out of disk space, it tries to free as much space as it can while aborting the current operation safely, which allows to free more space -by deleting/pruning archives. This mechanism is not bullet-proof though. +by deleting/pruning archives. This mechanism is not bullet-proof in some +circumstances [1]_. + If you *really* run out of disk space, it can be hard or impossible to free space, because |project_name| needs free space to operate - even to delete backup -archives. There is a ``--save-space`` option for some commands, but even with -that |project_name| will need free space to operate. +archives. You can use some monitoring process or just include the free space information in your backup log files (you check them regularly anyway, right?). @@ -36,6 +43,13 @@ Also helpful: - consider using quotas - use `prune` regularly +.. 
[1] This failsafe can fail in these circumstances: + + - The underlying file system doesn't support statvfs(2), or returns incorrect + data, or the repository doesn't reside on a single file system + - Other tasks fill the disk simultaneously + - Hard quotas (which may not be reflected in statvfs(2)) + A step by step example ---------------------- diff --git a/src/borg/_hashindex.c b/src/borg/_hashindex.c index bfa3ef09b..0a92ca60e 100644 --- a/src/borg/_hashindex.c +++ b/src/borg/_hashindex.c @@ -441,7 +441,13 @@ hashindex_next_key(HashIndex *index, const void *key) } static int -hashindex_get_size(HashIndex *index) +hashindex_len(HashIndex *index) { return index->num_entries; } + +static int +hashindex_size(HashIndex *index) +{ + return sizeof(HashHeader) + index->num_buckets * index->bucket_size; +} diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 389cf2563..74c52c9c1 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -8,7 +8,7 @@ from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t from libc.errno cimport errno from cpython.exc cimport PyErr_SetFromErrnoWithFilename -API_VERSION = 2 +API_VERSION = 3 cdef extern from "_hashindex.c": @@ -18,7 +18,8 @@ cdef extern from "_hashindex.c": HashIndex *hashindex_read(char *path) HashIndex *hashindex_init(int capacity, int key_size, int value_size) void hashindex_free(HashIndex *index) - int hashindex_get_size(HashIndex *index) + int hashindex_len(HashIndex *index) + int hashindex_size(HashIndex *index) int hashindex_write(HashIndex *index, char *path) void *hashindex_get(HashIndex *index, void *key) void *hashindex_next_key(HashIndex *index, void *key) @@ -119,7 +120,11 @@ cdef class IndexBase: raise def __len__(self): - return hashindex_get_size(self.index) + return hashindex_len(self.index) + + def size(self): + """Return size (bytes) of hash table.""" + return hashindex_size(self.index) cdef class NSIndex(IndexBase): diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 
4da8fe663..db345f419 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -85,7 +85,7 @@ class PlaceholderError(Error): def check_extension_modules(): from . import platform - if hashindex.API_VERSION != 2: + if hashindex.API_VERSION != 3: raise ExtensionModuleError if chunker.API_VERSION != 2: raise ExtensionModuleError @@ -618,6 +618,26 @@ def format_file_size(v, precision=2, sign=False): return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign) +def parse_file_size(s): + """Return int from file size (1234, 55G, 1.7T).""" + if not s: + return int(s) # will raise + suffix = s[-1] + power = 1000 + try: + factor = { + 'K': power, + 'M': power**2, + 'G': power**3, + 'T': power**4, + 'P': power**5, + }[suffix] + s = s[:-1] + except KeyError: + factor = 1 + return int(float(s) * factor) + + def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False): prefix = '+' if sign and num > 0 else '' diff --git a/src/borg/repository.py b/src/borg/repository.py index c83edcec8..468a2efa5 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) from .constants import * # NOQA from .hashindex import NSIndex -from .helpers import Error, ErrorWithTraceback, IntegrityError +from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size from .helpers import Location from .helpers import ProgressIndicatorPercent from .helpers import bin_to_hex @@ -101,6 +101,9 @@ class Repository: id = bin_to_hex(id) super().__init__(id, repo) + class InsufficientFreeSpaceError(Error): + """Insufficient free space to complete transaction (required: {}, available: {}).""" + def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False): self.path = os.path.abspath(path) self._location = Location('file://%s' % self.path) @@ -136,8 +139,10 @@ class Repository: # EIO or FS corruption ensues, which is why we 
specifically check for ENOSPC. if self._active_txn and no_space_left_on_device: logger.warning('No space left on device, cleaning up partial transaction to free space.') - self.io.cleanup(self.io.get_segments_transaction_id()) - self.rollback() + cleanup = True + else: + cleanup = False + self.rollback(cleanup) self.close() @property @@ -160,6 +165,7 @@ class Repository: config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR)) config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE)) config.set('repository', 'append_only', str(int(self.append_only))) + config.set('repository', 'additional_free_space', '0') config.set('repository', 'id', bin_to_hex(os.urandom(32))) self.save_config(path, config) @@ -231,6 +237,7 @@ class Repository: raise self.InvalidRepository(path) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') + self.additional_free_space = parse_file_size(self.config.get('repository', 'additional_free_space', fallback=0)) # append_only can be set in the constructor # it shouldn't be overridden (True -> False) here self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False) @@ -248,9 +255,11 @@ class Repository: def commit(self, save_space=False): """Commit transaction """ + # save_space is not used anymore, but stays for RPC/API compatibility. + self.check_free_space() self.io.write_commit() if not self.append_only: - self.compact_segments(save_space=save_space) + self.compact_segments() self.write_index() self.rollback() @@ -348,7 +357,45 @@ class Repository: os.unlink(os.path.join(self.path, name)) self.index = None - def compact_segments(self, save_space=False): + def check_free_space(self): + """Pre-commit check for sufficient free space to actually perform the commit.""" + # As a baseline we take four times the current (on-disk) index size. 
+ # At this point the index may only be updated by compaction, which won't resize it. + # We still apply a factor of four so that a later, separate invocation can free space + # (journaling all deletes for all chunks is one index size) or still make minor additions + # (which may grow the index up to twice its current size). + # Note that in a subsequent operation the committed index is still on-disk, therefore we + # arrive at index_size * (1 + 2 + 1). + # In that order: journaled deletes (1), hashtable growth (2), persisted index (1). + required_free_space = self.index.size() * 4 + + # Conservatively estimate hints file size: + # 10 bytes for each segment-refcount pair, 10 bytes for each segment-space pair + # Assume maximum of 5 bytes per integer. Segment numbers will usually be packed more densely (1-3 bytes), + # as will refcounts and free space integers. For 5 MiB segments this estimate is good to ~20 PB repo size. + # Add 4K to generously account for constant format overhead. + hints_size = len(self.segments) * 10 + len(self.compact) * 10 + 4096 + required_free_space += hints_size + + required_free_space += self.additional_free_space + if not self.append_only: + # Keep one full worst-case segment free in non-append-only mode + required_free_space += self.max_segment_size + MAX_OBJECT_SIZE + try: + st_vfs = os.statvfs(self.path) + except OSError as os_error: + logger.warning('Failed to check free space before committing: ' + str(os_error)) + return + # f_bavail: even as root - don't touch the Federal Block Reserve! 
+ free_space = st_vfs.f_bavail * st_vfs.f_bsize + logger.debug('check_free_space: required bytes {}, free bytes {}'.format(required_free_space, free_space)) + if free_space < required_free_space: + self.rollback(cleanup=True) + formatted_required = format_file_size(required_free_space) + formatted_free = format_file_size(free_space) + raise self.InsufficientFreeSpaceError(formatted_required, formatted_free) + + def compact_segments(self): """Compact sparse segments by copying data into new segments """ if not self.compact: @@ -357,12 +404,11 @@ class Repository: segments = self.segments unused = [] # list of segments, that are not used anymore - def complete_xfer(): - # complete the transfer (usually exactly when some target segment - # is full, or at the very end when everything is processed) + def complete_xfer(intermediate=True): + # complete the current transfer (when some target segment is full) nonlocal unused # commit the new, compact, used segments - self.io.write_commit() + self.io.write_commit(intermediate=intermediate) # get rid of the old, sparse, unused segments. free space. 
for segment in unused: assert self.segments.pop(segment) == 0 @@ -383,7 +429,7 @@ class Repository: for tag, key, offset, data in self.io.iter_objects(segment, include_data=True): if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset): try: - new_segment, offset = self.io.write_put(key, data, raise_full=save_space) + new_segment, offset = self.io.write_put(key, data, raise_full=True) except LoggedIO.SegmentFull: complete_xfer() new_segment, offset = self.io.write_put(key, data) @@ -394,13 +440,13 @@ class Repository: elif tag == TAG_DELETE: if index_transaction_id is None or segment > index_transaction_id: try: - self.io.write_delete(key, raise_full=save_space) + self.io.write_delete(key, raise_full=True) except LoggedIO.SegmentFull: complete_xfer() self.io.write_delete(key) assert segments[segment] == 0 unused.append(segment) - complete_xfer() + complete_xfer(intermediate=False) def replay_segments(self, index_transaction_id, segments_transaction_id): self.prepare_txn(index_transaction_id, do_cleanup=False) @@ -536,7 +582,7 @@ class Repository: if current_index.get(key, (-1, -1)) != value: report_error('Index mismatch for key {}. 
{} != {}'.format(key, value, current_index.get(key, (-1, -1)))) if repair: - self.compact_segments(save_space=save_space) + self.compact_segments() self.write_index() self.rollback() if error_found: @@ -548,9 +594,11 @@ class Repository: logger.info('Completed repository check, no problems found.') return not error_found or repair - def rollback(self): + def rollback(self, cleanup=False): """ """ + if cleanup: + self.io.cleanup(self.io.get_segments_transaction_id()) self.index = None self._active_txn = False @@ -898,9 +946,15 @@ class LoggedIO: self.offset += self.put_header_fmt.size return self.segment, self.put_header_fmt.size - def write_commit(self): - self.close_segment() - fd = self.get_write_fd() + def write_commit(self, intermediate=False): + if intermediate: + # Intermediate commits go directly into the current segment - this makes checking their validity more + # expensive, but is faster and reduces clobber. + fd = self.get_write_fd() + fd.sync() + else: + self.close_segment() + fd = self.get_write_fd() header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT) crc = self.crc_fmt.pack(crc32(header) & 0xffffffff) fd.write(b''.join((crc, header))) diff --git a/src/borg/testsuite/hashindex.py b/src/borg/testsuite/hashindex.py index 000dfe4c3..a7d757145 100644 --- a/src/borg/testsuite/hashindex.py +++ b/src/borg/testsuite/hashindex.py @@ -122,6 +122,21 @@ class HashIndexTestCase(BaseTestCase): assert unique_chunks == 3 +class HashIndexSizeTestCase(BaseTestCase): + def test_size_on_disk(self): + idx = ChunkIndex() + assert idx.size() == 18 + 1031 * (32 + 3 * 4) + + def test_size_on_disk_accurate(self): + idx = ChunkIndex() + for i in range(1234): + idx[H(i)] = i, i**2, i**3 + with tempfile.NamedTemporaryFile() as file: + idx.write(file.name) + size = os.path.getsize(file.name) + assert idx.size() == size + + class HashIndexRefcountingTestCase(BaseTestCase): def test_chunkindex_limit(self): idx = ChunkIndex() diff --git 
a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 9cecca559..e3b160724 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -10,7 +10,7 @@ import msgpack import msgpack.fallback from ..helpers import Location -from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError +from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError from ..helpers import make_path_safe, clean_lines from ..helpers import prune_within, prune_split from ..helpers import get_cache_dir, get_keys_dir @@ -682,6 +682,26 @@ def test_file_size_sign(): assert format_file_size(size, sign=True) == fmt +@pytest.mark.parametrize('string,value', ( + ('1', 1), + ('20', 20), + ('5K', 5000), + ('1.75M', 1750000), + ('1e+9', 1e9), + ('-1T', -1e12), +)) +def test_parse_file_size(string, value): + assert parse_file_size(string) == int(value) + + +@pytest.mark.parametrize('string', ( + '', '5 Äpfel', '4E', '2229 bit', '1B', +)) +def test_parse_file_size_invalid(string): + with pytest.raises(ValueError): + parse_file_size(string) + + def test_is_slow_msgpack(): saved_packer = msgpack.Packer try: diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index a9daa2b61..0135cacfc 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -6,6 +6,8 @@ import sys import tempfile from unittest.mock import patch +import pytest + from ..hashindex import NSIndex from ..helpers import Location from ..helpers import IntegrityError @@ -35,6 +37,15 @@ class RepositoryTestCaseBase(BaseTestCase): self.repository.close() self.repository = self.open() + def add_keys(self): + self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(b'00000000000000000000000000000001', b'bar') + self.repository.put(b'00000000000000000000000000000003', b'bar') + self.repository.commit() + 
self.repository.put(b'00000000000000000000000000000001', b'bar2') + self.repository.put(b'00000000000000000000000000000002', b'boo') + self.repository.delete(b'00000000000000000000000000000003') + class RepositoryTestCase(RepositoryTestCaseBase): @@ -168,15 +179,6 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase): class RepositoryCommitTestCase(RepositoryTestCaseBase): - def add_keys(self): - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.repository.put(b'00000000000000000000000000000001', b'bar') - self.repository.put(b'00000000000000000000000000000003', b'bar') - self.repository.commit() - self.repository.put(b'00000000000000000000000000000001', b'bar2') - self.repository.put(b'00000000000000000000000000000002', b'boo') - self.repository.delete(b'00000000000000000000000000000003') - def test_replay_of_missing_index(self): self.add_keys() for name in os.listdir(self.repository.path): @@ -274,6 +276,19 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): assert segments_in_repository() == 6 +class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase): + def test_additional_free_space(self): + self.add_keys() + self.repository.config.set('repository', 'additional_free_space', '1000T') + self.repository.save_key(b'shortcut to save_config') + self.reopen() + + with self.repository: + self.repository.put(b'00000000000000000000000000000000', b'foobar') + with pytest.raises(Repository.InsufficientFreeSpaceError): + self.repository.commit() + + class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): def setUp(self): super().setUp() @@ -449,7 +464,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): # Simulate a crash before compact with patch.object(Repository, 'compact_segments') as compact: self.repository.commit() - compact.assert_called_once_with(save_space=False) + compact.assert_called_once_with() self.reopen() with self.repository: self.check(repair=True)