Check for sufficient free space before committing

This commit is contained in:
Marian Beermann 2016-07-14 02:08:15 +02:00
parent 389503db60
commit e9a73b808f
8 changed files with 165 additions and 22 deletions

View File

@ -17,13 +17,20 @@ a good amount of free space on the filesystem that has your backup repository
(and also on ~/.cache). A few GB should suffice for most hard-drive sized
repositories. See also :ref:`cache-memory-usage`.
Borg doesn't use space reserved for root on repository disks (even when run as root).
On file systems which do not support this mechanism (e.g. XFS), we recommend
reserving some space in Borg itself, just to be safe, by adjusting the
``additional_free_space`` setting in the ``[repository]`` section of a repository's
``config`` file. A good starting point is ``2G``.
If |project_name| runs out of disk space, it tries to free as much space as it
can while aborting the current operation safely, which allows you to free more space
by deleting/pruning archives. This mechanism is not bullet-proof though.
by deleting/pruning archives. This mechanism is not bullet-proof in some
circumstances [1]_.
If you *really* run out of disk space, it can be hard or impossible to free space,
because |project_name| needs free space to operate - even to delete backup
archives. There is a ``--save-space`` option for some commands, but even with
that |project_name| will need free space to operate.
archives.
You can use some monitoring process or just include the free space information
in your backup log files (you check them regularly anyway, right?).
@ -36,6 +43,13 @@ Also helpful:
- consider using quotas
- use `prune` regularly
.. [1] This failsafe can fail in these circumstances:
- The underlying file system doesn't support statvfs(2), or returns incorrect
data, or the repository doesn't reside on a single file system
- Other tasks fill the disk simultaneously
- Hard quotas (which may not be reflected in statvfs(2))
A step by step example
----------------------

View File

@ -441,7 +441,13 @@ hashindex_next_key(HashIndex *index, const void *key)
}
static int
hashindex_get_size(HashIndex *index)
hashindex_len(HashIndex *index)
{
return index->num_entries;
}
/* Return the size of the hash index in bytes: the fixed header plus every
 * bucket (num_buckets * bucket_size). The result depends on the number of
 * allocated buckets, not on how many entries are stored (see hashindex_len).
 * NOTE(review): the result is returned as int; presumably large indexes
 * stay below INT_MAX bytes - confirm. */
static int
hashindex_size(HashIndex *index)
{
return sizeof(HashHeader) + index->num_buckets * index->bucket_size;
}

View File

@ -8,7 +8,7 @@ from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
from libc.errno cimport errno
from cpython.exc cimport PyErr_SetFromErrnoWithFilename
API_VERSION = 2
API_VERSION = 3
cdef extern from "_hashindex.c":
@ -18,7 +18,8 @@ cdef extern from "_hashindex.c":
HashIndex *hashindex_read(char *path)
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
void hashindex_free(HashIndex *index)
int hashindex_get_size(HashIndex *index)
int hashindex_len(HashIndex *index)
int hashindex_size(HashIndex *index)
int hashindex_write(HashIndex *index, char *path)
void *hashindex_get(HashIndex *index, void *key)
void *hashindex_next_key(HashIndex *index, void *key)
@ -119,7 +120,11 @@ cdef class IndexBase:
raise
def __len__(self):
return hashindex_get_size(self.index)
return hashindex_len(self.index)
def size(self):
    """Return the size of the hash table in bytes, as reported by hashindex_size()."""
    nbytes = hashindex_size(self.index)
    return nbytes
cdef class NSIndex(IndexBase):

View File

@ -85,7 +85,7 @@ class PlaceholderError(Error):
def check_extension_modules():
from . import platform
if hashindex.API_VERSION != 2:
if hashindex.API_VERSION != 3:
raise ExtensionModuleError
if chunker.API_VERSION != 2:
raise ExtensionModuleError
@ -618,6 +618,26 @@ def format_file_size(v, precision=2, sign=False):
return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)
def parse_file_size(s):
    """Return int from file size (1234, 55G, 1.7T).

    The input is a number in any notation ``float()`` accepts, optionally
    followed by exactly one upper-case decimal suffix (powers of 1000):
    K, M, G, T or P. The product is truncated towards zero.

    Falsy input is handed straight to ``int()``: an empty string therefore
    raises ValueError, while a numeric ``0`` yields 0.

    Raises:
        ValueError: if the number cannot be parsed, or is not finite
            (e.g. ``'inf'`` or a value that overflows a float).
    """
    if not s:
        return int(s)  # will raise
    power = 1000
    factors = {
        'K': power,
        'M': power**2,
        'G': power**3,
        'T': power**4,
        'P': power**5,
    }
    suffix = s[-1]
    if suffix in factors:
        number, factor = s[:-1], factors[suffix]
    else:
        # No known suffix: the whole string must be a plain number.
        number, factor = s, 1
    try:
        return int(float(number) * factor)
    except OverflowError:
        # int(float('inf')) raises OverflowError; report it like any other
        # unparsable size so callers only need to handle ValueError.
        raise ValueError('invalid file size: %r' % (s,)) from None
def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False):
prefix = '+' if sign and num > 0 else ''

View File

@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
from .constants import * # NOQA
from .hashindex import NSIndex
from .helpers import Error, ErrorWithTraceback, IntegrityError
from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size
from .helpers import Location
from .helpers import ProgressIndicatorPercent
from .helpers import bin_to_hex
@ -101,6 +101,9 @@ class Repository:
id = bin_to_hex(id)
super().__init__(id, repo)
# Raised by check_free_space() with two pre-formatted sizes: (required, available).
# NOTE(review): the docstring's `{}` placeholders are presumably filled from the
# constructor arguments by the Error base class, i.e. the docstring doubles as the
# user-facing message template - confirm before rewording it.
class InsufficientFreeSpaceError(Error):
    """Insufficient free space to complete transaction (required: {}, available: {})."""
def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False):
self.path = os.path.abspath(path)
self._location = Location('file://%s' % self.path)
@ -136,8 +139,10 @@ class Repository:
# EIO or FS corruption ensues, which is why we specifically check for ENOSPC.
if self._active_txn and no_space_left_on_device:
logger.warning('No space left on device, cleaning up partial transaction to free space.')
self.io.cleanup(self.io.get_segments_transaction_id())
self.rollback()
cleanup = True
else:
cleanup = False
self.rollback(cleanup)
self.close()
@property
@ -160,6 +165,7 @@ class Repository:
config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR))
config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE))
config.set('repository', 'append_only', str(int(self.append_only)))
config.set('repository', 'additional_free_space', '0')
config.set('repository', 'id', bin_to_hex(os.urandom(32)))
self.save_config(path, config)
@ -231,6 +237,7 @@ class Repository:
raise self.InvalidRepository(path)
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
self.additional_free_space = parse_file_size(self.config.get('repository', 'additional_free_space', fallback=0))
# append_only can be set in the constructor
# it shouldn't be overridden (True -> False) here
self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False)
@ -249,6 +256,7 @@ class Repository:
"""Commit transaction
"""
# save_space is not used anymore, but stays for RPC/API compatibility.
self.check_free_space()
self.io.write_commit()
if not self.append_only:
self.compact_segments()
@ -349,6 +357,44 @@ class Repository:
os.unlink(os.path.join(self.path, name))
self.index = None
def check_free_space(self):
    """Pre-commit check for sufficient free space to actually perform the commit.

    Estimates the space a commit may need (index, hints file, the configured
    ``additional_free_space`` and, unless the repository is append-only, one
    worst-case segment) and compares it with the space statvfs reports as
    available to unprivileged users on the file system holding ``self.path``.

    This is a best-effort check: if statvfs fails, a warning is logged and
    the check is skipped.

    Raises:
        InsufficientFreeSpaceError: if less space than required is available.
            The transaction is rolled back (with cleanup) before raising.
    """
    # As a baseline we take four times the current (on-disk) index size.
    # At this point the index may only be updated by compaction, which won't resize it.
    # We still apply a factor of four so that a later, separate invocation can free space
    # (journaling all deletes for all chunks is one index size) or still make minor additions
    # (which may grow the index up to twice its current size).
    # Note that in a subsequent operation the committed index is still on-disk, therefore we
    # arrive at index_size * (1 + 2 + 1).
    # In that order: journaled deletes (1), hashtable growth (2), persisted index (1).
    required_free_space = self.index.size() * 4

    # Conservatively estimate hints file size:
    # 10 bytes for each segment-refcount pair, 10 bytes for each segment-space pair
    # Assume maximum of 5 bytes per integer. Segment numbers will usually be packed more densely (1-3 bytes),
    # as will refcounts and free space integers. For 5 MiB segments this estimate is good to ~20 PB repo size.
    # Add 4K to generously account for constant format overhead.
    hints_size = len(self.segments) * 10 + len(self.compact) * 10 + 4096
    required_free_space += hints_size

    required_free_space += self.additional_free_space
    if not self.append_only:
        # Keep one full worst-case segment free in non-append-only mode
        required_free_space += self.max_segment_size + MAX_OBJECT_SIZE
    try:
        st_vfs = os.statvfs(self.path)
    except OSError as os_error:
        logger.warning('Failed to check free space before committing: ' + str(os_error))
        return
    # f_bavail: even as root - don't touch the Federal Block Reserve!
    # Block counts in statvfs are expressed in units of f_frsize (the fundamental
    # block size), not f_bsize (the preferred I/O block size); the two can differ.
    free_space = st_vfs.f_bavail * st_vfs.f_frsize
    logger.debug('check_free_space: required bytes {}, free bytes {}'.format(required_free_space, free_space))
    if free_space < required_free_space:
        # Drop the partial transaction first so it doesn't keep occupying space.
        self.rollback(cleanup=True)
        formatted_required = format_file_size(required_free_space)
        formatted_free = format_file_size(free_space)
        raise self.InsufficientFreeSpaceError(formatted_required, formatted_free)
def compact_segments(self):
"""Compact sparse segments by copying data into new segments
"""
@ -548,9 +594,11 @@ class Repository:
logger.info('Completed repository check, no problems found.')
return not error_found or repair
def rollback(self):
def rollback(self, cleanup=False):
"""
"""
if cleanup:
self.io.cleanup(self.io.get_segments_transaction_id())
self.index = None
self._active_txn = False

View File

@ -122,6 +122,21 @@ class HashIndexTestCase(BaseTestCase):
assert unique_chunks == 3
class HashIndexSizeTestCase(BaseTestCase):
    """Checks that ``size()`` reports the byte size of the hash index."""

    def test_size_on_disk(self):
        """A freshly created ChunkIndex reports the expected fixed size."""
        # presumably: 18-byte header + 1031 initial buckets of (32-byte key + 3 * 4-byte values)
        expected_size = 18 + 1031 * (32 + 3 * 4)
        index = ChunkIndex()
        assert index.size() == expected_size

    def test_size_on_disk_accurate(self):
        """``size()`` matches the size of the file produced by ``write()``."""
        index = ChunkIndex()
        for n in range(1234):
            index[H(n)] = n, n**2, n**3
        with tempfile.NamedTemporaryFile() as tmp:
            index.write(tmp.name)
            bytes_on_disk = os.path.getsize(tmp.name)
        assert index.size() == bytes_on_disk
class HashIndexRefcountingTestCase(BaseTestCase):
def test_chunkindex_limit(self):
idx = ChunkIndex()

View File

@ -10,7 +10,7 @@ import msgpack
import msgpack.fallback
from ..helpers import Location
from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError
from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError
from ..helpers import make_path_safe, clean_lines
from ..helpers import prune_within, prune_split
from ..helpers import get_cache_dir, get_keys_dir
@ -682,6 +682,26 @@ def test_file_size_sign():
assert format_file_size(size, sign=True) == fmt
@pytest.mark.parametrize('text,expected', [
    ('1', 1),
    ('20', 20),
    ('5K', 5000),
    ('1.75M', 1750000),
    ('1e+9', 1e9),
    ('-1T', -1e12),
])
def test_parse_file_size(text, expected):
    """Valid size strings parse to the expected integer number of bytes."""
    parsed = parse_file_size(text)
    assert parsed == int(expected)
@pytest.mark.parametrize('text', [
    '',
    '5 Äpfel',
    '4E',
    '2229 bit',
    '1B',
])
def test_parse_file_size_invalid(text):
    """Malformed size strings are rejected with ValueError."""
    with pytest.raises(ValueError):
        parse_file_size(text)
def test_is_slow_msgpack():
saved_packer = msgpack.Packer
try:

View File

@ -6,6 +6,8 @@ import sys
import tempfile
from unittest.mock import patch
import pytest
from ..hashindex import NSIndex
from ..helpers import Location
from ..helpers import IntegrityError
@ -35,6 +37,15 @@ class RepositoryTestCaseBase(BaseTestCase):
self.repository.close()
self.repository = self.open()
def add_keys(self):
    """Fill the repository with one committed and one pending set of changes.

    Three objects are stored and committed; afterwards one of them is
    overwritten, a new one is added and one is deleted, without committing.
    """
    repo = self.repository
    committed = (
        (b'00000000000000000000000000000000', b'foo'),
        (b'00000000000000000000000000000001', b'bar'),
        (b'00000000000000000000000000000003', b'bar'),
    )
    for key, data in committed:
        repo.put(key, data)
    repo.commit()
    # Left uncommitted on purpose: callers decide what happens to this transaction.
    pending = (
        (b'00000000000000000000000000000001', b'bar2'),
        (b'00000000000000000000000000000002', b'boo'),
    )
    for key, data in pending:
        repo.put(key, data)
    repo.delete(b'00000000000000000000000000000003')
class RepositoryTestCase(RepositoryTestCaseBase):
@ -168,15 +179,6 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
class RepositoryCommitTestCase(RepositoryTestCaseBase):
def add_keys(self):
self.repository.put(b'00000000000000000000000000000000', b'foo')
self.repository.put(b'00000000000000000000000000000001', b'bar')
self.repository.put(b'00000000000000000000000000000003', b'bar')
self.repository.commit()
self.repository.put(b'00000000000000000000000000000001', b'bar2')
self.repository.put(b'00000000000000000000000000000002', b'boo')
self.repository.delete(b'00000000000000000000000000000003')
def test_replay_of_missing_index(self):
self.add_keys()
for name in os.listdir(self.repository.path):
@ -274,6 +276,19 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
assert segments_in_repository() == 6
class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase):
    def test_additional_free_space(self):
        """Committing fails when ``additional_free_space`` demands more space than is available."""
        self.add_keys()
        # Demand an absurd reserve (1000T) so the free space check is expected to fail.
        self.repository.config.set('repository', 'additional_free_space', '1000T')
        # Per the literal below, save_key is used here as a shortcut to persist the config.
        self.repository.save_key(b'shortcut to save_config')
        # Reopen so the repository object picks up the changed setting.
        self.reopen()

        with self.repository:
            self.repository.put(b'00000000000000000000000000000000', b'foobar')
            # Only the commit is expected to raise, not the put above.
            with pytest.raises(Repository.InsufficientFreeSpaceError):
                self.repository.commit()
class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
def setUp(self):
super().setUp()