mirror of https://github.com/borgbackup/borg.git
Merge pull request #1336 from enkore/issue/1272
Check free space before committing
This commit is contained in:
commit
e09a059ce8
|
@ -17,13 +17,20 @@ a good amount of free space on the filesystem that has your backup repository
|
|||
(and also on ~/.cache). A few GB should suffice for most hard-drive sized
|
||||
repositories. See also :ref:`cache-memory-usage`.
|
||||
|
||||
Borg doesn't use space reserved for root on repository disks (even when run as root);
|
||||
on file systems which do not support this mechanism (e.g. XFS) we recommend that you
|
||||
reserve some space in Borg itself just to be safe by adjusting the
|
||||
``additional_free_space`` setting in the ``[repository]`` section of a repository's
|
||||
``config`` file. A good starting point is ``2G``.
|
||||
|
||||
If |project_name| runs out of disk space, it tries to free as much space as it
|
||||
can while aborting the current operation safely, which allows you to free more space
|
||||
by deleting/pruning archives. This mechanism is not bullet-proof though.
|
||||
by deleting/pruning archives. This mechanism is not bullet-proof in some
|
||||
circumstances [1]_.
|
||||
|
||||
If you *really* run out of disk space, it can be hard or impossible to free space,
|
||||
because |project_name| needs free space to operate - even to delete backup
|
||||
archives. There is a ``--save-space`` option for some commands, but even with
|
||||
that |project_name| will need free space to operate.
|
||||
archives.
|
||||
|
||||
You can use some monitoring process or just include the free space information
|
||||
in your backup log files (you check them regularly anyway, right?).
|
||||
|
@ -36,6 +43,13 @@ Also helpful:
|
|||
- consider using quotas
|
||||
- use `prune` regularly
|
||||
|
||||
.. [1] This failsafe can fail in these circumstances:
|
||||
|
||||
- The underlying file system doesn't support statvfs(2), or returns incorrect
|
||||
data, or the repository doesn't reside on a single file system
|
||||
- Other tasks fill the disk simultaneously
|
||||
- Hard quotas (which may not be reflected in statvfs(2))
|
||||
|
||||
|
||||
A step by step example
|
||||
----------------------
|
||||
|
|
|
@ -441,7 +441,13 @@ hashindex_next_key(HashIndex *index, const void *key)
|
|||
}
|
||||
|
||||
static int
|
||||
hashindex_get_size(HashIndex *index)
|
||||
hashindex_len(HashIndex *index)
|
||||
{
|
||||
return index->num_entries;
|
||||
}
|
||||
|
||||
static int
|
||||
hashindex_size(HashIndex *index)
|
||||
{
|
||||
return sizeof(HashHeader) + index->num_buckets * index->bucket_size;
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
|
|||
from libc.errno cimport errno
|
||||
from cpython.exc cimport PyErr_SetFromErrnoWithFilename
|
||||
|
||||
API_VERSION = 2
|
||||
API_VERSION = 3
|
||||
|
||||
|
||||
cdef extern from "_hashindex.c":
|
||||
|
@ -18,7 +18,8 @@ cdef extern from "_hashindex.c":
|
|||
HashIndex *hashindex_read(char *path)
|
||||
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
|
||||
void hashindex_free(HashIndex *index)
|
||||
int hashindex_get_size(HashIndex *index)
|
||||
int hashindex_len(HashIndex *index)
|
||||
int hashindex_size(HashIndex *index)
|
||||
int hashindex_write(HashIndex *index, char *path)
|
||||
void *hashindex_get(HashIndex *index, void *key)
|
||||
void *hashindex_next_key(HashIndex *index, void *key)
|
||||
|
@ -119,7 +120,11 @@ cdef class IndexBase:
|
|||
raise
|
||||
|
||||
def __len__(self):
|
||||
return hashindex_get_size(self.index)
|
||||
return hashindex_len(self.index)
|
||||
|
||||
def size(self):
|
||||
"""Return size (bytes) of hash table."""
|
||||
return hashindex_size(self.index)
|
||||
|
||||
|
||||
cdef class NSIndex(IndexBase):
|
||||
|
|
|
@ -85,7 +85,7 @@ class PlaceholderError(Error):
|
|||
|
||||
def check_extension_modules():
|
||||
from . import platform
|
||||
if hashindex.API_VERSION != 2:
|
||||
if hashindex.API_VERSION != 3:
|
||||
raise ExtensionModuleError
|
||||
if chunker.API_VERSION != 2:
|
||||
raise ExtensionModuleError
|
||||
|
@ -618,6 +618,26 @@ def format_file_size(v, precision=2, sign=False):
|
|||
return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)
|
||||
|
||||
|
||||
def parse_file_size(s):
|
||||
"""Return int from file size (1234, 55G, 1.7T)."""
|
||||
if not s:
|
||||
return int(s) # will raise
|
||||
suffix = s[-1]
|
||||
power = 1000
|
||||
try:
|
||||
factor = {
|
||||
'K': power,
|
||||
'M': power**2,
|
||||
'G': power**3,
|
||||
'T': power**4,
|
||||
'P': power**5,
|
||||
}[suffix]
|
||||
s = s[:-1]
|
||||
except KeyError:
|
||||
factor = 1
|
||||
return int(float(s) * factor)
|
||||
|
||||
|
||||
def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False):
|
||||
prefix = '+' if sign and num > 0 else ''
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
from .constants import * # NOQA
|
||||
from .hashindex import NSIndex
|
||||
from .helpers import Error, ErrorWithTraceback, IntegrityError
|
||||
from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size
|
||||
from .helpers import Location
|
||||
from .helpers import ProgressIndicatorPercent
|
||||
from .helpers import bin_to_hex
|
||||
|
@ -101,6 +101,9 @@ class Repository:
|
|||
id = bin_to_hex(id)
|
||||
super().__init__(id, repo)
|
||||
|
||||
class InsufficientFreeSpaceError(Error):
|
||||
"""Insufficient free space to complete transaction (required: {}, available: {})."""
|
||||
|
||||
def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False):
|
||||
self.path = os.path.abspath(path)
|
||||
self._location = Location('file://%s' % self.path)
|
||||
|
@ -136,8 +139,10 @@ class Repository:
|
|||
# EIO or FS corruption ensues, which is why we specifically check for ENOSPC.
|
||||
if self._active_txn and no_space_left_on_device:
|
||||
logger.warning('No space left on device, cleaning up partial transaction to free space.')
|
||||
self.io.cleanup(self.io.get_segments_transaction_id())
|
||||
self.rollback()
|
||||
cleanup = True
|
||||
else:
|
||||
cleanup = False
|
||||
self.rollback(cleanup)
|
||||
self.close()
|
||||
|
||||
@property
|
||||
|
@ -160,6 +165,7 @@ class Repository:
|
|||
config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR))
|
||||
config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE))
|
||||
config.set('repository', 'append_only', str(int(self.append_only)))
|
||||
config.set('repository', 'additional_free_space', '0')
|
||||
config.set('repository', 'id', bin_to_hex(os.urandom(32)))
|
||||
self.save_config(path, config)
|
||||
|
||||
|
@ -231,6 +237,7 @@ class Repository:
|
|||
raise self.InvalidRepository(path)
|
||||
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
|
||||
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
|
||||
self.additional_free_space = parse_file_size(self.config.get('repository', 'additional_free_space', fallback=0))
|
||||
# append_only can be set in the constructor
|
||||
# it shouldn't be overridden (True -> False) here
|
||||
self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False)
|
||||
|
@ -248,9 +255,11 @@ class Repository:
|
|||
def commit(self, save_space=False):
|
||||
"""Commit transaction
|
||||
"""
|
||||
# save_space is not used anymore, but stays for RPC/API compatibility.
|
||||
self.check_free_space()
|
||||
self.io.write_commit()
|
||||
if not self.append_only:
|
||||
self.compact_segments(save_space=save_space)
|
||||
self.compact_segments()
|
||||
self.write_index()
|
||||
self.rollback()
|
||||
|
||||
|
@ -348,7 +357,45 @@ class Repository:
|
|||
os.unlink(os.path.join(self.path, name))
|
||||
self.index = None
|
||||
|
||||
def compact_segments(self, save_space=False):
|
||||
def check_free_space(self):
|
||||
"""Pre-commit check for sufficient free space to actually perform the commit."""
|
||||
# As a baseline we take four times the current (on-disk) index size.
|
||||
# At this point the index may only be updated by compaction, which won't resize it.
|
||||
# We still apply a factor of four so that a later, separate invocation can free space
|
||||
# (journaling all deletes for all chunks is one index size) or still make minor additions
|
||||
# (which may grow the index up to twice its current size).
|
||||
# Note that in a subsequent operation the committed index is still on-disk, therefore we
|
||||
# arrive at index_size * (1 + 2 + 1).
|
||||
# In that order: journaled deletes (1), hashtable growth (2), persisted index (1).
|
||||
required_free_space = self.index.size() * 4
|
||||
|
||||
# Conservatively estimate hints file size:
|
||||
# 10 bytes for each segment-refcount pair, 10 bytes for each segment-space pair
|
||||
# Assume maximum of 5 bytes per integer. Segment numbers will usually be packed more densely (1-3 bytes),
|
||||
# as will refcounts and free space integers. For 5 MiB segments this estimate is good to ~20 PB repo size.
|
||||
# Add 4K to generously account for constant format overhead.
|
||||
hints_size = len(self.segments) * 10 + len(self.compact) * 10 + 4096
|
||||
required_free_space += hints_size
|
||||
|
||||
required_free_space += self.additional_free_space
|
||||
if not self.append_only:
|
||||
# Keep one full worst-case segment free in non-append-only mode
|
||||
required_free_space += self.max_segment_size + MAX_OBJECT_SIZE
|
||||
try:
|
||||
st_vfs = os.statvfs(self.path)
|
||||
except OSError as os_error:
|
||||
logger.warning('Failed to check free space before committing: ' + str(os_error))
|
||||
return
|
||||
# f_bavail: even as root - don't touch the Federal Block Reserve!
|
||||
free_space = st_vfs.f_bavail * st_vfs.f_bsize
|
||||
logger.debug('check_free_space: required bytes {}, free bytes {}'.format(required_free_space, free_space))
|
||||
if free_space < required_free_space:
|
||||
self.rollback(cleanup=True)
|
||||
formatted_required = format_file_size(required_free_space)
|
||||
formatted_free = format_file_size(free_space)
|
||||
raise self.InsufficientFreeSpaceError(formatted_required, formatted_free)
|
||||
|
||||
def compact_segments(self):
|
||||
"""Compact sparse segments by copying data into new segments
|
||||
"""
|
||||
if not self.compact:
|
||||
|
@ -357,12 +404,11 @@ class Repository:
|
|||
segments = self.segments
|
||||
unused = [] # list of segments, that are not used anymore
|
||||
|
||||
def complete_xfer():
|
||||
# complete the transfer (usually exactly when some target segment
|
||||
# is full, or at the very end when everything is processed)
|
||||
def complete_xfer(intermediate=True):
|
||||
# complete the current transfer (when some target segment is full)
|
||||
nonlocal unused
|
||||
# commit the new, compact, used segments
|
||||
self.io.write_commit()
|
||||
self.io.write_commit(intermediate=intermediate)
|
||||
# get rid of the old, sparse, unused segments. free space.
|
||||
for segment in unused:
|
||||
assert self.segments.pop(segment) == 0
|
||||
|
@ -383,7 +429,7 @@ class Repository:
|
|||
for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
|
||||
if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
|
||||
try:
|
||||
new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
|
||||
new_segment, offset = self.io.write_put(key, data, raise_full=True)
|
||||
except LoggedIO.SegmentFull:
|
||||
complete_xfer()
|
||||
new_segment, offset = self.io.write_put(key, data)
|
||||
|
@ -394,13 +440,13 @@ class Repository:
|
|||
elif tag == TAG_DELETE:
|
||||
if index_transaction_id is None or segment > index_transaction_id:
|
||||
try:
|
||||
self.io.write_delete(key, raise_full=save_space)
|
||||
self.io.write_delete(key, raise_full=True)
|
||||
except LoggedIO.SegmentFull:
|
||||
complete_xfer()
|
||||
self.io.write_delete(key)
|
||||
assert segments[segment] == 0
|
||||
unused.append(segment)
|
||||
complete_xfer()
|
||||
complete_xfer(intermediate=False)
|
||||
|
||||
def replay_segments(self, index_transaction_id, segments_transaction_id):
|
||||
self.prepare_txn(index_transaction_id, do_cleanup=False)
|
||||
|
@ -536,7 +582,7 @@ class Repository:
|
|||
if current_index.get(key, (-1, -1)) != value:
|
||||
report_error('Index mismatch for key {}. {} != {}'.format(key, value, current_index.get(key, (-1, -1))))
|
||||
if repair:
|
||||
self.compact_segments(save_space=save_space)
|
||||
self.compact_segments()
|
||||
self.write_index()
|
||||
self.rollback()
|
||||
if error_found:
|
||||
|
@ -548,9 +594,11 @@ class Repository:
|
|||
logger.info('Completed repository check, no problems found.')
|
||||
return not error_found or repair
|
||||
|
||||
def rollback(self):
|
||||
def rollback(self, cleanup=False):
|
||||
"""
|
||||
"""
|
||||
if cleanup:
|
||||
self.io.cleanup(self.io.get_segments_transaction_id())
|
||||
self.index = None
|
||||
self._active_txn = False
|
||||
|
||||
|
@ -898,9 +946,15 @@ class LoggedIO:
|
|||
self.offset += self.put_header_fmt.size
|
||||
return self.segment, self.put_header_fmt.size
|
||||
|
||||
def write_commit(self):
|
||||
self.close_segment()
|
||||
fd = self.get_write_fd()
|
||||
def write_commit(self, intermediate=False):
|
||||
if intermediate:
|
||||
# Intermediate commits go directly into the current segment - this makes checking their validity more
|
||||
# expensive, but is faster and reduces clobber.
|
||||
fd = self.get_write_fd()
|
||||
fd.sync()
|
||||
else:
|
||||
self.close_segment()
|
||||
fd = self.get_write_fd()
|
||||
header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
|
||||
crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
|
||||
fd.write(b''.join((crc, header)))
|
||||
|
|
|
@ -122,6 +122,21 @@ class HashIndexTestCase(BaseTestCase):
|
|||
assert unique_chunks == 3
|
||||
|
||||
|
||||
class HashIndexSizeTestCase(BaseTestCase):
|
||||
def test_size_on_disk(self):
|
||||
idx = ChunkIndex()
|
||||
assert idx.size() == 18 + 1031 * (32 + 3 * 4)
|
||||
|
||||
def test_size_on_disk_accurate(self):
|
||||
idx = ChunkIndex()
|
||||
for i in range(1234):
|
||||
idx[H(i)] = i, i**2, i**3
|
||||
with tempfile.NamedTemporaryFile() as file:
|
||||
idx.write(file.name)
|
||||
size = os.path.getsize(file.name)
|
||||
assert idx.size() == size
|
||||
|
||||
|
||||
class HashIndexRefcountingTestCase(BaseTestCase):
|
||||
def test_chunkindex_limit(self):
|
||||
idx = ChunkIndex()
|
||||
|
|
|
@ -10,7 +10,7 @@ import msgpack
|
|||
import msgpack.fallback
|
||||
|
||||
from ..helpers import Location
|
||||
from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError
|
||||
from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError
|
||||
from ..helpers import make_path_safe, clean_lines
|
||||
from ..helpers import prune_within, prune_split
|
||||
from ..helpers import get_cache_dir, get_keys_dir
|
||||
|
@ -682,6 +682,26 @@ def test_file_size_sign():
|
|||
assert format_file_size(size, sign=True) == fmt
|
||||
|
||||
|
||||
@pytest.mark.parametrize('string,value', (
|
||||
('1', 1),
|
||||
('20', 20),
|
||||
('5K', 5000),
|
||||
('1.75M', 1750000),
|
||||
('1e+9', 1e9),
|
||||
('-1T', -1e12),
|
||||
))
|
||||
def test_parse_file_size(string, value):
|
||||
assert parse_file_size(string) == int(value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('string', (
|
||||
'', '5 Äpfel', '4E', '2229 bit', '1B',
|
||||
))
|
||||
def test_parse_file_size_invalid(string):
|
||||
with pytest.raises(ValueError):
|
||||
parse_file_size(string)
|
||||
|
||||
|
||||
def test_is_slow_msgpack():
|
||||
saved_packer = msgpack.Packer
|
||||
try:
|
||||
|
|
|
@ -6,6 +6,8 @@ import sys
|
|||
import tempfile
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from ..hashindex import NSIndex
|
||||
from ..helpers import Location
|
||||
from ..helpers import IntegrityError
|
||||
|
@ -35,6 +37,15 @@ class RepositoryTestCaseBase(BaseTestCase):
|
|||
self.repository.close()
|
||||
self.repository = self.open()
|
||||
|
||||
def add_keys(self):
|
||||
self.repository.put(b'00000000000000000000000000000000', b'foo')
|
||||
self.repository.put(b'00000000000000000000000000000001', b'bar')
|
||||
self.repository.put(b'00000000000000000000000000000003', b'bar')
|
||||
self.repository.commit()
|
||||
self.repository.put(b'00000000000000000000000000000001', b'bar2')
|
||||
self.repository.put(b'00000000000000000000000000000002', b'boo')
|
||||
self.repository.delete(b'00000000000000000000000000000003')
|
||||
|
||||
|
||||
class RepositoryTestCase(RepositoryTestCaseBase):
|
||||
|
||||
|
@ -168,15 +179,6 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
|
|||
|
||||
class RepositoryCommitTestCase(RepositoryTestCaseBase):
|
||||
|
||||
def add_keys(self):
|
||||
self.repository.put(b'00000000000000000000000000000000', b'foo')
|
||||
self.repository.put(b'00000000000000000000000000000001', b'bar')
|
||||
self.repository.put(b'00000000000000000000000000000003', b'bar')
|
||||
self.repository.commit()
|
||||
self.repository.put(b'00000000000000000000000000000001', b'bar2')
|
||||
self.repository.put(b'00000000000000000000000000000002', b'boo')
|
||||
self.repository.delete(b'00000000000000000000000000000003')
|
||||
|
||||
def test_replay_of_missing_index(self):
|
||||
self.add_keys()
|
||||
for name in os.listdir(self.repository.path):
|
||||
|
@ -274,6 +276,19 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
|
|||
assert segments_in_repository() == 6
|
||||
|
||||
|
||||
class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase):
|
||||
def test_additional_free_space(self):
|
||||
self.add_keys()
|
||||
self.repository.config.set('repository', 'additional_free_space', '1000T')
|
||||
self.repository.save_key(b'shortcut to save_config')
|
||||
self.reopen()
|
||||
|
||||
with self.repository:
|
||||
self.repository.put(b'00000000000000000000000000000000', b'foobar')
|
||||
with pytest.raises(Repository.InsufficientFreeSpaceError):
|
||||
self.repository.commit()
|
||||
|
||||
|
||||
class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
@ -449,7 +464,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
|
|||
# Simulate a crash before compact
|
||||
with patch.object(Repository, 'compact_segments') as compact:
|
||||
self.repository.commit()
|
||||
compact.assert_called_once_with(save_space=False)
|
||||
compact.assert_called_once_with()
|
||||
self.reopen()
|
||||
with self.repository:
|
||||
self.check(repair=True)
|
||||
|
|
Loading…
Reference in New Issue