diff --git a/docs/faq.rst b/docs/faq.rst
index 23dab538..bae9a2e4 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -186,6 +186,24 @@ stops after a while (some minutes, hours, ... - not immediately) with
 That's a good question and we are trying to find a good answer
 in :issue:`636`.
 
+Why am I seeing idle borg serve processes on the repo server?
+---------------------------------------------------------------
+
+Maybe the ssh connection between client and server broke down and that was not
+yet noticed on the server. Try these settings:
+
+::
+
+    # /etc/ssh/sshd_config on borg repo server - kill connection to client
+    # after ClientAliveCountMax * ClientAliveInterval seconds with no response
+    ClientAliveInterval 20
+    ClientAliveCountMax 3
+
+If you have multiple borg create ... ; borg create ... commands in an already
+serialized way in a single script, you need to give them --lock-wait N (with N
+being a bit more than the time the server needs to terminate broken down
+connections and release the lock).
+
 The borg cache eats way too much disk space, what can I do?
 ------------------------------------------------------------
 
@@ -223,17 +241,23 @@ Yes, |project_name| supports resuming backups.
 
 During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
 is saved every checkpoint interval (the default value for this is 5
-minutes) containing all the data backed-up until that point. This checkpoint
-archive is a valid archive, but it is only a partial backup. Having it
-in the repo until a successful, full backup is completed is useful because it
-references all the transmitted chunks up to the checkpoint time. This means
-that at most <checkpoint interval> worth of data needs to be retransmitted
-if you restart the backup.
+minutes) containing all the data backed-up until that point.
+
+Checkpoints only happen between files (so they don't help for interruptions
+happening while a very large file is being processed).
+
+This checkpoint archive is a valid archive (all files in it are valid and complete),
+but it is only a partial backup (not all files that you wanted to backup are
+contained in it). Having it in the repo until a successful, full backup is
+completed is useful because it references all the transmitted chunks up
+to the checkpoint. This means that in case of an interruption, you only need to
+retransfer the data since the last checkpoint.
 
 If a backup was interrupted, you do not need to do any special considerations,
 just invoke ``borg create`` as you always do. You may use the same archive name
 as in previous attempt or a different one (e.g. if you always include the current
 datetime), it does not matter.
+
 |project_name| always does full single-pass backups, so it will start again
 from the beginning - but it will be much faster, because some of the data was
 already stored into the repo (and is still referenced by the checkpoint
@@ -243,6 +267,28 @@ Once your backup has finished successfully, you can delete all
 ``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
 also care for deleting unneeded checkpoints.
 
+How can I backup huge file(s) over an unstable connection?
+------------------------------------------------------------
+
+You can use this "split trick" as a workaround for the in-between-files-only
+checkpoints (see above), huge files and an unstable connection to the repository:
+
+Split the huge file(s) into parts of manageable size (e.g. 100MB) and create
+a temporary archive of them. Borg will now create checkpoints more frequently
+than if you try to backup the files in their original form (e.g. 100GB).
+
+After that, you can remove the parts again and backup the huge file(s) in
+their original form. This will now work a lot faster as a lot of content chunks
+are already in the repository.
+
+After you have successfully backed up the huge original file(s), you can remove
+the temporary archive you made from the parts.
+
+We realize that this is just a better-than-nothing workaround, see :issue:`1198`
+for a potential solution.
+
+Please note that this workaround only helps you for backup, not for restore.
+
 If it crashes with a UnicodeError, what can I do?
 --------------------------------------------------
 
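For concreteness, the "split trick" described in the faq.rst hunk above could look
roughly like this - a minimal sketch, not part of the patch itself: it assumes GNU
coreutils ``split``, a repository already selected via ``BORG_REPO``, and purely
illustrative file and archive names (``huge.img``, ``parts/``, ``huge-parts-tmp``)::

    # split the huge file into 100 MB parts and back them up as a temporary archive
    mkdir parts
    split -b 100M huge.img parts/huge.img.
    borg create --stats ::huge-parts-tmp parts

    # then back up the original file - most content chunks are already in the
    # repository, so only little new data has to be transferred
    borg create --stats ::'{hostname}-{now:%Y-%m-%d}' huge.img

    # finally remove the parts and the temporary archive again
    rm -rf parts
    borg delete ::huge-parts-tmp

The ``{hostname}``/``{now}`` placeholders used in the second ``borg create`` are the
ones introduced elsewhere in this patch; a plain archive name works just as well.
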
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index f61656a3..5b50398a 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -103,10 +103,11 @@ Automating backups
 
 The following example script backs up ``/home`` and ``/var/www`` to a remote
 server. The script also uses the :ref:`borg_prune` subcommand to maintain a
-certain number of old archives::
+certain number of old archives:
+
+::
 
     #!/bin/sh
-
     # setting this, so the repo does not need to be given on the commandline:
     export BORG_REPO=username@remoteserver.com:backup
 
@@ -115,18 +116,18 @@ certain number of old archives::
     export BORG_PASSPHRASE=mysecret
 
     # Backup most important stuff:
-    borg create --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \
-        /etc \
-        /home \
-        /var \
-        --exclude '/home/*/.cache' \
+    borg create --stats -C lz4 ::'{hostname}-{now:%Y-%m-%d}' \
+        /etc \
+        /home \
+        /var \
+        --exclude '/home/*/.cache' \
         --exclude '*.pyc'
 
     # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
-    # archives of THIS machine. Using --prefix is very important to
+    # archives of THIS machine. The '{hostname}-' prefix is very important to
     # limit prune's operation to this machine's archives and not apply to
     # other machine's archives also.
-    borg prune -v --prefix `hostname`- \
+    borg prune -v --prefix '{hostname}-' \
         --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
 .. backup_compression:
diff --git a/docs/support.rst b/docs/support.rst
index 1547c666..7cd1890d 100644
--- a/docs/support.rst
+++ b/docs/support.rst
@@ -23,6 +23,12 @@
 Join us on channel #borgbackup on chat.freenode.net. As usual on IRC, just
 ask or tell directly and then patiently wait for replies. Stay connected.
 
+You could use the following link (after connecting, you can change the random
+nickname you got by typing "/nick mydesirednickname"):
+
+http://webchat.freenode.net/?randomnick=1&channels=%23borgbackup&uio=MTY9dHJ1ZSY5PXRydWUa8
+
+
 Mailing list
 ------------
 
diff --git a/docs/usage.rst b/docs/usage.rst
index b19234c6..49e90c68 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -452,8 +452,9 @@ will see what it would do without it actually doing anything.
 
     # Do a dry-run without actually deleting anything.
$ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo - # Same as above but only apply to archive names starting with "foo": - $ borg prune --keep-daily=7 --keep-weekly=4 --prefix=foo /path/to/repo + # Same as above but only apply to archive names starting with the hostname + # of the machine followed by a "-" character: + $ borg prune --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo # Keep 7 end of day, 4 additional end of week archives, # and an end of month archive for every month: diff --git a/requirements.d/attic.txt b/requirements.d/attic.txt new file mode 100644 index 00000000..b5068ffd --- /dev/null +++ b/requirements.d/attic.txt @@ -0,0 +1,5 @@ +# Please note: +# attic only builds using OpenSSL 1.0.x, it can not be installed using OpenSSL >= 1.1.0. +# If attic is not installed, our unit tests will just skip the tests that require attic. +attic + diff --git a/src/borg/archiver.py b/src/borg/archiver.py index ce6f7a6d..f44d2898 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -28,7 +28,7 @@ from .cache import Cache from .constants import * # NOQA from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR from .helpers import Error, NoManifestError -from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec +from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec, PrefixSpec from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive from .helpers import safe_encode, remove_surrogates, bin_to_hex from .helpers import prune_within, prune_split @@ -1079,35 +1079,68 @@ class Archiver: whitespace removal paths with whitespace at the beginning or end can only be excluded using regular expressions. 
- Examples: + Examples:: - # Exclude '/home/user/file.o' but not '/home/user/file.odt': - $ borg create -e '*.o' backup / + # Exclude '/home/user/file.o' but not '/home/user/file.odt': + $ borg create -e '*.o' backup / - # Exclude '/home/user/junk' and '/home/user/subdir/junk' but - # not '/home/user/importantjunk' or '/etc/junk': - $ borg create -e '/home/*/junk' backup / + # Exclude '/home/user/junk' and '/home/user/subdir/junk' but + # not '/home/user/importantjunk' or '/etc/junk': + $ borg create -e '/home/*/junk' backup / - # Exclude the contents of '/home/user/cache' but not the directory itself: - $ borg create -e /home/user/cache/ backup / + # Exclude the contents of '/home/user/cache' but not the directory itself: + $ borg create -e /home/user/cache/ backup / - # The file '/home/user/cache/important' is *not* backed up: - $ borg create -e /home/user/cache/ backup / /home/user/cache/important + # The file '/home/user/cache/important' is *not* backed up: + $ borg create -e /home/user/cache/ backup / /home/user/cache/important - # The contents of directories in '/home' are not backed up when their name - # ends in '.tmp' - $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / + # The contents of directories in '/home' are not backed up when their name + # ends in '.tmp' + $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / - # Load exclusions from file - $ cat >exclude.txt <exclude.txt <I') _long = struct.Struct('>Q') +_2long = struct.Struct('>QQ') bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0] bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] long_to_bytes = lambda x: _long.pack(x) -cdef Py_buffer ro_buffer(object data) except *: - cdef Py_buffer view - PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) - return view +def bytes16_to_int(b, offset=0): + h, l = _2long.unpack_from(b, offset) + return (h << 64) + l + + +def int_to_bytes16(i): + max_uint64 = 0xffffffffffffffff + l = i & max_uint64 + h = (i >> 64) & max_uint64 + return _2long.pack(h, l) + + +def increment_iv(iv, amount=1): + """ + Increment the IV by the given amount (default 1). 
+ + :param iv: input IV, 16 bytes (128 bit) + :param amount: increment value + :return: input_IV + amount, 16 bytes (128 bit) + """ + assert len(iv) == 16 + iv = bytes16_to_int(iv) + iv += amount + iv = int_to_bytes16(iv) + return iv def num_aes_blocks(int length): @@ -63,27 +84,35 @@ def num_aes_blocks(int length): return (length + 15) // 16 +cdef Py_buffer ro_buffer(object data) except *: + cdef Py_buffer view + PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) + return view + + cdef class AES: """A thin wrapper around the OpenSSL EVP cipher API """ - cdef EVP_CIPHER_CTX ctx + cdef EVP_CIPHER_CTX *ctx cdef int is_encrypt + cdef unsigned char iv_orig[16] + cdef int blocks def __cinit__(self, is_encrypt, key, iv=None): - EVP_CIPHER_CTX_init(&self.ctx) + self.ctx = EVP_CIPHER_CTX_new() self.is_encrypt = is_encrypt # Set cipher type and mode cipher_mode = EVP_aes_256_ctr() if self.is_encrypt: - if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): + if not EVP_EncryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL): raise Exception('EVP_EncryptInit_ex failed') else: # decrypt - if not EVP_DecryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): + if not EVP_DecryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL): raise Exception('EVP_DecryptInit_ex failed') self.reset(key, iv) def __dealloc__(self): - EVP_CIPHER_CTX_cleanup(&self.ctx) + EVP_CIPHER_CTX_free(self.ctx) def reset(self, key=None, iv=None): cdef const unsigned char *key2 = NULL @@ -92,17 +121,21 @@ cdef class AES: key2 = key if iv: iv2 = iv + assert isinstance(iv, bytes) and len(iv) == 16 + for i in range(16): + self.iv_orig[i] = iv[i] + self.blocks = 0 # number of AES blocks encrypted starting with iv_orig # Initialise key and IV if self.is_encrypt: - if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): + if not EVP_EncryptInit_ex(self.ctx, NULL, NULL, key2, iv2): raise Exception('EVP_EncryptInit_ex failed') else: # decrypt - if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): + if not EVP_DecryptInit_ex(self.ctx, NULL, NULL, key2, iv2): raise Exception('EVP_DecryptInit_ex failed') @property def iv(self): - return self.ctx.iv[:16] + return increment_iv(self.iv_orig[:16], self.blocks) def encrypt(self, data): cdef Py_buffer data_buf = ro_buffer(data) @@ -114,12 +147,13 @@ cdef class AES: if not out: raise MemoryError try: - if not EVP_EncryptUpdate(&self.ctx, out, &outl, data_buf.buf, inl): + if not EVP_EncryptUpdate(self.ctx, out, &outl, data_buf.buf, inl): raise Exception('EVP_EncryptUpdate failed') ctl = outl - if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): + if not EVP_EncryptFinal_ex(self.ctx, out+ctl, &outl): raise Exception('EVP_EncryptFinal failed') ctl += outl + self.blocks += num_aes_blocks(ctl) return out[:ctl] finally: free(out) @@ -137,15 +171,16 @@ cdef class AES: if not out: raise MemoryError try: - if not EVP_DecryptUpdate(&self.ctx, out, &outl, data_buf.buf, inl): + if not EVP_DecryptUpdate(self.ctx, out, &outl, data_buf.buf, inl): raise Exception('EVP_DecryptUpdate failed') ptl = outl - if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: + if EVP_DecryptFinal_ex(self.ctx, out+ptl, &outl) <= 0: # this error check is very important for modes with padding or # authentication. for them, a failure here means corrupted data. # CTR mode does not use padding nor authentication. 
raise Exception('EVP_DecryptFinal failed') ptl += outl + self.blocks += num_aes_blocks(inl) return out[:ptl] finally: free(out) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index a62ca54f..78c6e544 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -88,6 +88,10 @@ class NoManifestError(Error): """Repository has no manifest.""" +class PlaceholderError(Error): + """Formatting Error: "{}".format({}): {}({})""" + + def check_extension_modules(): from . import platform if hashindex.API_VERSION != 2: @@ -509,6 +513,10 @@ def CompressionSpec(s): raise ValueError +def PrefixSpec(s): + return replace_placeholders(s) + + def dir_is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to @@ -560,18 +568,25 @@ def partial_format(format, mapping): def format_line(format, data): - # TODO: Filter out unwanted properties of str.format(), because "format" is user provided. - try: return format.format(**data) - except (KeyError, ValueError) as e: - # this should catch format errors - print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e))) except Exception as e: - # something unexpected, print error and raise exception - print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e))) - raise - return '' + raise PlaceholderError(format, data, e.__class__.__name__, str(e)) + + +def replace_placeholders(text): + """Replace placeholders in text with their values.""" + current_time = datetime.now() + data = { + 'pid': os.getpid(), + 'fqdn': socket.getfqdn(), + 'hostname': socket.gethostname(), + 'now': current_time.now(), + 'utcnow': current_time.utcnow(), + 'user': uid2user(os.getuid(), os.getuid()), + 'uuid4': str(uuid.uuid4()), + } + return format_line(text, data) def safe_timestamp(item_timestamp_ns): @@ -744,22 +759,8 @@ class Location: if not self.parse(self.orig): raise ValueError - def preformat_text(self, text): - """Format repository and archive path with common tags""" - current_time = datetime.now() - data = { - 'pid': os.getpid(), - 'fqdn': socket.getfqdn(), - 'hostname': socket.gethostname(), - 'now': current_time.now(), - 'utcnow': current_time.utcnow(), - 'user': uid2user(os.getuid(), os.getuid()), - 'uuid4': str(uuid.uuid4()) - } - return format_line(text, data) - def parse(self, text): - text = self.preformat_text(text) + text = replace_placeholders(text) valid = self._parse(text) if valid: return True diff --git a/src/borg/repository.py b/src/borg/repository.py index 70413183..aac8af06 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -176,7 +176,9 @@ class Repository: shutil.rmtree(self.path) def get_index_transaction_id(self): - indices = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit())) + indices = sorted(int(fn[6:]) + for fn in os.listdir(self.path) + if fn.startswith('index.') and fn[6:].isdigit() and os.stat(os.path.join(self.path, fn)).st_size != 0) if indices: return indices[-1] else: diff --git a/src/borg/selftest.py b/src/borg/selftest.py index 1426f3cb..139ed7e8 100644 --- a/src/borg/selftest.py +++ b/src/borg/selftest.py @@ -30,7 +30,7 @@ SELFTEST_CASES = [ ChunkerTestCase, ] -SELFTEST_COUNT = 27 +SELFTEST_COUNT = 29 class SelfTestResult(TestResult): diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index dfa2333e..5ec7eab5 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -440,8 +440,21 @@ class 
ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(fd.read(hole_size), b'\0' * hole_size) st = os.stat(filename) self.assert_equal(st.st_size, total_len) - if sparse_support and hasattr(st, 'st_blocks'): - self.assert_true(st.st_blocks * 512 < total_len / 9) # is output sparse? + if sparse_support: + if hasattr(st, 'st_blocks'): + # do only check if it is less, do NOT check if it is much less + # as that causes troubles on xfs and zfs: + self.assert_true(st.st_blocks * 512 < total_len) + if hasattr(os, 'SEEK_HOLE') and hasattr(os, 'SEEK_DATA'): + with open(filename, 'rb') as fd: + # only check if the first hole is as expected, because the 2nd hole check + # is problematic on xfs due to its "dynamic speculative EOF preallocation + try: + self.assert_equal(fd.seek(0, os.SEEK_HOLE), 0) + self.assert_equal(fd.seek(0, os.SEEK_DATA), hole_size) + except OSError: + # does not really support SEEK_HOLE/SEEK_DATA + pass def test_unusual_filenames(self): filenames = ['normal', 'with some blanks', '(with_parens)', ] diff --git a/src/borg/testsuite/crypto.py b/src/borg/testsuite/crypto.py index e3eff8be..b79a5d83 100644 --- a/src/borg/testsuite/crypto.py +++ b/src/borg/testsuite/crypto.py @@ -1,6 +1,8 @@ from binascii import hexlify, unhexlify from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256 +from ..crypto import increment_iv, bytes16_to_int, int_to_bytes16 + from . import BaseTestCase # Note: these tests are part of the self test, do not use or import py.test functionality here. @@ -16,6 +18,27 @@ class CryptoTestCase(BaseTestCase): self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1) self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1') + def test_bytes16_to_int(self): + self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'), 1) + self.assert_equal(int_to_bytes16(1), b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1') + self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0'), 2 ** 64) + self.assert_equal(int_to_bytes16(2 ** 64), b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0') + + def test_increment_iv(self): + iv0 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0' + iv1 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1' + iv2 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2' + self.assert_equal(increment_iv(iv0, 0), iv0) + self.assert_equal(increment_iv(iv0, 1), iv1) + self.assert_equal(increment_iv(iv0, 2), iv2) + iva = b'\0\0\0\0\0\0\0\0\xff\xff\xff\xff\xff\xff\xff\xff' + ivb = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x00' + ivc = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x01' + self.assert_equal(increment_iv(iva, 0), iva) + self.assert_equal(increment_iv(iva, 1), ivb) + self.assert_equal(increment_iv(iva, 2), ivc) + self.assert_equal(increment_iv(iv0, 2**64), ivb) + def test_aes(self): key = b'X' * 32 data = b'foo' * 10 diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index ede20e16..9cecca55 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -10,7 +10,7 @@ import msgpack import msgpack.fallback from ..helpers import Location -from ..helpers import partial_format, format_file_size, format_timedelta +from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError from ..helpers import make_path_safe, clean_lines from ..helpers import prune_within, prune_split from ..helpers import get_cache_dir, get_keys_dir @@ -22,6 +22,7 @@ from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless from ..helpers import load_excludes from ..helpers import CompressionSpec, 
CompressionDecider1, CompressionDecider2 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern + from . import BaseTestCase, environment_variable, FakeInputs @@ -937,3 +938,18 @@ def test_compression_decider2(): assert compr_spec['name'] == 'zlib' compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma'))) assert compr_spec['name'] == 'lzma' + + +def test_format_line(): + data = dict(foo='bar baz') + assert format_line('', data) == '' + assert format_line('{foo}', data) == 'bar baz' + assert format_line('foo{foo}foo', data) == 'foobar bazfoo' + + +def test_format_line_erroneous(): + data = dict() + with pytest.raises(PlaceholderError): + assert format_line('{invalid}', data) + with pytest.raises(PlaceholderError): + assert format_line('{}', data) diff --git a/tox.ini b/tox.ini index 9adf926d..b5dbed60 100644 --- a/tox.ini +++ b/tox.ini @@ -7,7 +7,7 @@ envlist = py{34,35,36},flake8 [testenv] deps = -rrequirements.d/development.txt - attic + -rrequirements.d/attic.txt commands = py.test --cov=borg --cov-config=.coveragerc --benchmark-skip --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = *
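As a closing illustration of the placeholder machinery this patch threads through
``helpers.py``, ``archiver.py`` and the docs, here is a small self-contained Python
sketch. It only mirrors the ``format_line``/``replace_placeholders`` hunks above for
illustration; it is not borg's actual module, uses a plain ``Exception`` subclass
instead of borg's ``Error``-based ``PlaceholderError``, and leaves out the ``user``
entry, which needs borg's ``uid2user`` helper::

    import os
    import socket
    import uuid
    from datetime import datetime


    class PlaceholderError(Exception):
        """Raised when a user-supplied format string cannot be resolved."""


    def format_line(format, data):
        # wrap any str.format() failure, as the PlaceholderError hunk above does
        try:
            return format.format(**data)
        except Exception as e:
            raise PlaceholderError('"{}".format(...): {}({})'.format(format, e.__class__.__name__, e))


    def replace_placeholders(text):
        """Replace placeholders like {hostname} and {now} in text with their values."""
        data = {
            'pid': os.getpid(),
            'fqdn': socket.getfqdn(),
            'hostname': socket.gethostname(),
            'now': datetime.now(),
            'utcnow': datetime.utcnow(),
            'uuid4': str(uuid.uuid4()),
        }
        return format_line(text, data)


    if __name__ == '__main__':
        # prints something like "myhost-2016-07-01", i.e. the kind of value borg
        # derives for archive names and for prune --prefix
        print(replace_placeholders('{hostname}-{now:%Y-%m-%d}'))

A format string without placeholders passes through unchanged, while an unknown
placeholder such as ``{nope}`` (or a bare ``{}``) raises ``PlaceholderError`` - the
same behaviour the new ``test_format_line_erroneous`` test checks for.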