From 2e1cf17dd5ba5a99185a7a9285bb90a29bb3523f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Sep 2016 18:41:27 +0200 Subject: [PATCH 01/13] add release signing key / security contact to README, fixes #1560 --- README.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.rst b/README.rst index f6132773e..9b5451c61 100644 --- a/README.rst +++ b/README.rst @@ -114,6 +114,22 @@ Now doing another backup, just to show off the great deduplication: For a graphical frontend refer to our complementary project `BorgWeb `_. +Checking Release Authenticity and Security Contact +================================================== + +`Releases `_ are signed with this GPG key, +please use GPG to verify their authenticity. + +In case you discover a security issue, please use this contact for reporting it privately +and please, if possible, use encrypted E-Mail: + +Thomas Waldmann + +GPG Key Fingerprint: 6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393 + +The public key can be fetched from any GPG keyserver, but be careful: you must +use the **full fingerprint** to check that you got the correct key. + Links ===== From 2c5b8d690bced58f2a13d7c71e3e15711716f75d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Sep 2016 19:05:07 +0200 Subject: [PATCH 02/13] improve borg info --help, explain size infos, fixes #1532 --- borg/archiver.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 43ec093ad..7605fcd92 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1344,6 +1344,15 @@ def build_parser(self, args=None, prog=None): info_epilog = textwrap.dedent(""" This command displays some detailed information about the specified archive. 
+ + Please note that the deduplicated sizes of the individual archives do not add + up to the deduplicated size of the repository ("all archives"), because the two + mean different things: + + This archive / deduplicated size = amount of data stored ONLY for this archive + = unique chunks of this archive. + All archives / deduplicated size = amount of data stored in the repo + = all chunks in the repository. """) subparser = subparsers.add_parser('info', parents=[common_parser], description=self.do_info.__doc__, From f70008238a26905fd5cb72ba62c075414fb44fa7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Sep 2016 19:22:39 +0200 Subject: [PATCH 03/13] link reference docs and faq about BORG_FILES_CACHE_TTL, fixes #1561 --- docs/faq.rst | 2 ++ docs/usage.rst | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index c772f5fa7..0806c483c 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -356,6 +356,8 @@ those files are reported as being added when, really, chunks are already used. +.. _always_chunking: + It always chunks all my files, even unchanged ones! --------------------------------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index ab92c1cb1..89a9e3cc7 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -89,6 +89,7 @@ General: BORG_FILES_CACHE_TTL When set to a numeric value, this determines the maximum "time to live" for the files cache entries (default: 20). The files cache is used to quickly determine whether a file is unchanged. 
+ The FAQ explains this in more detail in: :ref:`always_chunking` TMPDIR where temporary files are stored (might need a lot of temporary space for some operations) From 45d72722af253f1a9501895cbcde8c4862966324 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Sep 2016 21:11:47 +0200 Subject: [PATCH 04/13] add bestpractices badge --- README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 9b5451c61..f9db78a9d 100644 --- a/README.rst +++ b/README.rst @@ -185,7 +185,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license. -|doc| |build| |coverage| +|doc| |build| |coverage| |bestpractices| .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable :alt: Documentation @@ -202,3 +202,7 @@ Borg is distributed under a 3-clause BSD license, see `License`_ for the complet .. |screencast| image:: https://asciinema.org/a/28691.png :alt: BorgBackup Installation and Basic Usage :target: https://asciinema.org/a/28691?autoplay=1&speed=2 + +.. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge + :alt: Best Practices Score + :target: https://bestpractices.coreinfrastructure.org/projects/271 From ac8d65cc47adc8ca3065b0a6d3c2b19e09efda79 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 6 Sep 2016 13:03:59 +0200 Subject: [PATCH 05/13] Fix second block in "Easy to use" section not showing on GitHub Fixes #1576 --- README.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/README.rst b/README.rst index f9db78a9d..57af39576 100644 --- a/README.rst +++ b/README.rst @@ -92,7 +92,6 @@ Initialize a new backup repository and create a backup archive:: Now doing another backup, just to show off the great deduplication: .. 
code-block:: none - :emphasize-lines: 11 $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents ----------------------------------------------------------------------------- From 9fe0140d94dcb7bc65f02cb400ea6a294b0a2ac7 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 7 Sep 2016 16:08:07 +0200 Subject: [PATCH 06/13] hashindex: export max load factor to Python-space --- borg/hashindex.pyx | 3 +++ borg/testsuite/hashindex.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 59741ad6e..ce1dac047 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -23,6 +23,8 @@ cdef extern from "_hashindex.c": uint32_t _htole32(uint32_t v) uint32_t _le32toh(uint32_t v) + double HASH_MAX_LOAD + cdef _NoDefault = object() @@ -54,6 +56,7 @@ cdef class IndexBase: cdef HashIndex *index cdef int key_size + MAX_LOAD_FACTOR = HASH_MAX_LOAD def __cinit__(self, capacity=0, path=None, key_size=32): self.key_size = key_size if path: diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index 75cd80227..4a6bd4432 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -276,3 +276,8 @@ def test_nsindex_segment_limit(): assert H(1) not in idx idx[H(2)] = hashindex.MAX_VALUE, 0 assert H(2) in idx + + +def test_max_load_factor(): + assert NSIndex.MAX_LOAD_FACTOR < 1 + assert ChunkIndex.MAX_LOAD_FACTOR < 1 From 197552526ff52c2a0473c6a000e34597c8a90ac3 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 7 Sep 2016 16:08:35 +0200 Subject: [PATCH 07/13] hashindex: make MAX_VALUE a class constant --- borg/hashindex.pyx | 7 ++++--- borg/testsuite/hashindex.py | 38 ++++++++++++++++++------------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index ce1dac047..c32c4dd1a 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -47,7 +47,6 @@ assert UINT32_MAX == 2**32-1 # module-level constant because cdef's in classes can't 
have default values cdef uint32_t _MAX_VALUE = 2**32-1025 -MAX_VALUE = _MAX_VALUE assert _MAX_VALUE % 2 == 1 @@ -57,6 +56,8 @@ cdef class IndexBase: cdef int key_size MAX_LOAD_FACTOR = HASH_MAX_LOAD + MAX_VALUE = _MAX_VALUE + def __cinit__(self, capacity=0, path=None, key_size=32): self.key_size = key_size if path: @@ -283,7 +284,7 @@ cdef class ChunkIndex(IndexBase): unique_chunks += 1 values = (key + self.key_size) refcount = _le32toh(values[0]) - assert refcount <= MAX_VALUE, "invalid reference count" + assert refcount <= _MAX_VALUE, "invalid reference count" chunks += refcount unique_size += _le32toh(values[1]) unique_csize += _le32toh(values[2]) @@ -343,5 +344,5 @@ cdef class ChunkKeyIterator: raise StopIteration cdef uint32_t *value = (self.key + self.key_size) cdef uint32_t refcount = _le32toh(value[0]) - assert refcount <= MAX_VALUE, "invalid reference count" + assert refcount <= _MAX_VALUE, "invalid reference count" return (self.key)[:self.key_size], (refcount, _le32toh(value[1]), _le32toh(value[2])) diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index 4a6bd4432..b81cbf47f 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -124,16 +124,16 @@ def test_chunkindex_summarize(self): class HashIndexRefcountingTestCase(BaseTestCase): def test_chunkindex_limit(self): idx = ChunkIndex() - idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2 + idx[H(1)] = ChunkIndex.MAX_VALUE - 1, 1, 2 # 5 is arbitray, any number of incref/decrefs shouldn't move it once it's limited for i in range(5): # first incref to move it to the limit refcount, *_ = idx.incref(H(1)) - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE for i in range(5): refcount, *_ = idx.decref(H(1)) - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE def _merge(self, refcounta, refcountb): def merge(refcount1, refcount2): @@ -152,23 +152,23 @@ def merge(refcount1, refcount2): def 
test_chunkindex_merge_limit1(self): # Check that it does *not* limit at MAX_VALUE - 1 # (MAX_VALUE is odd) - half = hashindex.MAX_VALUE // 2 - assert self._merge(half, half) == hashindex.MAX_VALUE - 1 + half = ChunkIndex.MAX_VALUE // 2 + assert self._merge(half, half) == ChunkIndex.MAX_VALUE - 1 def test_chunkindex_merge_limit2(self): # 3000000000 + 2000000000 > MAX_VALUE - assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE + assert self._merge(3000000000, 2000000000) == ChunkIndex.MAX_VALUE def test_chunkindex_merge_limit3(self): # Crossover point: both addition and limit semantics will yield the same result - half = hashindex.MAX_VALUE // 2 - assert self._merge(half + 1, half) == hashindex.MAX_VALUE + half = ChunkIndex.MAX_VALUE // 2 + assert self._merge(half + 1, half) == ChunkIndex.MAX_VALUE def test_chunkindex_merge_limit4(self): # Beyond crossover, result of addition would be 2**31 - half = hashindex.MAX_VALUE // 2 - assert self._merge(half + 2, half) == hashindex.MAX_VALUE - assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE + half = ChunkIndex.MAX_VALUE // 2 + assert self._merge(half + 2, half) == ChunkIndex.MAX_VALUE + assert self._merge(half + 1, half + 1) == ChunkIndex.MAX_VALUE def test_chunkindex_add(self): idx1 = ChunkIndex() @@ -179,17 +179,17 @@ def test_chunkindex_add(self): def test_incref_limit(self): idx1 = ChunkIndex() - idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7) + idx1[H(1)] = (ChunkIndex.MAX_VALUE, 6, 7) idx1.incref(H(1)) refcount, *_ = idx1[H(1)] - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE def test_decref_limit(self): idx1 = ChunkIndex() - idx1[H(1)] = hashindex.MAX_VALUE, 6, 7 + idx1[H(1)] = ChunkIndex.MAX_VALUE, 6, 7 idx1.decref(H(1)) refcount, *_ = idx1[H(1)] - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE def test_decref_zero(self): idx1 = ChunkIndex() @@ -209,7 +209,7 @@ def test_incref_decref(self): def test_setitem_raises(self): idx1 = 
ChunkIndex() with pytest.raises(AssertionError): - idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0 + idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0, 0 def test_keyerror(self): idx = ChunkIndex() @@ -266,15 +266,15 @@ def test_read_known_good(self): idx2 = ChunkIndex() idx2[H(3)] = 2**32 - 123456, 6, 7 idx1.merge(idx2) - assert idx1[H(3)] == (hashindex.MAX_VALUE, 0, 0) + assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 0, 0) def test_nsindex_segment_limit(): idx = NSIndex() with pytest.raises(AssertionError): - idx[H(1)] = hashindex.MAX_VALUE + 1, 0 + idx[H(1)] = NSIndex.MAX_VALUE + 1, 0 assert H(1) not in idx - idx[H(2)] = hashindex.MAX_VALUE, 0 + idx[H(2)] = NSIndex.MAX_VALUE, 0 assert H(2) in idx From 4cb3355d9051d42046fab88bfa4b2e570e1cefc3 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Thu, 8 Sep 2016 16:39:44 +0200 Subject: [PATCH 08/13] create --read-special fix crash on broken symlink also correctly processes broken symlinks. before this regressed to a crash (5b45385) a broken symlink would've been skipped. 
--- borg/archiver.py | 9 +++++++-- borg/testsuite/archiver.py | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 7605fcd92..785a7b8d1 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -306,8 +306,13 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, if not read_special: status = archive.process_symlink(path, st) else: - st_target = os.stat(path) - if is_special(st_target.st_mode): + try: + st_target = os.stat(path) + except OSError: + special = False + else: + special = is_special(st_target.st_mode) + if special: status = archive.process_file(path, st_target, cache) else: status = archive.process_symlink(path, st) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index f563ea428..7b2193595 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -901,6 +901,14 @@ def test_create_topical(self): output = self.cmd('create', '-v', '--list', '--filter=AM', self.repository_location + '::test3', 'input') self.assert_in('file1', output) + def test_create_read_special_broken_symlink(self): + os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link')) + self.cmd('init', self.repository_location) + archive = self.repository_location + '::test' + self.cmd('create', '--read-special', archive, 'input') + output = self.cmd('list', archive) + assert 'input/link -> somewhere doesnt exist' in output + # def test_cmdline_compatibility(self): # self.create_regular_file('file1', size=1024 * 80) # self.cmd('init', self.repository_location) From f1cf7bc322281e6511adbe30cbe36799135cc3b2 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Thu, 8 Sep 2016 16:43:48 +0200 Subject: [PATCH 09/13] process_symlink: fix missing backup_io() Fixes a chmod/chown/chgrp/unlink/rename/... crash race between getting dirents and dispatching to process_symlink. 
--- borg/archive.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/archive.py b/borg/archive.py index a3a133171..dfe870160 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -610,7 +610,8 @@ def process_dev(self, path, st): return 'b' # block device def process_symlink(self, path, st): - source = os.readlink(path) + with backup_io(): + source = os.readlink(path) item = {b'path': make_path_safe(path), b'source': source} item.update(self.stat_attrs(st, path)) self.add_item(item) From b2e389e0a044fa03039fb651d5a22d58f37dc60c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Sep 2016 21:05:16 +0200 Subject: [PATCH 10/13] docs: add contribution guidelines --- docs/development.rst | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/docs/development.rst b/docs/development.rst index 480a17065..3e89e34c7 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -10,6 +10,46 @@ This chapter will get you started with |project_name| development. |project_name| is written in Python (with a little bit of Cython and C for the performance critical parts). +Contributions +------------- + +... are welcome! + +Some guidance for contributors: + +- discuss changes on the GitHub issue tracker, IRC or mailing list + +- choose the branch you base your changesets on wisely: + + - choose x.y-maint for stuff that should go into the next x.y release + (it usually gets merged into the master branch later as well) + - choose master if that does not apply + +- do clean changesets: + + - focus on some topic, resist changing anything else. + - do not do style changes mixed with functional changes. + - try to avoid refactorings mixed with functional changes. + - if you need to fix something after commit/push: + + - if there are ongoing reviews: do a fixup commit you can + merge into the bad commit later. 
+ - if there are no ongoing reviews or you did not push the + bad commit yet: edit the commit to include your fix or + merge the fixup commit before pushing. + - have a nice, clear, typo-free commit comment + - if you fixed an issue, refer to it in your commit comment + - follow the style guide (see below) + +- if you write new code, please add tests and docs for it + +- run the tests, fix anything that comes up + +- make a pull request on github + +- wait for review by other developers + + Style guide ----------- From be3616b6b391ae16709260c0b199f20be2330ef7 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 9 Sep 2016 16:11:06 +0200 Subject: [PATCH 11/13] ArchiveChecker: use MAX_LOAD_FACTOR constant --- borg/archive.py | 5 +++-- borg/testsuite/hashindex.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index a3a133171..e6dd39557 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -853,8 +853,9 @@ def init_chunks(self): """Fetch a list of all object keys from repository """ # Explicitly set the initial hash table capacity to avoid performance issues - # due to hash table "resonance" - capacity = int(len(self.repository) * 1.35 + 1) # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c) + # due to hash table "resonance". 
+ # Since reconstruction of archive items can add some new chunks, add 10 % headroom + capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1) self.chunks = ChunkIndex(capacity) marker = None while True: diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index b81cbf47f..629ae4e57 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -279,5 +279,5 @@ def test_nsindex_segment_limit(): def test_max_load_factor(): - assert NSIndex.MAX_LOAD_FACTOR < 1 - assert ChunkIndex.MAX_LOAD_FACTOR < 1 + assert NSIndex.MAX_LOAD_FACTOR < 1.0 + assert ChunkIndex.MAX_LOAD_FACTOR < 1.0 From c8f4e9e34ca20ff3b0688f843913bf930e1fc9d7 Mon Sep 17 00:00:00 2001 From: Julian Andres Klode Date: Tue, 13 Sep 2016 21:28:16 +0200 Subject: [PATCH 12/13] Correctly exit with proper unlock on SIGHUP, fixes #1593 If the connections hangs up, the borg server needs to clean up, especially unlock the repository, so a later try will work again. This is especially problematic with systemd systems that have KillUserProcesses enabled (which is the default): Logind sends a SIGHUP message to the session scope when the session ends. --- borg/archiver.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 785a7b8d1..bb8e33f77 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1666,6 +1666,14 @@ def sig_term_handler(signum, stack): raise SIGTERMReceived +class SIGHUPReceived(BaseException): + pass + + +def sig_hup_handler(signum, stack): + raise SIGHUPReceived + + def setup_signal_handlers(): # pragma: no cover sigs = [] if hasattr(signal, 'SIGUSR1'): @@ -1674,7 +1682,12 @@ def setup_signal_handlers(): # pragma: no cover sigs.append(signal.SIGINFO) # kill -INFO pid (or ctrl-t) for sig in sigs: signal.signal(sig, sig_info_handler) + # If we received SIGTERM or SIGHUP, catch them and raise a proper exception + # that can be handled for an orderly exit. 
SIGHUP is important especially + # for systemd systems, where logind sends it when a session exits, in + # addition to any traditional use. signal.signal(signal.SIGTERM, sig_term_handler) + signal.signal(signal.SIGHUP, sig_hup_handler) def main(): # pragma: no cover @@ -1713,6 +1726,9 @@ def main(): # pragma: no cover except SIGTERMReceived: msg = 'Received SIGTERM.' exit_code = EXIT_ERROR + except SIGHUPReceived: + msg = 'Received SIGHUP.' + exit_code = EXIT_ERROR if msg: logger.error(msg) if args.show_rc: From 3c3502a9a05bd7996f5218d7de2e21ecc99601c3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 14 Sep 2016 02:22:46 +0200 Subject: [PATCH 13/13] update wheezy vagrant box to 7.11 7.9 is not available any more. --- Vagrantfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index f2e0945f6..8316ec2f9 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -385,7 +385,7 @@ Vagrant.configure(2) do |config| end config.vm.define "wheezy32" do |b| - b.vm.box = "boxcutter/debian79-i386" + b.vm.box = "boxcutter/debian711-i386" b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32") @@ -398,7 +398,7 @@ Vagrant.configure(2) do |config| end config.vm.define "wheezy64" do |b| - b.vm.box = "boxcutter/debian79" + b.vm.box = "boxcutter/debian711" b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")