diff --git a/README.rst b/README.rst
index f6132773e..57af39576 100644
--- a/README.rst
+++ b/README.rst
@@ -92,7 +92,6 @@ Initialize a new backup repository and create a backup archive::
 Now doing another backup, just to show off the great deduplication:
 
 .. code-block:: none
-   :emphasize-lines: 11
 
     $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
     -----------------------------------------------------------------------------
@@ -114,6 +113,22 @@ Now doing another backup, just to show off the great deduplication:
 For a graphical frontend refer to our complementary project `BorgWeb `_.
 
+Checking Release Authenticity and Security Contact
+==================================================
+
+`Releases `_ are signed with this GPG key;
+please use GPG to verify their authenticity.
+
+If you discover a security issue, please use this contact to report it
+privately and, if possible, use encrypted e-mail:
+
+Thomas Waldmann
+
+GPG Key Fingerprint: 6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393
+
+The public key can be fetched from any GPG keyserver, but be careful: you must
+use the **full fingerprint** to check that you got the correct key.
+
 Links
 =====
 
@@ -169,7 +184,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
 Borg is distributed under a 3-clause BSD license, see `License`_ for the
 complete license.
 
-|doc| |build| |coverage|
+|doc| |build| |coverage| |bestpractices|
 
 .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable
         :alt: Documentation
@@ -186,3 +201,7 @@ Borg is distributed under a 3-clause BSD license, see `License`_ for the complet
 .. |screencast| image:: https://asciinema.org/a/28691.png
         :alt: BorgBackup Installation and Basic Usage
         :target: https://asciinema.org/a/28691?autoplay=1&speed=2
+
+.. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge
+        :alt: Best Practices Score
+        :target: https://bestpractices.coreinfrastructure.org/projects/271
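For reference, the verification workflow this README section describes, shown in the same console style the README already uses. The artifact names are placeholders; only the fingerprint comes from the text above::

    $ gpg --recv-keys 6D5BEF9ADD2075805747B70F9F88FB52FAF7B393
    $ gpg --verify borgbackup-x.y.z.tar.gz.asc borgbackup-x.y.z.tar.gz
    (then check that the primary key fingerprint gpg reports matches the full fingerprint above)
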
diff --git a/Vagrantfile b/Vagrantfile
index c489e707e..adbaf6589 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -387,7 +387,7 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "wheezy32" do |b|
-    b.vm.box = "boxcutter/debian79-i386"
+    b.vm.box = "boxcutter/debian711-i386"
     b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32")
@@ -400,7 +400,7 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "wheezy64" do |b|
-    b.vm.box = "boxcutter/debian79"
+    b.vm.box = "boxcutter/debian711"
     b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")
diff --git a/docs/development.rst b/docs/development.rst
index 9885d083e..63bc82bd8 100644
--- a/docs/development.rst
+++ b/docs/development.rst
@@ -10,6 +10,45 @@ This chapter will get you started with |project_name| development.
 |project_name| is written in Python (with a little bit of Cython and C for
 the performance critical parts).
 
+Contributions
+-------------
+
+... are welcome!
+
+Some guidance for contributors:
+
+- discuss changes on the github issue tracker, on IRC or on the mailing list
+
+- choose the branch you base your changesets on wisely:
+
+  - choose x.y-maint for changes that should go into the next x.y release
+    (it usually gets merged into the master branch later as well)
+  - choose master if that does not apply
+
+- do clean changesets:
+
+  - focus on one topic, resist changing anything else.
+  - do not mix style changes with functional changes.
+  - try to avoid mixing refactorings with functional changes.
+  - if you need to fix something after commit/push:
+
+    - if there are ongoing reviews: do a fixup commit you can
+      merge into the bad commit later.
+    - if there are no ongoing reviews or you did not push the
+      bad commit yet: edit the commit to include your fix or
+      merge the fixup commit before pushing.
+  - write a nice, clear, typo-free commit comment.
+  - if you fixed an issue, refer to it in your commit comment.
+  - follow the style guide (see below).
+
+- if you write new code, please add tests and docs for it
+
+- run the tests, fix anything that comes up
+
+- make a pull request on github
+
+- wait for review by other developers
+
 Code and issues
 ---------------
diff --git a/docs/faq.rst b/docs/faq.rst
index 4b6b68378..68b447de8 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -352,6 +352,8 @@ those files are reported as being added when, really, chunks are already used.
 
+.. _always_chunking:
+
 It always chunks all my files, even unchanged ones!
 ---------------------------------------------------
diff --git a/docs/usage.rst b/docs/usage.rst
index 1c4966854..332b6e421 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -86,6 +86,7 @@ General:
     BORG_FILES_CACHE_TTL
         When set to a numeric value, this determines the maximum "time to live" for the files cache
         entries (default: 20). The files cache is used to quickly determine whether a file is unchanged.
+        The FAQ explains this in more detail: :ref:`always_chunking`
     TMPDIR
         where temporary files are stored (might need a lot of temporary space for some operations)
diff --git a/src/borg/archive.py b/src/borg/archive.py
index 9546cb0af..4db30f5a1 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -732,7 +732,8 @@ Number of files: {0.stats.nfiles}'''.format(
         return 'b'  # block device
 
     def process_symlink(self, path, st):
-        source = os.readlink(path)
+        with backup_io():
+            source = os.readlink(path)
         item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
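The process_symlink() change above routes os.readlink() through backup_io(), so a symlink that vanishes or turns unreadable mid-backup is recorded as a per-item error instead of crashing the whole run. A minimal, self-contained sketch of that pattern (an illustration only, not Borg's actual backup_io implementation)::

    import os
    from contextlib import contextmanager

    class BackupOSError(Exception):
        """An OSError that happened while reading one of the input files."""

    @contextmanager
    def backup_io():
        # Translate OSErrors raised while reading the files being backed up
        # into a distinct type, so the caller can record a per-item error
        # and carry on instead of aborting the whole backup.
        try:
            yield
        except OSError as os_error:
            raise BackupOSError(os_error) from os_error

    try:
        with backup_io():
            target = os.readlink('no-such-link')
    except BackupOSError as exc:
        print('E %s' % exc)  # mark the item with an error status and continue
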
@@ -1009,8 +1010,9 @@ class ArchiveChecker:
         """Fetch a list of all object keys from repository
         """
         # Explicitly set the initial hash table capacity to avoid performance issues
-        # due to hash table "resonance"
-        capacity = int(len(self.repository) * 1.35 + 1)  # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
+        # due to hash table "resonance".
+        # Since reconstruction of archive items can add some new chunks, add 10% headroom
+        capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
         self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 5936cdf8b..a63513de1 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -379,8 +379,13 @@ class Archiver:
                     if not read_special:
                         status = archive.process_symlink(path, st)
                     else:
-                        st_target = os.stat(path)
-                        if is_special(st_target.st_mode):
+                        try:
+                            st_target = os.stat(path)
+                        except OSError:
+                            special = False
+                        else:
+                            special = is_special(st_target.st_mode)
+                        if special:
                             status = archive.process_file(path, st_target, cache)
                         else:
                             status = archive.process_symlink(path, st)
@@ -984,7 +989,8 @@ class Archiver:
                                 dry_run=args.dry_run)
 
         with signal_handler(signal.SIGTERM, interrupt), \
-             signal_handler(signal.SIGINT, interrupt):
+             signal_handler(signal.SIGINT, interrupt), \
+             signal_handler(signal.SIGHUP, interrupt):
             if args.location.archive:
                 name = args.location.archive
                 if recreater.is_temporary_archive(name):
@@ -1865,11 +1871,14 @@ class Archiver:
         info_epilog = textwrap.dedent("""
         This command displays detailed information about the specified archive or repository.
 
-        The "This archive" line refers exclusively to the given archive:
-        "Deduplicated size" is the size of the unique chunks stored only for the
-        given archive.
+        Please note that the deduplicated sizes of the individual archives do not add
+        up to the deduplicated size of the repository ("all archives"), because the two
+        mean different things:
 
-        The "All archives" line shows global statistics (all chunks).
+        This archive / deduplicated size = amount of data stored ONLY for this archive
+                                         = unique chunks of this archive.
+        All archives / deduplicated size = amount of data stored in the repo
+                                         = all chunks in the repository.
         """)
         subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False,
                                           description=self.do_info.__doc__,
@@ -2375,6 +2384,14 @@ def sig_term_handler(signum, stack):
     raise SIGTERMReceived
 
 
+class SIGHUPReceived(BaseException):
+    pass
+
+
+def sig_hup_handler(signum, stack):
+    raise SIGHUPReceived
+
+
 def setup_signal_handlers():  # pragma: no cover
     sigs = []
     if hasattr(signal, 'SIGUSR1'):
@@ -2383,7 +2400,12 @@ def setup_signal_handlers():  # pragma: no cover
         sigs.append(signal.SIGINFO)  # kill -INFO pid (or ctrl-t)
     for sig in sigs:
         signal.signal(sig, sig_info_handler)
+    # If we receive SIGTERM or SIGHUP, catch them and raise a proper exception
+    # that can be handled for an orderly exit. SIGHUP is important especially
+    # for systemd systems, where logind sends it when a session exits, in
+    # addition to any traditional use.
     signal.signal(signal.SIGTERM, sig_term_handler)
+    signal.signal(signal.SIGHUP, sig_hup_handler)
 
 
 def main():  # pragma: no cover
@@ -2438,6 +2460,9 @@ def main():  # pragma: no cover
             tb_log_level = logging.DEBUG
             tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
             exit_code = EXIT_ERROR
+        except SIGHUPReceived:
+            msg = 'Received SIGHUP.'
+            exit_code = EXIT_ERROR
     if msg:
         logger.error(msg)
     if tb:
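The signal plumbing added above follows a common pattern: turn an asynchronous signal into a synchronous exception, so cleanup code (finally blocks, context managers) runs on the way out. A self-contained sketch of that pattern (POSIX only; sending the signal to ourselves stands in for logind ending the session)::

    import os
    import signal

    class SIGHUPReceived(BaseException):
        # BaseException rather than Exception, so broad "except Exception"
        # handlers in library code do not swallow it on the way to main().
        pass

    def sig_hup_handler(signum, stack):
        raise SIGHUPReceived

    signal.signal(signal.SIGHUP, sig_hup_handler)

    try:
        os.kill(os.getpid(), signal.SIGHUP)  # stand-in for the session going away
    except SIGHUPReceived:
        print('Received SIGHUP.')            # orderly exit path, as in main() above
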
diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx
index 74c52c9c1..900f3f3b5 100644
--- a/src/borg/hashindex.pyx
+++ b/src/borg/hashindex.pyx
@@ -28,6 +28,8 @@ cdef extern from "_hashindex.c":
     uint32_t _htole32(uint32_t v)
     uint32_t _le32toh(uint32_t v)
 
+    double HASH_MAX_LOAD
+
 
 cdef _NoDefault = object()
 
@@ -50,7 +52,6 @@ assert UINT32_MAX == 2**32-1
 
 # module-level constant because cdef's in classes can't have default values
 cdef uint32_t _MAX_VALUE = 2**32-1025
-MAX_VALUE = _MAX_VALUE
 
 assert _MAX_VALUE % 2 == 1
 
@@ -60,6 +61,9 @@ cdef class IndexBase:
     cdef HashIndex *index
     cdef int key_size
 
+    MAX_LOAD_FACTOR = HASH_MAX_LOAD
+    MAX_VALUE = _MAX_VALUE
+
     def __cinit__(self, capacity=0, path=None, key_size=32):
         self.key_size = key_size
         if path:
@@ -296,7 +300,7 @@ cdef class ChunkIndex(IndexBase):
                 unique_chunks += 1
             values = <uint32_t*> (key + self.key_size)
             refcount = _le32toh(values[0])
-            assert refcount <= MAX_VALUE, "invalid reference count"
+            assert refcount <= _MAX_VALUE, "invalid reference count"
             chunks += refcount
             unique_size += _le32toh(values[1])
             unique_csize += _le32toh(values[2])
@@ -358,5 +362,5 @@ cdef class ChunkKeyIterator:
             raise StopIteration
         cdef uint32_t *value = <uint32_t*> (self.key + self.key_size)
         cdef uint32_t refcount = _le32toh(value[0])
-        assert refcount <= MAX_VALUE, "invalid reference count"
+        assert refcount <= _MAX_VALUE, "invalid reference count"
         return (<char*> self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2]))
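With MAX_LOAD_FACTOR exported here, the capacity formula used by ArchiveChecker above can be checked by hand. A sketch with made-up numbers; 0.75 is an assumed value for HASH_MAX_LOAD, the real constant lives in _hashindex.c::

    MAX_LOAD_FACTOR = 0.75  # assumed value of HASH_MAX_LOAD (see _hashindex.c)
    num_objects = 1000000   # pretend the repository currently references 1M chunks

    # old formula: fixed fudge factor, chosen to stay just above 1 / HASH_MAX_LOAD
    old_capacity = int(num_objects * 1.35 + 1)               # 1350001

    # new formula: derive the bucket count from the load factor itself and add
    # 10% headroom for chunks that item reconstruction may create during check
    new_capacity = int(num_objects / MAX_LOAD_FACTOR * 1.1)  # 1466666

    print(old_capacity, new_capacity)
    print(num_objects * 1.1 / new_capacity)  # ~0.75: even the grown index stays close to the design load
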
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index 181ccb17a..e6d89671b 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -1130,6 +1130,14 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input')
         self.assert_in('file1', output)
 
+    def test_create_read_special_broken_symlink(self):
+        os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link'))
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test'
+        self.cmd('create', '--read-special', archive, 'input')
+        output = self.cmd('list', archive)
+        assert 'input/link -> somewhere doesnt exist' in output
+
     # def test_cmdline_compatibility(self):
     #     self.create_regular_file('file1', size=1024 * 80)
     #     self.cmd('init', self.repository_location)
diff --git a/src/borg/testsuite/hashindex.py b/src/borg/testsuite/hashindex.py
index 5ddb85171..63b068275 100644
--- a/src/borg/testsuite/hashindex.py
+++ b/src/borg/testsuite/hashindex.py
@@ -140,16 +140,16 @@ class HashIndexSizeTestCase(BaseTestCase):
 
 class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_limit(self):
         idx = ChunkIndex()
-        idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2
+        idx[H(1)] = ChunkIndex.MAX_VALUE - 1, 1, 2
         # 5 is arbitrary, any number of incref/decrefs shouldn't move it once it's limited
         for i in range(5):
             # first incref to move it to the limit
             refcount, *_ = idx.incref(H(1))
-            assert refcount == hashindex.MAX_VALUE
+            assert refcount == ChunkIndex.MAX_VALUE
         for i in range(5):
             refcount, *_ = idx.decref(H(1))
-            assert refcount == hashindex.MAX_VALUE
+            assert refcount == ChunkIndex.MAX_VALUE
 
     def _merge(self, refcounta, refcountb):
         def merge(refcount1, refcount2):
@@ -168,23 +168,23 @@ class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_merge_limit1(self):
         # Check that it does *not* limit at MAX_VALUE - 1
         # (MAX_VALUE is odd)
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half, half) == hashindex.MAX_VALUE - 1
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half, half) == ChunkIndex.MAX_VALUE - 1
 
     def test_chunkindex_merge_limit2(self):
         # 3000000000 + 2000000000 > MAX_VALUE
-        assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE
+        assert self._merge(3000000000, 2000000000) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_merge_limit3(self):
         # Crossover point: both addition and limit semantics will yield the same result
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half + 1, half) == hashindex.MAX_VALUE
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half + 1, half) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_merge_limit4(self):
         # Beyond crossover, result of addition would be 2**31
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half + 2, half) == hashindex.MAX_VALUE
-        assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half + 2, half) == ChunkIndex.MAX_VALUE
+        assert self._merge(half + 1, half + 1) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_add(self):
         idx1 = ChunkIndex()
@@ -195,17 +195,17 @@ class HashIndexRefcountingTestCase(BaseTestCase):
 
     def test_incref_limit(self):
         idx1 = ChunkIndex()
-        idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7)
+        idx1[H(1)] = (ChunkIndex.MAX_VALUE, 6, 7)
         idx1.incref(H(1))
         refcount, *_ = idx1[H(1)]
-        assert refcount == hashindex.MAX_VALUE
+        assert refcount == ChunkIndex.MAX_VALUE
 
     def test_decref_limit(self):
         idx1 = ChunkIndex()
-        idx1[H(1)] = hashindex.MAX_VALUE, 6, 7
+        idx1[H(1)] = ChunkIndex.MAX_VALUE, 6, 7
         idx1.decref(H(1))
         refcount, *_ = idx1[H(1)]
-        assert refcount == hashindex.MAX_VALUE
+        assert refcount == ChunkIndex.MAX_VALUE
 
     def test_decref_zero(self):
         idx1 = ChunkIndex()
@@ -225,7 +225,7 @@ class HashIndexRefcountingTestCase(BaseTestCase):
     def test_setitem_raises(self):
         idx1 = ChunkIndex()
         with self.assert_raises(AssertionError):
-            idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
+            idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0, 0
 
     def test_keyerror(self):
         idx = ChunkIndex()
@@ -282,14 +282,20 @@ class HashIndexDataTestCase(BaseTestCase):
         idx2 = ChunkIndex()
         idx2[H(3)] = 2**32 - 123456, 6, 7
         idx1.merge(idx2)
-        assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
+        assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
 
 
 class NSIndexTestCase(BaseTestCase):
     def test_nsindex_segment_limit(self):
         idx = NSIndex()
         with self.assert_raises(AssertionError):
-            idx[H(1)] = hashindex.MAX_VALUE + 1, 0
+            idx[H(1)] = NSIndex.MAX_VALUE + 1, 0
         assert H(1) not in idx
-        idx[H(2)] = hashindex.MAX_VALUE, 0
+        idx[H(2)] = NSIndex.MAX_VALUE, 0
         assert H(2) in idx
+
+
+class AllIndexTestCase(BaseTestCase):
+    def test_max_load_factor(self):
+        assert NSIndex.MAX_LOAD_FACTOR < 1.0
+        assert ChunkIndex.MAX_LOAD_FACTOR < 1.0
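The refcount tests above pin down the saturation semantics: once a reference count reaches ChunkIndex.MAX_VALUE it stays there, no matter how often it is incref'd or decref'd. A usage sketch of that behaviour, assuming the borg package from this tree is importable::

    from borg.hashindex import ChunkIndex

    idx = ChunkIndex()
    key = bytes(32)                          # chunk ids are 32-byte keys by default
    idx[key] = (ChunkIndex.MAX_VALUE, 6, 7)  # value is (refcount, size, csize)

    idx.incref(key)   # already at the cap: the refcount stays at MAX_VALUE
    idx.decref(key)   # once saturated, decref does not move it either

    refcount, size, csize = idx[key]
    assert refcount == ChunkIndex.MAX_VALUE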