
Merge branch '1.0-maint' into merge-1.0-maint

# Conflicts:
#	docs/development.rst
#	src/borg/archive.py
#	src/borg/archiver.py
#	src/borg/hashindex.pyx
#	src/borg/testsuite/hashindex.py
Thomas Waldmann 2016-09-14 02:53:41 +02:00
commit 1287d1ae92
10 changed files with 141 additions and 36 deletions

README.rst

@@ -92,7 +92,6 @@ Initialize a new backup repository and create a backup archive::
Now doing another backup, just to show off the great deduplication:
.. code-block:: none
:emphasize-lines: 11
$ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
-----------------------------------------------------------------------------
@@ -114,6 +113,22 @@ Now doing another backup, just to show off the great deduplication:
For a graphical frontend refer to our complementary project `BorgWeb <https://borgweb.readthedocs.io/>`_.
Checking Release Authenticity and Security Contact
==================================================
`Releases <https://github.com/borgbackup/borg/releases>`_ are signed with this GPG key,
please use GPG to verify their authenticity.
In case you discover a security issue, please use this contact to report it privately,
and please use encrypted E-Mail if possible:
Thomas Waldmann <tw@waldmann-edv.de>
GPG Key Fingerprint: 6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393
The public key can be fetched from any GPG keyserver, but be careful: you must
use the **full fingerprint** to check that you got the correct key.
Links
=====
@@ -169,7 +184,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license.
|doc| |build| |coverage|
|doc| |build| |coverage| |bestpractices|
.. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable
:alt: Documentation
@@ -186,3 +201,7 @@ Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license.
.. |screencast| image:: https://asciinema.org/a/28691.png
:alt: BorgBackup Installation and Basic Usage
:target: https://asciinema.org/a/28691?autoplay=1&speed=2
.. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge
:alt: Best Practices Score
:target: https://bestpractices.coreinfrastructure.org/projects/271
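
As an illustration of the verification step the new security section describes, here is a minimal, hedged sketch of fetching the key by its full fingerprint and checking a release signature with GPG via subprocess; the tarball and signature file names are placeholders, not actual release artifact names:

# Sketch only: fetch the signing key by FULL fingerprint and verify a
# downloaded release file against its detached signature.
import subprocess

FINGERPRINT = "6D5BEF9ADD2075805747B70F9F88FB52FAF7B393"

# Using the full fingerprint guards against short key-id collisions.
subprocess.run(["gpg", "--recv-keys", FINGERPRINT], check=True)
subprocess.run(["gpg", "--verify", "borgbackup.tar.gz.asc", "borgbackup.tar.gz"],
               check=True)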

Vagrantfile

@@ -387,7 +387,7 @@ Vagrant.configure(2) do |config|
end
config.vm.define "wheezy32" do |b|
b.vm.box = "boxcutter/debian79-i386"
b.vm.box = "boxcutter/debian711-i386"
b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32")
@@ -400,7 +400,7 @@ Vagrant.configure(2) do |config|
end
config.vm.define "wheezy64" do |b|
b.vm.box = "boxcutter/debian79"
b.vm.box = "boxcutter/debian711"
b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")

docs/development.rst

@@ -10,11 +10,50 @@ This chapter will get you started with |project_name| development.
|project_name| is written in Python (with a little bit of Cython and C for
the performance critical parts).
Contributions
-------------
... are welcome!
Some guidance for contributors:
- discuss changes on the github issue tracker, IRC or the mailing list
- choose the branch you base your changesets on wisely:
- choose x.y-maint for stuff that should go into next x.y release
(it usually gets merged into master branch later also)
- choose master if that does not apply
- do clean changesets:
- focus on some topic, resist changing anything else.
- do not do style changes mixed with functional changes.
- try to avoid refactorings mixed with functional changes.
- if you need to fix something after commit/push:
- if there are ongoing reviews: do a fixup commit you can
merge into the bad commit later.
- if there are no ongoing reviews or you did not push the
bad commit yet: edit the commit to include your fix or
merge the fixup commit before pushing.
- have a nice, clear, typo-free commit comment
- if you fixed an issue, refer to it in your commit comment
- follow the style guide (see below)
- if you write new code, please add tests and docs for it
- run the tests, fix anything that comes up
- make a pull request on github
- wait for review by other developers
Code and issues
---------------
Code is stored on Github, in the `Borgbackup organization
<https://github.com/borgbackup/borg/>`_. `Issues
https://github.com/borgbackup/borg/>`_. `Issues
<https://github.com/borgbackup/borg/issues>`_ and `pull requests
<https://github.com/borgbackup/borg/pulls>`_ should be sent there as
well. See also the :ref:`support` section for more details.

docs/faq.rst

@@ -352,6 +352,8 @@ those files are reported as being added when, really, chunks are
already used.
.. _always_chunking:
It always chunks all my files, even unchanged ones!
---------------------------------------------------

docs/usage.rst

@@ -86,6 +86,7 @@ General:
BORG_FILES_CACHE_TTL
When set to a numeric value, this determines the maximum "time to live" for the files cache
entries (default: 20). The files cache is used to quickly determine whether a file is unchanged.
The FAQ explains this in more detail: :ref:`always_chunking`
TMPDIR
where temporary files are stored (might need a lot of temporary space for some operations)
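
As a conceptual illustration of the TTL behaviour described above (assumed semantics, not borg's actual files cache implementation or on-disk format), an entry's age could be tracked like this:

# Conceptual sketch: an entry ages by one per backup run in which its file
# is not seen; once older than the TTL it is dropped, so the file gets
# chunked again the next time it shows up.
TTL = 20  # default for BORG_FILES_CACHE_TTL

def expire_files_cache(files_cache, seen_paths):
    kept = {}
    for path, entry in files_cache.items():
        age = 0 if path in seen_paths else entry["age"] + 1
        if age <= TTL:
            kept[path] = dict(entry, age=age)
    return kept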

src/borg/archive.py

@@ -732,7 +732,8 @@ def process_dev(self, path, st):
return 'b' # block device
def process_symlink(self, path, st):
source = os.readlink(path)
with backup_io():
source = os.readlink(path)
item = Item(path=make_path_safe(path), source=source)
item.update(self.stat_attrs(st, path))
self.add_item(item)
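
The change above moves os.readlink() inside backup_io(). As a rough, simplified sketch of what such a context manager does (not borg's exact implementation), it translates OSError from a filesystem call into a dedicated error type, so one unreadable item can be reported without aborting the whole backup:

import os
from contextlib import contextmanager

class BackupOSError(Exception):
    """An OSError raised while reading an item during backup."""

@contextmanager
def backup_io():
    try:
        yield
    except OSError as os_error:
        raise BackupOSError(os_error) from os_error

# usage, mirroring process_symlink above ('/some/symlink' is a placeholder):
try:
    with backup_io():
        source = os.readlink('/some/symlink')
except BackupOSError as e:
    print('skipping item:', e)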
@@ -1009,8 +1010,9 @@ def init_chunks(self):
"""Fetch a list of all object keys from repository
"""
# Explicitly set the initial hash table capacity to avoid performance issues
# due to hash table "resonance"
capacity = int(len(self.repository) * 1.35 + 1) # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
# due to hash table "resonance".
# Since reconstruction of archive items can add some new chunks, add 10 % headroom
capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
self.chunks = ChunkIndex(capacity)
marker = None
while True:
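
To see what the new sizing does, a hedged worked example; the 0.75 load factor is an assumption, inferred from the old comment's `1.35 > 1.0 / HASH_MAX_LOAD`:

repo_len = 1000000                  # objects in the repository
MAX_LOAD_FACTOR = 0.75              # assumed value of HASH_MAX_LOAD

capacity = int(repo_len / MAX_LOAD_FACTOR * 1.1)
print(capacity)                     # 1466666
print(repo_len / capacity)          # ~0.68 initial load, below 0.75,
                                    # leaving ~10% headroom for chunks added
                                    # while reconstructing archive items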

src/borg/archiver.py

@@ -379,8 +379,13 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
if not read_special:
status = archive.process_symlink(path, st)
else:
st_target = os.stat(path)
if is_special(st_target.st_mode):
try:
st_target = os.stat(path)
except OSError:
special = False
else:
special = is_special(st_target.st_mode)
if special:
status = archive.process_file(path, st_target, cache)
else:
status = archive.process_symlink(path, st)
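
Standalone, the fixed logic looks roughly like this (the status strings and the process function are stand-ins for the archive methods above):

import os
import stat

def is_special(mode):
    # device files and FIFOs are "special"; reading their contents is what
    # --read-special asks for
    return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)

def status_for_symlink(path, read_special):
    if not read_special:
        return 'symlink'
    try:
        st_target = os.stat(path)   # follows the link, fails if it is broken
    except OSError:
        special = False             # broken link: archive the link itself
    else:
        special = is_special(st_target.st_mode)
    return 'file contents' if special else 'symlink'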
@@ -1865,11 +1870,14 @@ def build_parser(self, prog=None):
info_epilog = textwrap.dedent("""
This command displays detailed information about the specified archive or repository.
The "This archive" line refers exclusively to the given archive:
"Deduplicated size" is the size of the unique chunks stored only for the
given archive.
Please note that the deduplicated sizes of the individual archives do not add
up to the deduplicated size of the repository ("all archives"), because the two
mean different things:
The "All archives" line shows global statistics (all chunks).
This archive / deduplicated size = amount of data stored ONLY for this archive
= unique chunks of this archive.
All archives / deduplicated size = amount of data stored in the repo
= all chunks in the repository.
""")
subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False,
description=self.do_info.__doc__,
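
A tiny worked example of why the per-archive deduplicated sizes do not sum to the repository-wide one (chunk names are made up):

archive1 = {'A', 'B'}               # chunks referenced by archive 1
archive2 = {'B', 'C'}               # chunks referenced by archive 2

unique1 = archive1 - archive2       # stored ONLY for archive 1 -> {'A'}
unique2 = archive2 - archive1       # stored ONLY for archive 2 -> {'C'}
all_chunks = archive1 | archive2    # repository-wide -> {'A', 'B', 'C'}

# 1 + 1 unique chunks, but 3 chunks in the repo: the shared chunk 'B'
# counts toward "all archives" while being unique to neither archive.
assert len(unique1) + len(unique2) == 2
assert len(all_chunks) == 3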
@@ -2375,6 +2383,14 @@ def sig_term_handler(signum, stack):
raise SIGTERMReceived
class SIGHUPReceived(BaseException):
pass
def sig_hup_handler(signum, stack):
raise SIGHUPReceived
def setup_signal_handlers(): # pragma: no cover
sigs = []
if hasattr(signal, 'SIGUSR1'):
@@ -2383,7 +2399,12 @@ def setup_signal_handlers(): # pragma: no cover
sigs.append(signal.SIGINFO) # kill -INFO pid (or ctrl-t)
for sig in sigs:
signal.signal(sig, sig_info_handler)
# If we received SIGTERM or SIGHUP, catch them and raise a proper exception
# that can be handled for an orderly exit. SIGHUP is important especially
# for systemd systems, where logind sends it when a session exits, in
# addition to any traditional use.
signal.signal(signal.SIGTERM, sig_term_handler)
signal.signal(signal.SIGHUP, sig_hup_handler)
def main(): # pragma: no cover
@@ -2438,6 +2459,9 @@ def main(): # pragma: no cover
tb_log_level = logging.DEBUG
tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
exit_code = EXIT_ERROR
except SIGHUPReceived:
msg = 'Received SIGHUP.'
exit_code = EXIT_ERROR
if msg:
logger.error(msg)
if tb:

src/borg/hashindex.pyx

@@ -28,6 +28,8 @@ cdef extern from "_hashindex.c":
uint32_t _htole32(uint32_t v)
uint32_t _le32toh(uint32_t v)
double HASH_MAX_LOAD
cdef _NoDefault = object()
@@ -50,7 +52,6 @@ assert UINT32_MAX == 2**32-1
# module-level constant because cdef's in classes can't have default values
cdef uint32_t _MAX_VALUE = 2**32-1025
MAX_VALUE = _MAX_VALUE
assert _MAX_VALUE % 2 == 1
@@ -60,6 +61,9 @@ cdef class IndexBase:
cdef HashIndex *index
cdef int key_size
MAX_LOAD_FACTOR = HASH_MAX_LOAD
MAX_VALUE = _MAX_VALUE
def __cinit__(self, capacity=0, path=None, key_size=32):
self.key_size = key_size
if path:
@@ -296,7 +300,7 @@ cdef class ChunkIndex(IndexBase):
unique_chunks += 1
values = <uint32_t*> (key + self.key_size)
refcount = _le32toh(values[0])
assert refcount <= MAX_VALUE, "invalid reference count"
assert refcount <= _MAX_VALUE, "invalid reference count"
chunks += refcount
unique_size += _le32toh(values[1])
unique_csize += _le32toh(values[2])
@@ -358,5 +362,5 @@ cdef class ChunkKeyIterator:
raise StopIteration
cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
cdef uint32_t refcount = _le32toh(value[0])
assert refcount <= MAX_VALUE, "invalid reference count"
assert refcount <= _MAX_VALUE, "invalid reference count"
return (<char *>self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2]))

src/borg/testsuite/archiver.py

@@ -1130,6 +1130,14 @@ def test_create_topical(self):
output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input')
self.assert_in('file1', output)
def test_create_read_special_broken_symlink(self):
os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link'))
self.cmd('init', self.repository_location)
archive = self.repository_location + '::test'
self.cmd('create', '--read-special', archive, 'input')
output = self.cmd('list', archive)
assert 'input/link -> somewhere doesnt exist' in output
# def test_cmdline_compatibility(self):
# self.create_regular_file('file1', size=1024 * 80)
# self.cmd('init', self.repository_location)

src/borg/testsuite/hashindex.py

@@ -140,16 +140,16 @@ def test_size_on_disk_accurate(self):
class HashIndexRefcountingTestCase(BaseTestCase):
def test_chunkindex_limit(self):
idx = ChunkIndex()
idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2
idx[H(1)] = ChunkIndex.MAX_VALUE - 1, 1, 2
# 5 is arbitrary, any number of incref/decrefs shouldn't move it once it's limited
for i in range(5):
# first incref to move it to the limit
refcount, *_ = idx.incref(H(1))
assert refcount == hashindex.MAX_VALUE
assert refcount == ChunkIndex.MAX_VALUE
for i in range(5):
refcount, *_ = idx.decref(H(1))
assert refcount == hashindex.MAX_VALUE
assert refcount == ChunkIndex.MAX_VALUE
def _merge(self, refcounta, refcountb):
def merge(refcount1, refcount2):
@@ -168,23 +168,23 @@ def merge(refcount1, refcount2):
def test_chunkindex_merge_limit1(self):
# Check that it does *not* limit at MAX_VALUE - 1
# (MAX_VALUE is odd)
half = hashindex.MAX_VALUE // 2
assert self._merge(half, half) == hashindex.MAX_VALUE - 1
half = ChunkIndex.MAX_VALUE // 2
assert self._merge(half, half) == ChunkIndex.MAX_VALUE - 1
def test_chunkindex_merge_limit2(self):
# 3000000000 + 2000000000 > MAX_VALUE
assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE
assert self._merge(3000000000, 2000000000) == ChunkIndex.MAX_VALUE
def test_chunkindex_merge_limit3(self):
# Crossover point: both addition and limit semantics will yield the same result
half = hashindex.MAX_VALUE // 2
assert self._merge(half + 1, half) == hashindex.MAX_VALUE
half = ChunkIndex.MAX_VALUE // 2
assert self._merge(half + 1, half) == ChunkIndex.MAX_VALUE
def test_chunkindex_merge_limit4(self):
# Beyond crossover, result of addition would be 2**31
half = hashindex.MAX_VALUE // 2
assert self._merge(half + 2, half) == hashindex.MAX_VALUE
assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE
half = ChunkIndex.MAX_VALUE // 2
assert self._merge(half + 2, half) == ChunkIndex.MAX_VALUE
assert self._merge(half + 1, half + 1) == ChunkIndex.MAX_VALUE
def test_chunkindex_add(self):
idx1 = ChunkIndex()
@@ -195,17 +195,17 @@ def test_chunkindex_add(self):
def test_incref_limit(self):
idx1 = ChunkIndex()
idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7)
idx1[H(1)] = (ChunkIndex.MAX_VALUE, 6, 7)
idx1.incref(H(1))
refcount, *_ = idx1[H(1)]
assert refcount == hashindex.MAX_VALUE
assert refcount == ChunkIndex.MAX_VALUE
def test_decref_limit(self):
idx1 = ChunkIndex()
idx1[H(1)] = hashindex.MAX_VALUE, 6, 7
idx1[H(1)] = ChunkIndex.MAX_VALUE, 6, 7
idx1.decref(H(1))
refcount, *_ = idx1[H(1)]
assert refcount == hashindex.MAX_VALUE
assert refcount == ChunkIndex.MAX_VALUE
def test_decref_zero(self):
idx1 = ChunkIndex()
@@ -225,7 +225,7 @@ def test_incref_decref(self):
def test_setitem_raises(self):
idx1 = ChunkIndex()
with self.assert_raises(AssertionError):
idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0, 0
def test_keyerror(self):
idx = ChunkIndex()
@@ -282,14 +282,20 @@ def test_read_known_good(self):
idx2 = ChunkIndex()
idx2[H(3)] = 2**32 - 123456, 6, 7
idx1.merge(idx2)
assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
class NSIndexTestCase(BaseTestCase):
def test_nsindex_segment_limit(self):
idx = NSIndex()
with self.assert_raises(AssertionError):
idx[H(1)] = hashindex.MAX_VALUE + 1, 0
idx[H(1)] = NSIndex.MAX_VALUE + 1, 0
assert H(1) not in idx
idx[H(2)] = hashindex.MAX_VALUE, 0
idx[H(2)] = NSIndex.MAX_VALUE, 0
assert H(2) in idx
class AllIndexTestCase(BaseTestCase):
def test_max_load_factor(self):
assert NSIndex.MAX_LOAD_FACTOR < 1.0
assert ChunkIndex.MAX_LOAD_FACTOR < 1.0
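
In plain Python, the saturating ("limited") refcount semantics these merge-limit tests exercise boil down to the following sketch; MAX_VALUE is taken from the module-level constant shown in hashindex.pyx above:

MAX_VALUE = 2**32 - 1025            # odd, as asserted in hashindex.pyx

def merge_refcounts(refcount1, refcount2):
    # addition saturates at MAX_VALUE; once limited, a count never moves
    return min(refcount1 + refcount2, MAX_VALUE)

half = MAX_VALUE // 2
assert merge_refcounts(half, half) == MAX_VALUE - 1      # not limited yet
assert merge_refcounts(half + 1, half) == MAX_VALUE      # crossover point
assert merge_refcounts(3000000000, 2000000000) == MAX_VALUE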