
Merge branch '1.0-maint' into merge-1.0-maint

# Conflicts:
#	docs/development.rst
#	src/borg/archive.py
#	src/borg/archiver.py
#	src/borg/hashindex.pyx
#	src/borg/testsuite/hashindex.py
Thomas Waldmann 2016-09-14 02:53:41 +02:00
commit 1287d1ae92
10 changed files with 141 additions and 36 deletions

README.rst

@@ -92,7 +92,6 @@ Initialize a new backup repository and create a backup archive::
 Now doing another backup, just to show off the great deduplication:
 
 .. code-block:: none
-    :emphasize-lines: 11
 
 $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
 -----------------------------------------------------------------------------
@@ -114,6 +113,22 @@ Now doing another backup, just to show off the great deduplication:
 For a graphical frontend refer to our complementary project `BorgWeb <https://borgweb.readthedocs.io/>`_.
 
+Checking Release Authenticity and Security Contact
+==================================================
+
+`Releases <https://github.com/borgbackup/borg/releases>`_ are signed with this GPG key,
+please use GPG to verify their authenticity.
+
+In case you discover a security issue, please use this contact for reporting it privately
+and please, if possible, use encrypted E-Mail:
+
+Thomas Waldmann <tw@waldmann-edv.de>
+
+GPG Key Fingerprint: 6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393
+
+The public key can be fetched from any GPG keyserver, but be careful: you must
+use the **full fingerprint** to check that you got the correct key.
+
 Links
 =====
@@ -169,7 +184,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
 Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license.
 
-|doc| |build| |coverage|
+|doc| |build| |coverage| |bestpractices|
 
 .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable
    :alt: Documentation
@@ -186,3 +201,7 @@ Borg is distributed under a 3-clause BSD license, see `License`_ for the complet
 .. |screencast| image:: https://asciinema.org/a/28691.png
    :alt: BorgBackup Installation and Basic Usage
    :target: https://asciinema.org/a/28691?autoplay=1&speed=2
+
+.. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge
+   :alt: Best Practices Score
+   :target: https://bestpractices.coreinfrastructure.org/projects/271
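The new README section stresses checking the full fingerprint, not a short key id. A minimal Python sketch of that comparison (illustrative only; the helper name is made up, not part of the commit):

EXPECTED = '6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393'


def fingerprint_matches(candidate, expected=EXPECTED):
    # Compare ignoring spacing and case; a short key id fails this check,
    # which is exactly why the full fingerprint must be used.
    normalize = lambda s: s.replace(' ', '').upper()
    return normalize(candidate) == normalize(expected)


assert fingerprint_matches('6d5bef9add2075805747b70f9f88fb52faf7b393')
assert not fingerprint_matches('9F88 FB52 FAF7 B393')  # short id: rejected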

Vagrantfile

@@ -387,7 +387,7 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "wheezy32" do |b|
-    b.vm.box = "boxcutter/debian79-i386"
+    b.vm.box = "boxcutter/debian711-i386"
     b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32")
@@ -400,7 +400,7 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "wheezy64" do |b|
-    b.vm.box = "boxcutter/debian79"
+    b.vm.box = "boxcutter/debian711"
     b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")

docs/development.rst

@@ -10,11 +10,50 @@ This chapter will get you started with |project_name| development.
 |project_name| is written in Python (with a little bit of Cython and C for
 the performance critical parts).
 
+Contributions
+-------------
+
+... are welcome!
+
+Some guidance for contributors:
+
+- discuss about changes on github issue tracker, IRC or mailing list
+
+- choose the branch you base your changesets on wisely:
+
+  - choose x.y-maint for stuff that should go into next x.y release
+    (it usually gets merged into master branch later also)
+  - choose master if that does not apply
+
+- do clean changesets:
+
+  - focus on some topic, resist changing anything else.
+  - do not do style changes mixed with functional changes.
+  - try to avoid refactorings mixed with functional changes.
+  - if you need to fix something after commit/push:
+
+    - if there are ongoing reviews: do a fixup commit you can
+      merge into the bad commit later.
+    - if there are no ongoing reviews or you did not push the
+      bad commit yet: edit the commit to include your fix or
+      merge the fixup commit before pushing.
+
+- have a nice, clear, typo-free commit comment
+- if you fixed an issue, refer to it in your commit comment
+- follow the style guide (see below)
+- if you write new code, please add tests and docs for it
+- run the tests, fix anything that comes up
+- make a pull request on github
+- wait for review by other developers
+
 Code and issues
 ---------------
 
 Code is stored on Github, in the `Borgbackup organization
 <https://github.com/borgbackup/borg/>`_. `Issues
 <https://github.com/borgbackup/borg/issues>`_ and `pull requests
 <https://github.com/borgbackup/borg/pulls>`_ should be sent there as
 well. See also the :ref:`support` section for more details.

docs/faq.rst

@@ -352,6 +352,8 @@ those files are reported as being added when, really, chunks are
 already used.
 
+.. _always_chunking:
+
 It always chunks all my files, even unchanged ones!
 ---------------------------------------------------

docs/usage.rst

@@ -86,6 +86,7 @@ General:
     BORG_FILES_CACHE_TTL
         When set to a numeric value, this determines the maximum "time to live" for the files cache
         entries (default: 20). The files cache is used to quickly determine whether a file is unchanged.
+        The FAQ explains this in more detail: :ref:`always_chunking`
     TMPDIR
         where temporary files are stored (might need a lot of temporary space for some operations)
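For context, a one-line sketch of how a numeric environment override like this is typically consumed (illustrative only; borg's actual parsing may differ):

import os

# Documented default is 20; a numeric environment value overrides it.
ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))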

src/borg/archive.py

@@ -732,7 +732,8 @@ def process_dev(self, path, st):
         return 'b'  # block device
 
     def process_symlink(self, path, st):
-        source = os.readlink(path)
+        with backup_io():
+            source = os.readlink(path)
         item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
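The change wraps readlink() in backup_io(), so an I/O error while reading the link is reported per-item instead of crashing the whole create run. A rough, simplified sketch of that context-manager idea (from memory; the real helper lives in borg's source and differs in detail):

from contextlib import contextmanager


class BackupOSError(Exception):
    """Signals that an OSError happened while reading input files."""


@contextmanager
def backup_io():
    # Translate a low-level OSError into an error type the caller can
    # catch, log for this one item, and then continue with the next file.
    try:
        yield
    except OSError as os_error:
        raise BackupOSError(os_error) from os_error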
@@ -1009,8 +1010,9 @@ def init_chunks(self):
         """Fetch a list of all object keys from repository
         """
         # Explicitly set the initial hash table capacity to avoid performance issues
-        # due to hash table "resonance"
-        capacity = int(len(self.repository) * 1.35 + 1)  # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
+        # due to hash table "resonance".
+        # Since reconstruction of archive items can add some new chunks, add 10 % headroom.
+        capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
         self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
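A worked example of the new sizing formula. The 0.75 load factor below is a stand-in assumption; the real value is HASH_MAX_LOAD from _hashindex.c:

MAX_LOAD_FACTOR = 0.75  # assumed placeholder for HASH_MAX_LOAD


def initial_capacity(num_chunks, headroom=1.1):
    # Reserve enough buckets to stay below the maximum load factor even
    # if item reconstruction adds roughly 10 % more chunks.
    return int(num_chunks / MAX_LOAD_FACTOR * headroom)


print(initial_capacity(1000000))  # -> 1466666 buckets for one million chunks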

src/borg/archiver.py

@@ -379,8 +379,13 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
             if not read_special:
                 status = archive.process_symlink(path, st)
             else:
-                st_target = os.stat(path)
-                if is_special(st_target.st_mode):
+                try:
+                    st_target = os.stat(path)
+                except OSError:
+                    special = False
+                else:
+                    special = is_special(st_target.st_mode)
+                if special:
                     status = archive.process_file(path, st_target, cache)
                 else:
                     status = archive.process_symlink(path, st)
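The try/except is needed because os.stat() follows symlinks and therefore fails on a broken one, while os.lstat() examines the link itself. A quick self-contained demonstration:

import os
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    link = os.path.join(tmp, 'link')
    os.symlink('somewhere doesnt exist', link)
    os.lstat(link)         # fine: stats the symlink itself
    try:
        os.stat(link)      # follows the missing target -> raises
    except OSError as exc:
        print('os.stat on a broken symlink fails:', exc)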
@@ -1865,11 +1870,14 @@ def build_parser(self, prog=None):
         info_epilog = textwrap.dedent("""
         This command displays detailed information about the specified archive or repository.
 
-        The "This archive" line refers exclusively to the given archive:
-        "Deduplicated size" is the size of the unique chunks stored only for the
-        given archive.
-
-        The "All archives" line shows global statistics (all chunks).
+        Please note that the deduplicated sizes of the individual archives do not add
+        up to the deduplicated size of the repository ("all archives"), because the two
+        mean different things:
+
+        This archive / deduplicated size = amount of data stored ONLY for this archive
+                                         = unique chunks of this archive.
+        All archives / deduplicated size = amount of data stored in the repo
+                                         = all chunks in the repository.
         """)
         subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False,
                                           description=self.do_info.__doc__,
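The arithmetic behind that note, as a toy example (illustrative, not borg code): a chunk shared by two archives is unique to neither, yet it is stored exactly once in the repository.

archive_a = {'c1', 'c2', 'c3'}       # chunk ids referenced by archive A
archive_b = {'c2', 'c3', 'c4'}       # chunk ids referenced by archive B

unique_a = archive_a - archive_b     # {'c1'}: counted in A's deduplicated size
unique_b = archive_b - archive_a     # {'c4'}: counted in B's deduplicated size
repo = archive_a | archive_b         # 4 chunks actually stored

assert len(unique_a) + len(unique_b) == 2
assert len(repo) == 4                # per-archive sums understate the repo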
@@ -2375,6 +2383,14 @@ def sig_term_handler(signum, stack):
     raise SIGTERMReceived
 
+
+class SIGHUPReceived(BaseException):
+    pass
+
+
+def sig_hup_handler(signum, stack):
+    raise SIGHUPReceived
+
+
 def setup_signal_handlers():  # pragma: no cover
     sigs = []
     if hasattr(signal, 'SIGUSR1'):
@@ -2383,7 +2399,12 @@ def setup_signal_handlers():  # pragma: no cover
         sigs.append(signal.SIGINFO)  # kill -INFO pid (or ctrl-t)
     for sig in sigs:
         signal.signal(sig, sig_info_handler)
+    # If we received SIGTERM or SIGHUP, catch them and raise a proper exception
+    # that can be handled for an orderly exit. SIGHUP is important especially
+    # for systemd systems, where logind sends it when a session exits, in
+    # addition to any traditional use.
     signal.signal(signal.SIGTERM, sig_term_handler)
+    signal.signal(signal.SIGHUP, sig_hup_handler)
 
 
 def main():  # pragma: no cover
@@ -2438,6 +2459,9 @@ def main():  # pragma: no cover
         tb_log_level = logging.DEBUG
         tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
         exit_code = EXIT_ERROR
+    except SIGHUPReceived:
+        msg = 'Received SIGHUP.'
+        exit_code = EXIT_ERROR
     if msg:
         logger.error(msg)
     if tb:
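Put together, the pattern this commit adds looks roughly like the following self-contained script (a sketch only; borg additionally routes the exit through its normal error reporting and lock cleanup):

import signal
import time


class SIGHUPReceived(BaseException):
    # BaseException so broad `except Exception` blocks do not swallow it.
    pass


def sig_hup_handler(signum, stack):
    raise SIGHUPReceived


signal.signal(signal.SIGHUP, sig_hup_handler)  # POSIX only

try:
    time.sleep(60)  # stand-in for long-running backup work
except SIGHUPReceived:
    print('Received SIGHUP.')  # orderly exit path: finally blocks still run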

src/borg/hashindex.pyx

@@ -28,6 +28,8 @@ cdef extern from "_hashindex.c":
     uint32_t _htole32(uint32_t v)
     uint32_t _le32toh(uint32_t v)
 
+    double HASH_MAX_LOAD
+
 
 cdef _NoDefault = object()
@@ -50,7 +52,6 @@ assert UINT32_MAX == 2**32-1
 
 # module-level constant because cdef's in classes can't have default values
 cdef uint32_t _MAX_VALUE = 2**32-1025
-MAX_VALUE = _MAX_VALUE
 
 assert _MAX_VALUE % 2 == 1
@@ -60,6 +61,9 @@ cdef class IndexBase:
     cdef HashIndex *index
     cdef int key_size
 
+    MAX_LOAD_FACTOR = HASH_MAX_LOAD
+    MAX_VALUE = _MAX_VALUE
+
     def __cinit__(self, capacity=0, path=None, key_size=32):
         self.key_size = key_size
         if path:
@@ -296,7 +300,7 @@ cdef class ChunkIndex(IndexBase):
             unique_chunks += 1
             values = <uint32_t*> (key + self.key_size)
             refcount = _le32toh(values[0])
-            assert refcount <= MAX_VALUE, "invalid reference count"
+            assert refcount <= _MAX_VALUE, "invalid reference count"
             chunks += refcount
             unique_size += _le32toh(values[1])
             unique_csize += _le32toh(values[2])
@@ -358,5 +362,5 @@ cdef class ChunkKeyIterator:
             raise StopIteration
         cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
         cdef uint32_t refcount = _le32toh(value[0])
-        assert refcount <= MAX_VALUE, "invalid reference count"
+        assert refcount <= _MAX_VALUE, "invalid reference count"
         return (<char *>self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2]))
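After this change, callers reach both constants through the index classes rather than through module-level names. Hypothetical usage (assumes borg is importable; the numbers are made up):

from borg.hashindex import ChunkIndex

# Size a table for one million chunks using the newly exposed constants.
capacity = int(1000000 / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
idx = ChunkIndex(capacity)

# Values are (refcount, size, csize); refcounts may not exceed MAX_VALUE.
idx[b'\x00' * 32] = (ChunkIndex.MAX_VALUE, 42, 23)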

src/borg/testsuite/archiver.py

@@ -1130,6 +1130,14 @@ def test_create_topical(self):
         output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input')
         self.assert_in('file1', output)
 
+    def test_create_read_special_broken_symlink(self):
+        os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link'))
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test'
+        self.cmd('create', '--read-special', archive, 'input')
+        output = self.cmd('list', archive)
+        assert 'input/link -> somewhere doesnt exist' in output
+
     # def test_cmdline_compatibility(self):
     #     self.create_regular_file('file1', size=1024 * 80)
     #     self.cmd('init', self.repository_location)

src/borg/testsuite/hashindex.py

@@ -140,16 +140,16 @@ def test_size_on_disk_accurate(self):
 
 class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_limit(self):
         idx = ChunkIndex()
-        idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2
+        idx[H(1)] = ChunkIndex.MAX_VALUE - 1, 1, 2
         # 5 is arbitrary, any number of incref/decrefs shouldn't move it once it's limited
         for i in range(5):
             # first incref to move it to the limit
             refcount, *_ = idx.incref(H(1))
-            assert refcount == hashindex.MAX_VALUE
+            assert refcount == ChunkIndex.MAX_VALUE
         for i in range(5):
             refcount, *_ = idx.decref(H(1))
-            assert refcount == hashindex.MAX_VALUE
+            assert refcount == ChunkIndex.MAX_VALUE
 
     def _merge(self, refcounta, refcountb):
         def merge(refcount1, refcount2):
@@ -168,23 +168,23 @@ def merge(refcount1, refcount2):
     def test_chunkindex_merge_limit1(self):
         # Check that it does *not* limit at MAX_VALUE - 1
         # (MAX_VALUE is odd)
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half, half) == hashindex.MAX_VALUE - 1
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half, half) == ChunkIndex.MAX_VALUE - 1
 
     def test_chunkindex_merge_limit2(self):
         # 3000000000 + 2000000000 > MAX_VALUE
-        assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE
+        assert self._merge(3000000000, 2000000000) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_merge_limit3(self):
         # Crossover point: both addition and limit semantics will yield the same result
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half + 1, half) == hashindex.MAX_VALUE
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half + 1, half) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_merge_limit4(self):
         # Beyond crossover, result of addition would be 2**31
-        half = hashindex.MAX_VALUE // 2
-        assert self._merge(half + 2, half) == hashindex.MAX_VALUE
-        assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE
+        half = ChunkIndex.MAX_VALUE // 2
+        assert self._merge(half + 2, half) == ChunkIndex.MAX_VALUE
+        assert self._merge(half + 1, half + 1) == ChunkIndex.MAX_VALUE
 
     def test_chunkindex_add(self):
         idx1 = ChunkIndex()
@@ -195,17 +195,17 @@ def test_chunkindex_add(self):
 
     def test_incref_limit(self):
         idx1 = ChunkIndex()
-        idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7)
+        idx1[H(1)] = (ChunkIndex.MAX_VALUE, 6, 7)
         idx1.incref(H(1))
         refcount, *_ = idx1[H(1)]
-        assert refcount == hashindex.MAX_VALUE
+        assert refcount == ChunkIndex.MAX_VALUE
 
     def test_decref_limit(self):
         idx1 = ChunkIndex()
-        idx1[H(1)] = hashindex.MAX_VALUE, 6, 7
+        idx1[H(1)] = ChunkIndex.MAX_VALUE, 6, 7
         idx1.decref(H(1))
         refcount, *_ = idx1[H(1)]
-        assert refcount == hashindex.MAX_VALUE
+        assert refcount == ChunkIndex.MAX_VALUE
 
     def test_decref_zero(self):
         idx1 = ChunkIndex()
@@ -225,7 +225,7 @@ def test_incref_decref(self):
     def test_setitem_raises(self):
         idx1 = ChunkIndex()
         with self.assert_raises(AssertionError):
-            idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
+            idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0, 0
 
     def test_keyerror(self):
         idx = ChunkIndex()
@@ -282,14 +282,20 @@ def test_read_known_good(self):
         idx2 = ChunkIndex()
         idx2[H(3)] = 2**32 - 123456, 6, 7
         idx1.merge(idx2)
-        assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
+        assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
 
 
 class NSIndexTestCase(BaseTestCase):
     def test_nsindex_segment_limit(self):
         idx = NSIndex()
         with self.assert_raises(AssertionError):
-            idx[H(1)] = hashindex.MAX_VALUE + 1, 0
+            idx[H(1)] = NSIndex.MAX_VALUE + 1, 0
         assert H(1) not in idx
-        idx[H(2)] = hashindex.MAX_VALUE, 0
+        idx[H(2)] = NSIndex.MAX_VALUE, 0
         assert H(2) in idx
+
+
+class AllIndexTestCase(BaseTestCase):
+    def test_max_load_factor(self):
+        assert NSIndex.MAX_LOAD_FACTOR < 1.0
+        assert ChunkIndex.MAX_LOAD_FACTOR < 1.0
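What these limit tests pin down, restated as plain Python (the constant mirrors the cdef in hashindex.pyx; the merge semantics are paraphrased from the tests above):

MAX_VALUE = 2**32 - 1025   # same cap the Cython module enforces on refcounts


def merge_refcounts(refcount1, refcount2):
    # Saturating addition: once a refcount hits MAX_VALUE it stays there,
    # instead of wrapping around a 32-bit integer.
    return min(refcount1 + refcount2, MAX_VALUE)


half = MAX_VALUE // 2
assert merge_refcounts(half, half) == MAX_VALUE - 1          # not yet limited
assert merge_refcounts(half + 1, half) == MAX_VALUE          # crossover point
assert merge_refcounts(3000000000, 2000000000) == MAX_VALUE  # clamped, no wrap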