diff --git a/.coafile b/.coafile new file mode 100644 index 00000000..7b0d953a --- /dev/null +++ b/.coafile @@ -0,0 +1,38 @@ +[all] +# note: put developer specific settings into ~/.coarc (e.g. editor = ...) +max_line_length = 255 +use_spaces = True + +[all.general] +files = src/borg/**/*.(py|pyx|c) +ignore = src/borg/(chunker.c|compress.c|hashindex.c|item.c), + src/borg/algorithms/(checksums.c|crc32.c), + src/borg/algorithms/blake2/*, + src/borg/algorithms/xxh64/*, + src/borg/crypto/low_level.c, + src/borg/platform/*.c +bears = SpaceConsistencyBear, FilenameBear, InvalidLinkBear, LineLengthBear +file_naming_convention = snake + + +[all.python] +files = src/borg/**/*.py +bears = PEP8Bear, PyDocStyleBear, PyLintBear +pep_ignore = E123,E125,E126,E127,E128,E226,E301,E309,E402,F401,F405,F811,W690 +pylint_disable = C0103, C0111, C0112, C0123, C0301, C0302, C0325, C0330, C0411, C0412, C0413, C1801, + W0102, W0104, W0106, W0108, W0120, W0201, W0212, W0221, W0231, W0401, W0404, + W0511, W0603, W0611, W0612, W0613, W0614, W0621, W0622, W0702, W0703, + W1201, W1202, W1401, + R0101, R0201, R0204, R0901, R0902, R0903, R0904, R0911, R0912, R0913, R0914, R0915, R0916, R1701, R1704, R1705, R1706, + E0102, E0202, E0401, E0601, E0611, E0702, E1101, E1102, E1120, E1129, E1130 +pydocstyle_ignore = D100, D101, D102, D103, D104, D105, D200, D201, D202, D203, D204, D205, D209, D210, + D212, D213, D300, D301, D400, D401, D402, D403, D404 + +[all.c] +files = src/borg/**/*.c +bears = CPPCheckBear + +[all.html] +files = src/borg/**/*.html +bears = HTMLLintBear +htmllint_ignore = * diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..81d95c43 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +# EditorConfig is awesome: http://EditorConfig.org + +root = true + +[*] +end_of_line = lf +charset = utf-8 +indent_style = space +indent_size = 4 +insert_final_newline = true +trim_trailing_whitespace = true diff --git a/.gitattributes b/.gitattributes index a97e7297..2657d942 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,5 @@ borg/_version.py export-subst + +*.py diff=python +docs/usage/*.rst.inc merge=ours +docs/man/* merge=ours diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE new file mode 100644 index 00000000..f01ff53c --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE @@ -0,0 +1,8 @@ +Thank you for contributing code to Borg, your help is appreciated! 
+ +Please, before you submit a pull request, make sure it complies with the +guidelines given in our documentation: + +https://borgbackup.readthedocs.io/en/latest/development.html#contributions + +**Please remove all above text before submitting your pull request.** diff --git a/.gitignore b/.gitignore index ab9f0f3b..11c906d8 100644 --- a/.gitignore +++ b/.gitignore @@ -4,26 +4,28 @@ build dist borg-env .tox -hashindex.c -chunker.c -compress.c -crypto.c -platform_darwin.c -platform_freebsd.c -platform_linux.c -platform_windows.c -platform_posix.c +src/borg/compress.c +src/borg/crypto/low_level.c +src/borg/hashindex.c +src/borg/item.c +src/borg/chunker.c +src/borg/algorithms/checksums.c +src/borg/platform/darwin.c +src/borg/platform/freebsd.c +src/borg/platform/linux.c +src/borg/platform/posix.c +src/borg/platform/windows.c +src/borg/_version.py *.egg-info *.pyc -*.pyo *.so .idea/ .cache/ -src/borg/_version.py borg.build/ borg.dist/ borg.exe *.dll .coverage +.coverage.* .vagrant .eggs diff --git a/.mailmap b/.mailmap new file mode 100644 index 00000000..88d728a6 --- /dev/null +++ b/.mailmap @@ -0,0 +1,14 @@ +Abdel-Rahman +Brian Johnson +Carlo Teubner +Mark Edgington +Leo Famulari +Marian Beermann +Thomas Waldmann +Dan Christensen +Antoine Beaupré +Hartmut Goebel +Michael Gajda +Milkey Mouse +Ronny Pfannschmidt +Stefan Tatschner diff --git a/.travis.yml b/.travis.yml index 853ad541..8f3d9b2e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,37 +6,45 @@ cache: directories: - $HOME/.cache/pip -# note: use py 3.5.2, it has lzma support. 3.5(.0) on travis.org/trusty does not. matrix: include: - - python: 3.4 - os: linux - dist: trusty - env: TOXENV=py34 - - python: 3.5.2 + - python: 3.5 os: linux dist: trusty env: TOXENV=py35 - - python: nightly + - python: 3.6 os: linux dist: trusty env: TOXENV=py36 - - python: 3.4 + - python: 3.5 os: linux dist: trusty env: TOXENV=flake8 - - language: generic - os: osx - osx_image: xcode6.4 - env: TOXENV=py34 + - python: "3.6-dev" + os: linux + dist: trusty + env: TOXENV=py36 - language: generic os: osx osx_image: xcode6.4 env: TOXENV=py35 - allow_failures: - - python: nightly + - language: generic + os: osx + osx_image: xcode6.4 + env: TOXENV=py36 + +before_install: +- | + test $TRAVIS_EVENT_TYPE != "pull_request" || { + echo Checking whether $TRAVIS_COMMIT_RANGE changed only docs + git diff --name-only $TRAVIS_COMMIT_RANGE | grep --quiet --invert-match --extended-regexp '(AUTHORS|README\.rst|^(docs)/)' || { + echo "Only docs were updated, stopping build process." 
+ exit + } + } install: + - git fetch --unshallow --tags - ./.travis/install.sh script: diff --git a/.travis/install.sh b/.travis/install.sh index 1f86ee38..5bad1226 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -4,8 +4,6 @@ set -e set -x if [[ "$(uname -s)" == 'Darwin' ]]; then - brew update || brew update - if [[ "${OPENSSL}" != "0.9.8" ]]; then brew outdated openssl || brew upgrade openssl fi @@ -17,27 +15,33 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then brew install lz4 brew install xz # required for python lzma module brew outdated pyenv || brew upgrade pyenv + brew install pkg-config + brew install Caskroom/cask/osxfuse case "${TOXENV}" in - py34) - pyenv install 3.4.3 - pyenv global 3.4.3 - ;; py35) - pyenv install 3.5.1 - pyenv global 3.5.1 + pyenv install 3.5.2 + pyenv global 3.5.2 + ;; + py36) + pyenv install 3.6.0 + pyenv global 3.6.0 ;; esac pyenv rehash - python -m pip install --user 'virtualenv<14.0' + python -m pip install --user virtualenv else - pip install 'virtualenv<14.0' + pip install virtualenv + sudo apt-get update + sudo apt-get install -y fakeroot sudo apt-get install -y liblz4-dev sudo apt-get install -y libacl1-dev + sudo apt-get install -y libfuse-dev fuse pkg-config # optional, for FUSE support fi python -m virtualenv ~/.venv source ~/.venv/bin/activate pip install -r requirements.d/development.txt pip install codecov -pip install -e . +python setup.py --version +pip install -e .[fuse] diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh index 4cb8273c..d0b54524 100755 --- a/.travis/upload_coverage.sh +++ b/.travis/upload_coverage.sh @@ -4,9 +4,8 @@ set -e set -x NO_COVERAGE_TOXENVS=(pep8) -if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then +if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ ${TOXENV} ]]; then source ~/.venv/bin/activate - ln .tox/.coverage .coverage # on osx, tests run as root, need access to .coverage sudo chmod 666 .coverage codecov -e TRAVIS_OS_NAME TOXENV diff --git a/AUTHORS b/AUTHORS index 2c795c50..bfb56cfe 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,5 +1,5 @@ -Borg Contributors ("The Borg Collective") -========================================= +Borg authors ("The Borg Collective") +------------------------------------ - Thomas Waldmann - Antoine Beaupré @@ -8,8 +8,10 @@ Borg Contributors ("The Borg Collective") - Michael Hanselmann - Teemu Toivanen - Marian Beermann +- Martin Hostettler - Daniel Reichelt - Lauri Niskanen +- Abdel-Rahman A. (Abogical) Borg is a fork of Attic. @@ -31,3 +33,27 @@ Attic Patches and Suggestions - Johann Klähn - Petros Moisiadis - Thomas Waldmann + +BLAKE2 +------ + +Borg includes BLAKE2: Copyright 2012, Samuel Neves , licensed under the terms +of the CC0, the OpenSSL Licence, or the Apache Public License 2.0. + +Slicing CRC32 +------------- + +Borg includes a fast slice-by-8 implementation of CRC32, Copyright 2011-2015 Stephan Brumme, +licensed under the terms of a zlib license. See http://create.stephan-brumme.com/crc32/ + +Folding CRC32 +------------- + +Borg includes an extremely fast folding implementation of CRC32, Copyright 2013 Intel Corporation, +licensed under the terms of the zlib license. + +xxHash +------ + +XXH64, a fast non-cryptographic hash algorithm. Copyright 2012-2016 Yann Collet, +licensed under a BSD 2-clause license. 
diff --git a/LICENSE b/LICENSE index 251e7027..1928806f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2015-2016 The Borg Collective (see AUTHORS file) +Copyright (C) 2015-2017 The Borg Collective (see AUTHORS file) Copyright (C) 2010-2014 Jonas Borgström All rights reserved. @@ -16,14 +16,14 @@ are met: products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in index f58b579a..bdabc64d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,10 +1,8 @@ include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in -graft src -recursive-exclude src *.pyc -recursive-exclude src *.pyo -recursive-include docs * -recursive-exclude docs *.pyc -recursive-exclude docs *.pyo -prune docs/_build +exclude .coafile .coveragerc .gitattributes .gitignore .travis.yml Vagrantfile prune .travis -exclude .coveragerc .gitattributes .gitignore .travis.yml Vagrantfile +prune .github +graft src +graft docs +prune docs/_build +global-exclude *.pyc *.orig *.so *.dll diff --git a/README.rst b/README.rst index 035a38d9..6762017f 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,9 @@ -|screencast| +|screencast_basic| -.. highlight:: bash +More screencasts: `installation`_, `advanced usage`_ What is BorgBackup? -=================== +------------------- BorgBackup (short: Borg) is a deduplicating backup program. Optionally, it supports compression and authenticated encryption. @@ -20,27 +20,31 @@ downloaded Borg, ``docs/installation.rst`` to get started with Borg. .. _installation manual: https://borgbackup.readthedocs.org/en/stable/installation.html Main features -------------- +~~~~~~~~~~~~~ **Space efficient storage** Deduplication based on content-defined chunking is used to reduce the number of bytes stored: each file is split into a number of variable length chunks and only chunks that have never been seen before are added to the repository. + A chunk is considered duplicate if its id_hash value is identical. + A cryptographically strong hash or MAC function is used as id_hash, e.g. 
+ (hmac-)sha256. + To deduplicate, all the chunks in the same repository are considered, no matter whether they come from different machines, from previous backups, from the same backup or even from the same single file. Compared to other deduplication approaches, this method does NOT depend on: - * file/directory names staying the same: So you can move your stuff around + * file/directory names staying the same: So you can move your stuff around without killing the deduplication, even between machines sharing a repo. - * complete files or time stamps staying the same: If a big file changes a - little, only a few new chunks need to be stored - this is great for VMs or + * complete files or time stamps staying the same: If a big file changes a + little, only a few new chunks need to be stored - this is great for VMs or raw disks. - * The absolute position of a data chunk inside a file: Stuff may get shifted + * The absolute position of a data chunk inside a file: Stuff may get shifted and will still be found by the deduplication algorithm. **Speed** @@ -75,24 +79,25 @@ Main features * FreeBSD * OpenBSD and NetBSD (no xattrs/ACLs support or binaries yet) * Cygwin (not supported, no binaries yet) + * Linux Subsystem of Windows 10 (not supported) **Free and Open Source Software** * security and functionality can be audited independently - * licensed under the BSD (3-clause) license - + * licensed under the BSD (3-clause) license, see `License`_ for the + complete license Easy to use ------------ +~~~~~~~~~~~ -Initialize a new backup repository and create a backup archive:: +Initialize a new backup repository (see ``borg init --help`` for encryption options):: + + $ borg init -e repokey /path/to/repo + +Create a backup archive:: - $ borg init /path/to/repo $ borg create /path/to/repo::Saturday1 ~/Documents -Now doing another backup, just to show off the great deduplication: - -.. code-block:: none - :emphasize-lines: 11 +Now doing another backup, just to show off the great deduplication:: $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents ----------------------------------------------------------------------------- @@ -114,51 +119,32 @@ Now doing another backup, just to show off the great deduplication: For a graphical frontend refer to our complementary project `BorgWeb `_. +Helping, Donations and Bounties +------------------------------- + +Your help is always welcome! +Spread the word, give feedback, help with documentation, testing or development. + +You can also give monetary support to the project, see there for details: + +https://borgbackup.readthedocs.io/en/stable/support.html#bounties-and-fundraisers + Links -===== +----- * `Main Web Site `_ * `Releases `_, `PyPI packages `_ and `ChangeLog `_ -* `GitHub `_, - `Issue Tracker `_ and - `Bounties & Fundraisers `_ +* `GitHub `_ and + `Issue Tracker `_. * `Web-Chat (IRC) `_ and `Mailing List `_ * `License `_ +* `Security contact `_ -Notes ------ - -Borg is a fork of `Attic`_ and maintained by "`The Borg collective`_". - -.. _Attic: https://github.com/jborg/attic -.. 
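The deduplication paragraphs above boil down to: split data into chunks, key each chunk by its ``id_hash`` (a strong hash or MAC such as HMAC-SHA256), and store a chunk only if that id has never been seen in the repository. A toy sketch of the idea, assuming fixed-size chunks and a plain dict as the "repository" (Borg's real chunker is content-defined and the repository is a proper on-disk store)::

    import hashlib

    def toy_chunks(data, size=4):
        # fixed-size chunking for illustration; Borg uses variable-length, content-defined chunks
        for i in range(0, len(data), size):
            yield data[i:i + size]

    def backup(data, repo):
        """Store only chunks whose id has not been seen before; return bytes actually added."""
        added = 0
        for chunk in toy_chunks(data):
            chunk_id = hashlib.sha256(chunk).hexdigest()  # stand-in for the keyed id_hash
            if chunk_id not in repo:
                repo[chunk_id] = chunk
                added += len(chunk)
        return added

    repo = {}
    print(backup(b'Saturday documents ...', repo))  # first backup: all chunks are new
    print(backup(b'Saturday documents ...', repo))  # second backup: 0 new bytes stored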
_The Borg collective: https://borgbackup.readthedocs.org/en/latest/authors.html - -Differences between Attic and Borg -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Here's a (incomplete) list of some major changes: - -* more open, faster paced development (see `issue #1 `_) -* lots of attic issues fixed (see `issue #5 `_) -* less chunk management overhead (less memory and disk usage for chunks index) -* faster remote cache resync (useful when backing up multiple machines into same repo) -* compression: no, lz4, zlib or lzma compression, adjustable compression levels -* repokey replaces problematic passphrase mode (you can't change the passphrase nor the pbkdf2 iteration count in "passphrase" mode) -* simple sparse file support, great for virtual machine disk files -* can read special files (e.g. block devices) or from stdin, write to stdout -* mkdir-based locking is more compatible than attic's posix locking -* uses fadvise to not spoil / blow up the fs cache -* better error messages / exception handling -* better logging, screen output, progress indication -* tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, Mac OS X - -Please read the `ChangeLog`_ (or ``docs/changes.rst`` in the source distribution) for more -information. - -BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC (but there is a one-way conversion). +Compatibility notes +------------------- EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER CHANGES (like when going from 0.x.y to 1.0.0 or from 1.x.y to 2.0.0). @@ -167,9 +153,16 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. -Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license. +Security issues should be reported to the `Security contact`_ (or +see ``docs/suppport.rst`` in the source distribution). -|doc| |build| |coverage| +.. start-badges + +|doc| |build| |coverage| |bestpractices| |bounties| + +.. |bounties| image:: https://api.bountysource.com/badge/team?team_id=78284&style=bounties_posted + :alt: Bounty Source + :target: https://www.bountysource.com/teams/borgbackup .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable :alt: Documentation @@ -183,6 +176,16 @@ Borg is distributed under a 3-clause BSD license, see `License`_ for the complet :alt: Test Coverage :target: https://codecov.io/github/borgbackup/borg?branch=master -.. |screencast| image:: https://asciinema.org/a/28691.png - :alt: BorgBackup Installation and Basic Usage - :target: https://asciinema.org/a/28691?autoplay=1&speed=2 +.. |screencast_basic| image:: https://asciinema.org/a/133292.png + :alt: BorgBackup Basic Usage + :target: https://asciinema.org/a/133292?autoplay=1&speed=1 + +.. _installation: https://asciinema.org/a/133291?autoplay=1&speed=1 + +.. _advanced usage: https://asciinema.org/a/133293?autoplay=1&speed=1 + +.. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge + :alt: Best Practices Score + :target: https://bestpractices.coreinfrastructure.org/projects/271 + +.. end-badges diff --git a/Vagrantfile b/Vagrantfile index e3056c33..393e8ba5 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -3,154 +3,43 @@ # Automated creation of testing environments / binaries on misc. 
platforms -def packages_prepare_wheezy - return <<-EOF - # debian 7 wheezy does not have lz4, but it is available from wheezy-backports: - echo "deb http://http.debian.net/debian wheezy-backports main" > /etc/apt/sources.list.d/wheezy-backports.list - EOF -end +$cpus = Integer(ENV.fetch('VMCPUS', '4')) # create VMs with that many cpus +$xdistn = Integer(ENV.fetch('XDISTN', '4')) # dispatch tests to that many pytest workers +$wmem = $xdistn * 256 # give the VM additional memory for workers [MB] -def packages_debianoid +def packages_debianoid(user) return <<-EOF apt-get update # install all the (security and other) updates apt-get dist-upgrade -y # for building borgbackup and dependencies: apt-get install -y libssl-dev libacl1-dev liblz4-dev libfuse-dev fuse pkg-config - usermod -a -G fuse vagrant + usermod -a -G fuse #{user} + chgrp fuse /dev/fuse + chmod 666 /dev/fuse apt-get install -y fakeroot build-essential git apt-get install -y python3-dev python3-setuptools # for building python: apt-get install -y zlib1g-dev libbz2-dev libncurses5-dev libreadline-dev liblzma-dev libsqlite3-dev - # this way it works on older dists (like ubuntu 12.04) also: - # for python 3.2 on ubuntu 12.04 we need pip<8 and virtualenv<14 as - # newer versions are not compatible with py 3.2 any more. - easy_install3 'pip<8.0' - pip3 install 'virtualenv<14.0' - touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile + easy_install3 'pip' + pip3 install 'virtualenv' EOF end -def packages_redhatted +def packages_arch return <<-EOF - yum install -y epel-release - yum update -y - # for building borgbackup and dependencies: - yum install -y openssl-devel openssl libacl-devel libacl lz4-devel fuse-devel fuse pkgconfig - usermod -a -G fuse vagrant - yum install -y fakeroot gcc git patch - # needed to compile msgpack-python (otherwise it will use slow fallback code): - yum install -y gcc-c++ - # for building python: - yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz xz-devel sqlite-devel - #yum install -y python-pip - #pip install virtualenv - touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile - EOF -end - -def packages_darwin - return <<-EOF - # install all the (security and other) updates - sudo softwareupdate --install --all - # get osxfuse 3.x pre-release code from github: - curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.3.3/osxfuse-3.3.3.dmg >osxfuse.dmg - MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ - && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.3.3.pkg" -target / - sudo chown -R vagrant /usr/local # brew must be able to create stuff here - ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" - brew update - brew install openssl - brew install lz4 - brew install xz # required for python lzma module - brew install fakeroot - brew install git - brew install pkgconfig - touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile - EOF -end - -def packages_freebsd - return <<-EOF - # install all the (security and other) updates, base system - freebsd-update --not-running-from-cron fetch install - # for building borgbackup and dependencies: - pkg install -y openssl liblz4 fusefs-libs pkgconf - pkg install -y fakeroot git bash - # for building python: - pkg install -y sqlite3 - # make bash default / work: - chsh -s bash vagrant - mount -t fdescfs fdesc /dev/fd - echo 'fdesc /dev/fd fdescfs rw 0 0' >> /etc/fstab - # make FUSE work - echo 
'fuse_load="YES"' >> /boot/loader.conf - echo 'vfs.usermount=1' >> /etc/sysctl.conf - kldload fuse - sysctl vfs.usermount=1 - pw groupmod operator -M vagrant - touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile - # install all the (security and other) updates, packages - pkg update - yes | pkg upgrade - EOF -end - -def packages_openbsd - return <<-EOF - . ~/.profile - mkdir -p /home/vagrant/borg - rsync -aH /vagrant/borg/ /home/vagrant/borg/ - rm -rf /vagrant/borg - ln -sf /home/vagrant/borg /vagrant/ - pkg_add bash - chsh -s /usr/local/bin/bash vagrant - pkg_add openssl - pkg_add lz4 - # pkg_add fuse # does not install, sdl dependency missing - pkg_add git # no fakeroot - pkg_add py3-setuptools - ln -sf /usr/local/bin/python3.4 /usr/local/bin/python3 - ln -sf /usr/local/bin/python3.4 /usr/local/bin/python - easy_install-3.4 pip - pip3 install virtualenv - touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile - EOF -end - -def packages_netbsd - return <<-EOF - hostname netbsd # the box we use has an invalid hostname - PKG_PATH="ftp://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/" - export PKG_PATH - pkg_add mozilla-rootcerts lz4 git bash - chsh -s bash vagrant - mkdir -p /usr/local/opt/lz4/include - mkdir -p /usr/local/opt/lz4/lib - ln -s /usr/pkg/include/lz4*.h /usr/local/opt/lz4/include/ - ln -s /usr/pkg/lib/liblz4* /usr/local/opt/lz4/lib/ - touch /etc/openssl/openssl.cnf # avoids a flood of "can't open ..." - mozilla-rootcerts install - pkg_add pkg-config # avoids some "pkg-config missing" error msg, even without fuse - # pkg_add fuse # llfuse supports netbsd, but is still buggy. - # https://bitbucket.org/nikratio/python-llfuse/issues/70/perfuse_open-setsockopt-no-buffer-space - pkg_add python34 py34-setuptools - ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python - ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python3 - easy_install-3.4 pip - pip install virtualenv - touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile + chown vagrant.vagrant /vagrant + pacman --sync --noconfirm python-virtualenv python-pip EOF end def install_pyenv(boxname) return <<-EOF curl -s -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash - echo 'export PATH="$HOME/.pyenv/bin:/vagrant/borg:$PATH"' >> ~/.bash_profile + echo 'export PATH="$HOME/.pyenv/bin:$PATH"' >> ~/.bash_profile echo 'eval "$(pyenv init -)"' >> ~/.bash_profile echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bash_profile echo 'export PYTHON_CONFIGURE_OPTS="--enable-shared"' >> ~/.bash_profile - echo 'export LANG=en_US.UTF-8' >> ~/.bash_profile EOF end @@ -163,9 +52,9 @@ end def install_pythons(boxname) return <<-EOF . ~/.bash_profile - pyenv install 3.4.0 # tests pyenv install 3.5.0 # tests - pyenv install 3.5.2 # binary build, use latest 3.5.x release + pyenv install 3.6.0 # tests + pyenv install 3.6.2 # binary build, use latest 3.6.x release pyenv rehash EOF end @@ -182,71 +71,50 @@ def build_pyenv_venv(boxname) return <<-EOF . ~/.bash_profile cd /vagrant/borg - # use the latest 3.5 release - pyenv global 3.5.2 - pyenv virtualenv 3.5.2 borg-env + # use the latest 3.6 release + pyenv global 3.6.2 + pyenv virtualenv 3.6.2 borg-env ln -s ~/.pyenv/versions/borg-env . EOF end -def install_borg(boxname) - return <<-EOF +def install_borg(fuse) + script = <<-EOF . ~/.bash_profile cd /vagrant/borg . 
borg-env/bin/activate pip install -U wheel # upgrade wheel, too old for 3.5 cd borg # clean up (wrong/outdated) stuff we likely got via rsync: - rm -f borg/*.so borg/*.cpy* - rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c - rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ + rm -rf __pycache__ + find src -name '__pycache__' -exec rm -rf {} \\; pip install -r requirements.d/development.txt - # by using [fuse], setup.py can handle different fuse requirements: - pip install -e .[fuse] + python setup.py clean EOF + if fuse + script += <<-EOF + # by using [fuse], setup.py can handle different FUSE requirements: + pip install -e .[fuse] + EOF + else + script += <<-EOF + pip install -e . + # do not install llfuse into the virtualenvs built by tox: + sed -i.bak '/fuse.txt/d' tox.ini + EOF + end + return script end -def install_borg_no_fuse(boxname) +def install_pyinstaller() return <<-EOF . ~/.bash_profile cd /vagrant/borg . borg-env/bin/activate - pip install -U wheel # upgrade wheel, too old for 3.5 - cd borg - # clean up (wrong/outdated) stuff we likely got via rsync: - rm -f borg/*.so borg/*.cpy* - rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c - rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ - pip install -r requirements.d/development.txt - pip install -e . - EOF -end - -def install_pyinstaller(boxname) - return <<-EOF - . ~/.bash_profile - cd /vagrant/borg - . borg-env/bin/activate - git clone https://github.com/pyinstaller/pyinstaller.git + git clone https://github.com/thomaswaldmann/pyinstaller.git cd pyinstaller - git checkout v3.1.1 - pip install -e . - EOF -end - -def install_pyinstaller_bootloader(boxname) - return <<-EOF - . ~/.bash_profile - cd /vagrant/borg - . borg-env/bin/activate - git clone https://github.com/pyinstaller/pyinstaller.git - cd pyinstaller - git checkout v3.1.1 - # build bootloader, if it is not included - cd bootloader - python ./waf all - cd .. - pip install -e . + git checkout v3.3-fixed + python setup.py install EOF end @@ -256,7 +124,8 @@ def build_binary_with_pyinstaller(boxname) cd /vagrant/borg . borg-env/bin/activate cd borg - pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean src/borg/__main__.py --hidden-import=borg.platform.posix + pyinstaller --clean --distpath=/vagrant/borg scripts/borg.exe.spec + echo 'export PATH="/vagrant/borg:$PATH"' >> ~/.bash_profile EOF end @@ -265,12 +134,13 @@ def run_tests(boxname) . ~/.bash_profile cd /vagrant/borg/borg . ../borg-env/bin/activate - if which pyenv > /dev/null; then + if which pyenv 2> /dev/null; then # for testing, use the earliest point releases of the supported python versions: - pyenv global 3.4.0 3.5.0 + pyenv global 3.5.0 3.6.0 + pyenv local 3.5.0 3.6.0 fi # otherwise: just use the system python - if which fakeroot > /dev/null; then + if which fakeroot 2> /dev/null; then echo "Running tox WITH fakeroot -u" fakeroot -u tox --skip-missing-interpreters else @@ -280,242 +150,65 @@ def run_tests(boxname) EOF end -def fix_perms +def fs_init(user) return <<-EOF - # . 
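Both ``.travis/install.sh`` and the ``install_borg`` helper above install Borg with ``pip install -e .[fuse]``, so ``setup.py`` only pulls in the optional FUSE dependency where it is wanted (the non-FUSE path strips ``fuse.txt`` from ``tox.ini`` instead). As a hedged illustration of how such an extra is declared with setuptools (package and requirement names below are illustrative, not Borg's actual ``setup.py``)::

    from setuptools import setup

    setup(
        name='example-with-fuse-extra',
        version='0.0',
        install_requires=['msgpack-python'],  # always installed (illustrative)
        extras_require={
            'fuse': ['llfuse'],               # only installed via `pip install -e .[fuse]`
        },
    )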
~/.profile - chown -R vagrant /vagrant/borg - EOF -end - -def windows_requirements - return <<-EOF - $url = "https://eternallybored.org/misc/wget/releases/wget-1.18-win64.zip" - (New-Object System.Net.WebClient).DownloadFile($url, "./wget.zip") - & { Add-Type -A 'System.IO.Compression.FileSystem'; [IO.Compression.ZipFile]::ExtractToDirectory('wget.zip', './wget'); } - wget\\wget.exe "http://gnuwin32.sourceforge.net/downlinks/tar-bin-zip.php" -O tar-bin.zip - wget\\wget.exe "http://gnuwin32.sourceforge.net/downlinks/tar-dep-zip.php" -O tar-dep.zip - & { Add-Type -A 'System.IO.Compression.FileSystem'; [IO.Compression.ZipFile]::ExtractToDirectory('tar-bin.zip', './tar'); } - & { Add-Type -A 'System.IO.Compression.FileSystem'; [IO.Compression.ZipFile]::ExtractToDirectory('tar-dep.zip', './tar'); } - wget\\wget.exe http://tukaani.org/xz/xz-5.2.1-windows.zip -O xz.zip - & { Add-Type -A 'System.IO.Compression.FileSystem'; [IO.Compression.ZipFile]::ExtractToDirectory('xz.zip', './xz'); } - wget\\wget.exe -Axz "https://sourceforge.net/projects/msys2/files/Base/x86_64/msys2-base-x86_64-20160205.tar.xz/download" -O msys.tar.xz - xz\\bin_x86-64\\xz -d msys.tar.xz - tar\\bin\\tar -xf msys.tar - msys64\\msys2.exe -c | more - Start-Sleep -s 10 - msys64\\usr\\bin\\pacman --noconfirm -S mingw-w64-x86_64-python3 git mingw-w64-x86_64-lz4 mingw-w64-x86_64-python3-pip mingw-w64-x86_64-cython mingw-w64-x86_64-gcc mingw-w64-x86_64-ntldd-git - python3 -m pip install --upgrade pip - EOF -end - -def windows_build - return <<-EOF - $msyspath = $(get-location).path + "\\msys64" - $Env:Path = $Env:Path + ";" + $msyspath + "\\mingw64\\bin;" + $msyspath + "\\usr\\bin" - net use x: \\\\vboxsvr\\vagrant - x: - pip3 install -r requirements.d/development.txt - pip3 install -e . - python3 deployment/windows/buildwin32.py + chown -R #{user} /vagrant/borg + touch ~#{user}/.bash_profile ; chown #{user} ~#{user}/.bash_profile + echo 'export LANG=en_US.UTF-8' >> ~#{user}/.bash_profile + echo 'export LC_CTYPE=en_US.UTF-8' >> ~#{user}/.bash_profile + echo 'export XDISTN=#{$xdistn}' >> ~#{user}/.bash_profile EOF end Vagrant.configure(2) do |config| + # use rsync to copy content to the folder + config.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"], :rsync__chown => false + # do not let the VM access . on the host machine via the default shared folder! 
+ config.vm.synced_folder ".", "/vagrant", disabled: true + config.vm.provider :virtualbox do |v| #v.gui = true - v.cpus = 1 - end - - # Linux - config.vm.define "centos7_64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "centos/7" - b.vm.provider :virtualbox do |v| - v.memory = 768 - end - b.vm.provision "install system packages", :type => :shell, :inline => packages_redhatted - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos7_64") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos7_64") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos7_64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos7_64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos7_64") - end - - config.vm.define "centos6_32" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "centos6-32" - b.vm.provision "install system packages", :type => :shell, :inline => packages_redhatted - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_32") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_32") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32") - end - - config.vm.define "centos6_64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "centos6-64" - b.vm.provider :virtualbox do |v| - v.memory = 768 - end - b.vm.provision "install system packages", :type => :shell, :inline => packages_redhatted - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64") + v.cpus = $cpus end config.vm.define "xenial64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms b.vm.box = "ubuntu/xenial64" b.vm.provider :virtualbox do |v| - v.memory = 768 + v.memory 
= 1024 + $wmem end - b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("trusty64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("trusty64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("trusty64") + b.vm.provision "fs init", :type => :shell, :inline => fs_init("ubuntu") + b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid("ubuntu") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("xenial64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("xenial64") end - config.vm.define "trusty64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "ubuntu/trusty64" + config.vm.define "stretch64" do |b| + b.vm.box = "debian/stretch64" b.vm.provider :virtualbox do |v| - v.memory = 768 + v.memory = 1024 + $wmem end - b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("trusty64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("trusty64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("trusty64") + b.vm.provision "fs init", :type => :shell, :inline => fs_init("vagrant") + b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid("vagrant") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("stretch64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller() + b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("stretch64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("stretch64") end - config.vm.define "jessie64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "debian/jessie64" + config.vm.define "arch64" do |b| + b.vm.box = "terrywang/archlinux" b.vm.provider :virtualbox do |v| - v.memory = 768 + v.memory = 1024 + $wmem end - b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("jessie64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("jessie64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("jessie64") + b.vm.provision "fs init", :type => :shell, :inline => fs_init("vagrant") + b.vm.provision "packages arch", :type => :shell, :privileged => true, :inline => 
packages_arch + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("arch64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("arch64") end - config.vm.define "wheezy32" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "boxcutter/debian79-i386" - b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy - b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy32") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy32") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy32") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy32") - b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy32") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy32") - end - - config.vm.define "wheezy64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "boxcutter/debian79" - b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy - b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy64") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy64") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy64") - b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy64") - end - - # OS X - config.vm.define "darwin64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "jhcook/yosemite-clitools" - b.vm.provision "packages darwin", :type => :shell, :privileged => false, :inline => packages_darwin - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("darwin64") - b.vm.provision "fix pyenv", :type => :shell, 
:privileged => false, :inline => fix_pyenv_darwin("darwin64") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("darwin64") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("darwin64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("darwin64") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("darwin64") - b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("darwin64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("darwin64") - end - - # BSD - config.vm.define "freebsd64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "geoffgarside/freebsd-10.2" - b.vm.provider :virtualbox do |v| - v.memory = 768 - end - b.vm.provision "install system packages", :type => :shell, :inline => packages_freebsd - b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("freebsd") - b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("freebsd") - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("freebsd") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("freebsd") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller_bootloader("freebsd") - b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd") - end - - config.vm.define "openbsd64" do |b| - b.vm.box = "kaorimatz/openbsd-5.9-amd64" - b.vm.provider :virtualbox do |v| - v.memory = 768 - end - b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("openbsd64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64") - end - - config.vm.define "netbsd64" do |b| - b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] - b.vm.synced_folder ".", "/vagrant", disabled: true - b.vm.provision "fix perms", :type => :shell, :inline => fix_perms - b.vm.box = "alex-skimlinks/netbsd-6.1.5-amd64" - b.vm.provider :virtualbox do |v| - v.memory = 768 - end - b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("netbsd64") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64") - end - - config.vm.define "windows" do |b| - b.vm.communicator = "winrm" - # Defaults for vagrant box from Microsoft - # 
b.winrm.username = "IEUser" - # b.winrm.password = "Passw0rd!" - # b.ssh.username = "IEUser" - # b.ssh.password = "Passw0rd!" - b.vm.box = "joeapearson/win10-x64" - b.vm.provision "install msys2", :type => :shell, :privileged => false, :inline => windows_requirements - b.vm.provision "build", :type => :shell, :privileged => false, :inline => windows_build - end + # TODO: create more VMs with python 3.5+ and openssl 1.1. + # See branch 1.1-maint for a better equipped Vagrantfile (but still on py34 and openssl 1.0). end diff --git a/appveyor.yml b/appveyor.yml index 805a956e..8cf5fa49 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,8 +1,5 @@ version: '{build}' -init: - - ps: Update-AppveyorBuild -Version "1.0.3-$($env:appveyor_repo_commit.substring(0,7))" - environment: matrix: - compiler: msys2 @@ -16,6 +13,8 @@ install: - 'pip3 install -r requirements.d/development.txt' - 'pip3 install setuptools_scm' build_script: + - ps: $version = git describe --long + - ps: Update-AppveyorBuild -Version $version - 'pip3 install -e .' - 'python3 deployment/windows/buildwin32.py' test_script: diff --git a/conftest.py b/conftest.py index b5f9d982..cc428be1 100644 --- a/conftest.py +++ b/conftest.py @@ -1,23 +1,79 @@ +import os + +import pytest + +# IMPORTANT keep this above all other borg imports to avoid inconsistent values +# for `from borg.constants import PBKDF2_ITERATIONS` (or star import) usages before +# this is executed +from borg import constants +# no fixture-based monkey-patching since star-imports are used for the constants module +constants.PBKDF2_ITERATIONS = 1 + + +# needed to get pretty assertion failures in unit tests: +if hasattr(pytest, 'register_assert_rewrite'): + pytest.register_assert_rewrite('borg.testsuite') + + +import borg.cache from borg.logger import setup_logging # Ensure that the loggers exist for all tests setup_logging() -from borg.testsuite import has_lchflags, no_lchlfags_because, has_llfuse -from borg.testsuite.platform import fakeroot_detected -from borg import xattr, constants +from borg.testsuite import has_lchflags, has_llfuse +from borg.testsuite import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported +from borg.testsuite.platform import fakeroot_detected, are_acls_working +from borg import xattr -def pytest_configure(config): - # no fixture-based monkey-patching since star-imports are used for the constants module - constants.PBKDF2_ITERATIONS = 1 +@pytest.fixture(autouse=True) +def clean_env(tmpdir_factory, monkeypatch): + # avoid that we access / modify the user's normal .config / .cache directory: + monkeypatch.setenv('XDG_CONFIG_HOME', tmpdir_factory.mktemp('xdg-config-home')) + monkeypatch.setenv('XDG_CACHE_HOME', tmpdir_factory.mktemp('xdg-cache-home')) + # also avoid to use anything from the outside environment: + keys = [key for key in os.environ if key.startswith('BORG_')] + for key in keys: + monkeypatch.delenv(key, raising=False) def pytest_report_header(config, startdir): - yesno = ['no', 'yes'] - flags = 'Testing BSD-style flags: %s %s' % (yesno[has_lchflags], no_lchlfags_because) - fakeroot = 'fakeroot: %s (>=1.20.2: %s)' % ( - yesno[fakeroot_detected()], - yesno[xattr.XATTR_FAKEROOT]) - llfuse = 'Testing fuse: %s' % yesno[has_llfuse] - return '\n'.join((flags, llfuse, fakeroot)) + tests = { + "BSD flags": has_lchflags, + "fuse": has_llfuse, + "root": not fakeroot_detected(), + "symlinks": are_symlinks_supported(), + "hardlinks": are_hardlinks_supported(), + "atime/mtime": is_utime_fully_supported(), + "modes": 
"BORG_TESTS_IGNORE_MODES" not in os.environ + } + enabled = [] + disabled = [] + for test in tests: + if tests[test]: + enabled.append(test) + else: + disabled.append(test) + output = "Tests enabled: " + ", ".join(enabled) + "\n" + output += "Tests disabled: " + ", ".join(disabled) + return output + + +class DefaultPatches: + def __init__(self, request): + self.org_cache_wipe_cache = borg.cache.LocalCache.wipe_cache + + def wipe_should_not_be_called(*a, **kw): + raise AssertionError("Cache wipe was triggered, if this is part of the test add @pytest.mark.allow_cache_wipe") + if 'allow_cache_wipe' not in request.keywords: + borg.cache.LocalCache.wipe_cache = wipe_should_not_be_called + request.addfinalizer(self.undo) + + def undo(self): + borg.cache.LocalCache.wipe_cache = self.org_cache_wipe_cache + + +@pytest.fixture(autouse=True) +def default_patches(request): + return DefaultPatches(request) diff --git a/deployment/windows/buildwin32.py b/deployment/windows/buildwin32.py index 1d215e61..01f7def2 100644 --- a/deployment/windows/buildwin32.py +++ b/deployment/windows/buildwin32.py @@ -10,6 +10,8 @@ import zipfile builddir = 'win32exe' +pythonversion = str(sys.version_info[0]) + '.' + str(sys.version_info[1]) + if os.path.exists(builddir): shutil.rmtree(builddir) os.mkdir(builddir) @@ -30,7 +32,7 @@ if gccpath == '': source = open('wrapper.c', 'w') source.write( """ -#include +#include #include #include #include @@ -59,12 +61,12 @@ int wmain(int argc , wchar_t *argv[] ) } """) source.close() -subprocess.check_call('g++ wrapper.c -lpython3.5m -lshlwapi -municode -o ' + builddir + '/borg.exe') +subprocess.check_call('g++ wrapper.c -lpython' + pythonversion + 'm -lshlwapi -municode -o ' + builddir + '/borg.exe') os.remove('wrapper.c') print('Searching modules') -modulepath = os.path.abspath(os.path.join(gccpath, '../lib/python3.5/')) +modulepath = os.path.abspath(os.path.join(gccpath, '../lib/python' + pythonversion + '/')) # Bundle all encodings - In theory user may use any encoding in command prompt for file in os.listdir(os.path.join(modulepath, 'encodings')): @@ -76,7 +78,8 @@ finder.run_script('src/borg/__main__.py') # For some reason modulefinder does not find these, add them manually extramodules = [os.path.join(modulepath, 'site.py'), os.path.join(modulepath, 'encodings/idna.py'), - os.path.join(modulepath, 'stringprep.py')] + os.path.join(modulepath, 'stringprep.py'), os.path.join(modulepath, 'ctypes/wintypes.py'), + os.path.join(modulepath, 'lib-dynload/_sysconfigdata_m_win32_.py')] for module in extramodules: finder.run_script(module) @@ -101,6 +104,7 @@ def finddlls(exe): re.append(dll) return re + items = finder.modules.items() for name, mod in items: file = mod.__file__ @@ -113,7 +117,7 @@ for name, mod in items: os.makedirs(os.path.join(builddir, 'bin', os.path.split(relpath)[0]), exist_ok=True) shutil.copyfile(file, os.path.join(builddir, 'bin', relpath)) else: - relativepath = file[file.find('lib')+len('lib/python3.5/'):] + relativepath = file[file.find('lib')+len('lib/python' + pythonversion + '/'):] if 'encodings' in file: continue if relativepath not in library.namelist(): @@ -130,10 +134,13 @@ for dll in finddlls(os.path.join(builddir, "borg.exe")): shutil.copyfile(dll, os.path.join(builddir, os.path.split(dll)[1])) shutil.copyfile(os.path.join('src', 'borg', '__main__.py'), os.path.join(builddir, 'bin', 'borg', '__main__.py')) -library.write(os.path.join(modulepath, 'site.py'), 'site.py') +library.write(os.path.join(modulepath, 'lib-dynload/_sysconfigdata_m_win32_.py'), 
'_sysconfigdata_m_win32_.py') +library.write(os.path.join(modulepath, 'ctypes/wintypes.py'), 'ctypes/wintypes.py') -for extmodule in ['src/borg/chunker-cpython-35m.dll', 'src/borg/compress-cpython-35m.dll', - 'src/borg/crypto-cpython-35m.dll', 'src/borg/hashindex-cpython-35m.dll']: +for extmodule in ['src/borg/chunker-cpython-' + str(sys.version_info[0]) + str(sys.version_info[1]) + 'm.dll', + 'src/borg/compress-cpython-' + str(sys.version_info[0]) + str(sys.version_info[1]) + 'm.dll', + 'src/borg/item-cpython-' + str(sys.version_info[0]) + str(sys.version_info[1]) + 'm.dll', + 'src/borg/hashindex-cpython-' + str(sys.version_info[0]) + str(sys.version_info[1]) + 'm.dll']: for dll in finddlls(extmodule): if builddir not in dll: shutil.copyfile(dll, os.path.join(builddir, os.path.split(dll)[1])) diff --git a/docs/_static/Makefile b/docs/_static/Makefile new file mode 100644 index 00000000..6ede1346 --- /dev/null +++ b/docs/_static/Makefile @@ -0,0 +1,11 @@ + +all: logo.pdf logo.png + +logo.pdf: logo.svg + inkscape logo.svg --export-pdf=logo.pdf + +logo.png: logo.svg + inkscape logo.svg --export-png=logo.png --export-dpi=72,72 + +clean: + rm -f logo.pdf logo.png diff --git a/docs/_static/logo.pdf b/docs/_static/logo.pdf new file mode 100644 index 00000000..0b24bd79 --- /dev/null +++ b/docs/_static/logo.pdf @@ -0,0 +1,72 @@ +%PDF-1.4 +% +3 0 obj +<< /Length 4 0 R + /Filter /FlateDecode +>> +stream +x}TAr! +_@PyB!{H5ʞi + +'?ƏvI0&t=ގZ=N*Q,J|DP ? ÜA-\ +$k@ֽdхY3ko6T_,30^'w4=BJ5~AƑ`3WhF%$ acܒļRx|k]MX)`xLpRZ:>4V'}8Y( G||⥾ hs?$ J}( VR%Že3鯵"4 5)`OĹ){-֗Y<˹uPm؎mmV)jVtn y"9C$.}_T~ԫwXx/QXj^}5Rx'b.n5}S +endstream +endobj +4 0 obj + 430 +endobj +2 0 obj +<< + /ExtGState << + /a0 << /CA 1 /ca 1 >> + >> +>> +endobj +5 0 obj +<< /Type /Page + /Parent 1 0 R + /MediaBox [ 0 0 240 100 ] + /Contents 3 0 R + /Group << + /Type /Group + /S /Transparency + /I true + /CS /DeviceRGB + >> + /Resources 2 0 R +>> +endobj +1 0 obj +<< /Type /Pages + /Kids [ 5 0 R ] + /Count 1 +>> +endobj +6 0 obj +<< /Creator (cairo 1.14.8 (http://cairographics.org)) + /Producer (cairo 1.14.8 (http://cairographics.org)) +>> +endobj +7 0 obj +<< /Type /Catalog + /Pages 1 0 R +>> +endobj +xref +0 8 +0000000000 65535 f +0000000830 00000 n +0000000544 00000 n +0000000015 00000 n +0000000522 00000 n +0000000616 00000 n +0000000895 00000 n +0000001022 00000 n +trailer +<< /Size 8 + /Root 7 0 R + /Info 6 0 R +>> +startxref +1074 +%%EOF diff --git a/docs/_static/logo.png b/docs/_static/logo.png index 000191b7..39a47ea4 100644 Binary files a/docs/_static/logo.png and b/docs/_static/logo.png differ diff --git a/docs/_static/logo.svg b/docs/_static/logo.svg new file mode 100644 index 00000000..8117facb --- /dev/null +++ b/docs/_static/logo.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/docs/_templates/globaltoc.html b/docs/_templates/globaltoc.html new file mode 100644 index 00000000..d10f5f77 --- /dev/null +++ b/docs/_templates/globaltoc.html @@ -0,0 +1,20 @@ + diff --git a/docs/_templates/logo-text.html b/docs/_templates/logo-text.html new file mode 100644 index 00000000..dde5c929 --- /dev/null +++ b/docs/_templates/logo-text.html @@ -0,0 +1,5 @@ + diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index 8dfc8cce..00000000 --- a/docs/api.rst +++ /dev/null @@ -1,80 +0,0 @@ -.. highlight:: python - -API Documentation -================= - -.. automodule:: borg.archiver - :members: - :undoc-members: - -.. automodule:: borg.archive - :members: - :undoc-members: - -.. automodule:: borg.repository - :members: - :undoc-members: - -.. 
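The ``DefaultPatches`` fixture added to ``conftest.py`` earlier in this diff replaces ``LocalCache.wipe_cache`` with a guard that fails the test run unless the test explicitly opts in. A hypothetical test using that opt-in could look like this (the marker name comes from the conftest code; the test itself is made up)::

    import pytest

    @pytest.mark.allow_cache_wipe
    def test_cache_is_rebuilt_after_wipe():
        # a test that legitimately triggers LocalCache.wipe_cache() carries the marker;
        # without it, the DefaultPatches guard raises AssertionError instead of wiping
        ...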
automodule:: borg.remote - :members: - :undoc-members: - -.. automodule:: borg.cache - :members: - :undoc-members: - -.. automodule:: borg.key - :members: - :undoc-members: - -.. automodule:: borg.logger - :members: - :undoc-members: - -.. automodule:: borg.helpers - :members: - :undoc-members: - -.. automodule:: borg.locking - :members: - :undoc-members: - -.. automodule:: borg.shellpattern - :members: - :undoc-members: - -.. automodule:: borg.lrucache - :members: - :undoc-members: - -.. automodule:: borg.fuse - :members: - :undoc-members: - -.. automodule:: borg.xattr - :members: - :undoc-members: - -.. automodule:: borg.platform - :members: - :undoc-members: - -.. automodule:: borg.platform_linux - :members: - :undoc-members: - -.. automodule:: borg.hashindex - :members: - :undoc-members: - -.. automodule:: borg.compress - :members: get_compressor, Compressor, CompressorBase - :undoc-members: - -.. automodule:: borg.chunker - :members: - :undoc-members: - -.. automodule:: borg.crypto - :members: - :undoc-members: diff --git a/docs/authors.rst b/docs/authors.rst index c368035d..bca9b5de 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -1,5 +1,8 @@ .. include:: global.rst.inc +Authors +======= + .. include:: ../AUTHORS License diff --git a/docs/book.rst b/docs/book.rst new file mode 100644 index 00000000..a1a1b9fe --- /dev/null +++ b/docs/book.rst @@ -0,0 +1,19 @@ +.. include:: global.rst.inc + +Borg documentation +================== + +.. when you add an element here, do not forget to add it to index.rst +.. Note: Some things are in appendices (see latex_appendices in conf.py) + +.. toctree:: + :maxdepth: 2 + + introduction + installation + quickstart + usage + deployment + faq + internals + development diff --git a/docs/borg_theme/css/borg.css b/docs/borg_theme/css/borg.css index c4d8688f..f8f900b6 100644 --- a/docs/borg_theme/css/borg.css +++ b/docs/borg_theme/css/borg.css @@ -1,22 +1,181 @@ @import url("theme.css"); -/* The Return of the Borg. - * - * Have a bit green and grey and darkness (and if only in the upper left corner). - */ - -.wy-side-nav-search { - background-color: #000000 !important; -} - -.wy-side-nav-search > a { - color: rgba(255, 255, 255, 0.5); -} - -.wy-side-nav-search > div.version { - color: rgba(255, 255, 255, 0.5); -} - -#usage dt code { +dt code { font-weight: normal; } + +#internals .toctree-wrapper > ul { + column-count: 3; + -webkit-column-count: 3; +} + +#internals .toctree-wrapper > ul > li { + display: inline-block; + font-weight: bold; +} + +#internals .toctree-wrapper > ul > li > ul { + font-weight: normal; +} + +/* bootstrap has a .container class which clashes with docutils' container class. */ +.docutils.container { + width: auto; + margin: 0; + padding: 0; +} + +/* the default (38px) produces a jumpy baseline in Firefox on Linux. */ +h1 { + font-size: 36px; +} + +.text-logo { + background-color: #000200; + color: #00dd00; +} + +.text-logo:hover, +.text-logo:active, +.text-logo:focus { + color: #5afe57; +} + +/* by default the top and bottom margins are unequal which looks a bit unbalanced. 
*/ +.sidebar-block { + padding: 0; + margin: 14px 0 24px 0; +} + +#borg-documentation h1 + p .external img { + width: 100%; +} + +.container.experimental, +#debugging-facilities, +#borg-recreate { + /* don't change text dimensions */ + margin: 0 -30px; /* padding below + border width */ + padding: 0 10px; /* 10 px visual margin between edge of text and the border */ + /* fallback for browsers that don't have repeating-linear-gradient: thick, red lines */ + border-left: 20px solid red; + border-right: 20px solid red; + /* fancy red stripes */ + border-image: repeating-linear-gradient( + -45deg,rgba(255,0,0,0.1) 0,rgba(255,0,0,0.75) 10px,rgba(0,0,0,0) 10px,rgba(0,0,0,0) 20px,rgba(255,0,0,0.75) 20px) 0 20 repeat; +} + +.topic { + margin: 0 1em; + padding: 0 1em; + /* #4e4a4a = background of the ToC sidebar */ + border-left: 2px solid #4e4a4a;; + border-right: 2px solid #4e4a4a;; +} + +table.docutils:not(.footnote) td, +table.docutils:not(.footnote) th { + padding: .2em; +} + +table.docutils:not(.footnote) { + border-collapse: collapse; + border: none; +} + +table.docutils:not(.footnote) td, +table.docutils:not(.footnote) th { + border: 1px solid #ddd; +} + +table.docutils:not(.footnote) tr:first-child th, +table.docutils:not(.footnote) tr:first-child td { + border-top: 0; +} + +table.docutils:not(.footnote) tr:last-child td { + border-bottom: 0; +} + +table.docutils:not(.footnote) tr td:first-child, +table.docutils:not(.footnote) tr th:first-child { + border-left: 0; +} + +table.docutils:not(.footnote) tr td:last-child, +table.docutils:not(.footnote) tr th:last-child, +table.docutils.borg-options-table tr td { + border-right: 0; +} + +table.docutils.option-list tr td, +table.docutils.borg-options-table tr td { + border-left: 0; + border-right: 0; +} + +table.docutils.borg-options-table tr td:first-child:not([colspan="3"]) { + border-top: 0; + border-bottom: 0; +} + +.borg-options-table td[colspan="3"] p { + margin: 0; +} + +.borg-options-table { + width: 100%; +} + +kbd, /* used in usage pages for options */ +code, +.rst-content tt.literal, +.rst-content tt.literal, +.rst-content code.literal, +.rst-content tt, +.rst-content code, +p .literal, +p .literal span { + border: none; + padding: 0; + color: black; /* slight contrast with #404040 of regular text */ + background: none; +} + +kbd { + box-shadow: none; + line-height: 23px; + word-wrap: normal; + font-size: 15px; + font-family: Consolas, monospace; +} + +.borg-options-table tr td:nth-child(2) .pre { + white-space: nowrap; +} + +.borg-options-table tr td:first-child { + width: 2em; +} + +cite { + white-space: nowrap; + color: black; /* slight contrast with #404040 of regular text */ + font-family: Consolas, "Andale Mono WT", "Andale Mono", "Lucida Console", "Lucida Sans Typewriter", + "DejaVu Sans Mono", "Bitstream Vera Sans Mono", "Liberation Mono", "Nimbus Mono L", Monaco, "Courier New", Courier, monospace; + font-style: normal; + text-decoration: underline; +} + +.borg-common-opt-ref { + font-weight: bold; +} + +.sidebar-toc ul li.toctree-l2 a, +.sidebar-toc ul li.toctree-l3 a { + padding-right: 25px; +} + +#common-options .option { + white-space: nowrap; +} diff --git a/docs/changes.rst b/docs/changes.rst index 18d50976..9d32ccfd 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,8 +1,85 @@ -Changelog -========= -Important note about pre-1.0.4 potential repo corruption --------------------------------------------------------- +.. 
_important_notes: + +Important notes +=============== + +This section provides information about security and corruption issues. + +.. _tam_vuln: + +Pre-1.0.9 manifest spoofing vulnerability (CVE-2016-10099) +---------------------------------------------------------- + +A flaw in the cryptographic authentication scheme in Borg allowed an attacker +to spoof the manifest. The attack requires an attacker to be able to + +1. insert files (with no additional headers) into backups +2. gain write access to the repository + +This vulnerability does not disclose plaintext to the attacker, nor does it +affect the authenticity of existing archives. + +The vulnerability allows an attacker to create a spoofed manifest (the list of archives). +Creating plausible fake archives may be feasible for small archives, but is unlikely +for large archives. + +The fix adds a separate authentication tag to the manifest. For compatibility +with prior versions this authentication tag is *not* required by default +for existing repositories. Repositories created with 1.0.9 and later require it. + +Steps you should take: + +1. Upgrade all clients to 1.0.9 or later. +2. Run ``borg upgrade --tam `` *on every client* for *each* repository. +3. This will list all archives, including archive IDs, for easy comparison with your logs. +4. Done. + +Prior versions can access and modify repositories with this measure enabled, however, +to 1.0.9 or later clients their modifications are indiscernible from an attack and will +raise an error until the below procedure is followed. We are aware that this can +be annoying in some circumstances, but don't see a way to fix the vulnerability +otherwise. + +In case a version prior to 1.0.9 is used to modify a repository where the above procedure +was completed, and now you get an error message from other clients: + +1. ``borg upgrade --tam --force `` once with *any* client suffices. + +This attack is mitigated by: + +- Noting/logging ``borg list``, ``borg info``, or ``borg create --stats``, which + contain the archive IDs. + +We are not aware of others having discovered, disclosed or exploited this vulnerability. + +Vulnerability time line: + +* 2016-11-14: Vulnerability and fix discovered during review of cryptography by Marian Beermann (@enkore) +* 2016-11-20: First patch +* 2016-12-20: Released fixed version 1.0.9 +* 2017-01-02: CVE was assigned +* 2017-01-15: Released fixed version 1.1.0b3 (fix was previously only available from source) + +.. _attic013_check_corruption: + +Pre-1.0.9 potential data loss +----------------------------- + +If you have archives in your repository that were made with attic <= 0.13 +(and later migrated to borg), running borg check would report errors in these +archives. See issue #1837. + +The reason for this is an invalid (and useless) metadata key that was +always added due to a bug in these old attic versions. + +If you run borg check --repair, things escalate quickly: all archive items +with invalid metadata will be killed. Due to that attic bug, that means all +items in all archives made with these old attic versions. + + +Pre-1.0.4 potential repo corruption +----------------------------------- Some external errors (like network or disk I/O errors) could lead to corruption of the backup repository due to issue #1138. @@ -49,63 +126,937 @@ The best check that everything is ok is to run a dry-run extraction:: borg extract -v --dry-run REPO::ARCHIVE +..
_changelog: -Version 1.1.0 (not released yet) --------------------------------- +Changelog +========= + +Version 1.2.0dev0 (not released yet) +------------------------------------ + +Compatibility notes: + +- dropped support and testing for Python 3.4, minimum requirement is 3.5.0. + In case your OS does not provide Python >= 3.5, consider using our binary, + which does not need an external Python interpreter. +- list: corrected mix-up of "isomtime" and "mtime" formats. Previously, + "isomtime" was the default but produced a verbose human format, + while "mtime" produced a ISO-8601-like format. + The behaviours have been swapped (so "mtime" is human, "isomtime" is ISO-like), + and the default is now "mtime". + "isomtime" is now a real ISO-8601 format ("T" between date and time, not a space). + +Version 1.1.0rc1 (2017-07-24) +----------------------------- + +Compatibility notes: + +- delete: removed short option for --cache-only New features: -- borg check: will not produce the "Checking segments" output unless - new --progress option is passed, #824. -- options that imply output (--show-rc, --show-version, --list, --stats, - --progress) don't need -v/--info to have that output displayed, #865 -- borg recreate: re-create existing archives, #787 #686 #630 #70, also see - #757, #770. +- support borg list repo --format {comment} {bcomment} {end}, #2081 +- key import: allow reading from stdin, #2760 - - selectively remove files/dirs from old archives - - re-compress data - - re-chunkify data, e.g. to have upgraded Attic / Borg 0.xx archives - deduplicate with Borg 1.x archives or to experiment with chunker-params. -- create: visit files in inode order (better speed, esp. for large directories - and rotating disks) -- borg diff: show differences between archives -- borg list improved: +Fixes: + +- with-lock: avoid creating segment files that might be overwritten later, #1867 +- prune: fix checkpoints processing with --glob-archives +- FUSE: versions view: keep original file extension at end, #2769 +- fix --last, --first: do not accept values <= 0, + fix reversed archive ordering with --last +- include testsuite data (attic.tar.gz) when installing the package +- use limited unpacker for outer key, for manifest (both security precautions), + #2174 #2175 +- fix bashism in shell scripts, #2820, #2816 +- cleanup endianness detection, create _endian.h, + fixes build on alpine linux, #2809 +- fix crash with --no-cache-sync (give known chunk size to chunk_incref), #2853 + +Other changes: + +- FUSE: versions view: linear numbering by archive time +- split up interval parsing from filtering for --keep-within, #2610 +- add a basic .editorconfig, #2734 +- use archive creation time as mtime for FUSE mount, #2834 +- upgrade FUSE for macOS (osxfuse) from 3.5.8 to 3.6.3, #2706 +- hashindex: speed up by replacing modulo with "if" to check for wraparound +- coala checker / pylint: fixed requirements and .coafile, more ignores +- borg upgrade: name backup directories as 'before-upgrade', #2811 +- add .mailmap +- some minor changes suggested by lgtm.com +- docs: + + - better explanation of the --ignore-inode option relevance, #2800 + - fix openSUSE command and add openSUSE section + - simplify ssh authorized_keys file using "restrict", add legacy note, #2121 + - mount: show usage of archive filters + - mount: add repository example, #2462 + - info: update and add examples, #2765 + - prune: include example + - improved style / formatting + - improved/fixed segments_per_dir docs + - recreate: fix wrong "remove 
unwanted files" example + - reference list of status chars in borg recreate --filter description + - update source-install docs about doc build dependencies, #2795 + - cleanup installation docs + - file system requirements, update segs per dir + - fix checkpoints/parts reference in FAQ, #2859 +- code: + + - hashindex: don't pass side effect into macro + - crypto low_level: don't mutate local bytes() + - use dash_open function to open file or "-" for stdin/stdout + - archiver: argparse cleanup / refactoring + - shellpattern: add match_end arg +- tests: added some additional unit tests, some fixes, #2700 #2710 +- vagrant: fix setup of cygwin, add Debian 9 "stretch" +- travis: don't perform full travis build on docs-only changes, #2531 + + +Version 1.1.0b6 (2017-06-18) +---------------------------- + +Compatibility notes: + +- Running "borg init" via a "borg serve --append-only" server will *not* create + an append-only repository anymore. Use "borg init --append-only" to initialize + an append-only repository. + +- Repositories in the "repokey" and "repokey-blake2" modes with an empty passphrase + are now treated as unencrypted repositories for security checks (e.g. + BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK). + + Previously there would be no prompts nor messages if an unknown repository + in one of these modes with an empty passphrase was encountered. This would + allow an attacker to swap a repository, if one assumed that the lack of + password prompts was due to a set BORG_PASSPHRASE. + + Since the "trick" does not work if BORG_PASSPHRASE is set, this does generally + not affect scripts. + +- Repositories in the "authenticated" mode are now treated as the unencrypted + repositories they are. + +- The client-side temporary repository cache now holds unencrypted data for better speed. + +- borg init: removed the short form of --append-only (-a). + +- borg upgrade: removed the short form of --inplace (-i). + +New features: + +- reimplemented the RepositoryCache, size-limited caching of decrypted repo + contents, integrity checked via xxh64. #2515 +- reduced space usage of chunks.archive.d. Existing caches are migrated during + a cache sync. #235 #2638 +- integrity checking using xxh64 for important files used by borg, #1101: + + - repository: index and hints files + - cache: chunks and files caches, chunks.archive.d +- improve cache sync speed, #1729 +- create: new --no-cache-sync option +- add repository mandatory feature flags infrastructure, #1806 +- Verify most operations against SecurityManager. Location, manifest timestamp + and key types are now checked for almost all non-debug commands. #2487 +- implement storage quotas, #2517 +- serve: add --restrict-to-repository, #2589 +- BORG_PASSCOMMAND: use external tool providing the key passphrase, #2573 +- borg export-tar, #2519 +- list: --json-lines instead of --json for archive contents, #2439 +- add --debug-profile option (and also "borg debug convert-profile"), #2473 +- implement --glob-archives/-a, #2448 +- normalize authenticated key modes for better naming consistency: + + - rename "authenticated" to "authenticated-blake2" (uses blake2b) + - implement "authenticated" mode (uses hmac-sha256) + +Fixes: + +- hashindex: read/write indices >2 GiB on 32bit systems, better error + reporting, #2496 +- repository URLs: implement IPv6 address support and also more informative + error message when parsing fails. 
+- mount: check whether llfuse is installed before asking for passphrase, #2540 +- mount: do pre-mount checks before opening repository, #2541 +- FUSE: + + - fix crash if empty (None) xattr is read, #2534 + - fix read(2) caching data in metadata cache + - fix negative uid/gid crash (fix crash when mounting archives + of external drives made on cygwin), #2674 + - redo ItemCache, on top of object cache + - use decrypted cache + - remove unnecessary normpaths +- serve: ignore --append-only when initializing a repository (borg init), #2501 +- serve: fix incorrect type of exception_short for Errors, #2513 +- fix --exclude and --exclude-from recursing into directories, #2469 +- init: don't allow creating nested repositories, #2563 +- --json: fix encryption[mode] not being the cmdline name +- remote: propagate Error.traceback correctly +- fix remote logging and progress, #2241 + + - implement --debug-topic for remote servers + - remote: restore "Remote:" prefix (as used in 1.0.x) + - rpc negotiate: enable v3 log protocol only for supported clients + - fix --progress and logging in general for remote +- fix parse_version, add tests, #2556 +- repository: truncate segments (and also some other files) before unlinking, #2557 +- recreate: keep timestamps as in original archive, #2384 +- recreate: if single archive is not processed, exit 2 +- patterns: don't recurse with ! / --exclude for pf:, #2509 +- cache sync: fix n^2 behaviour in lookup_name +- extract: don't write to disk with --stdout (affected non-regular-file items), #2645 +- hashindex: implement KeyError, more tests + +Other changes: + +- remote: show path in PathNotAllowed +- consider repokey w/o passphrase == unencrypted, #2169 +- consider authenticated mode == unencrypted, #2503 +- restrict key file names, #2560 +- document follow_symlinks requirements, check libc, use stat and chown + with follow_symlinks=False, #2507 +- support common options on the main command, #2508 +- support common options on mid-level commands (e.g. borg *key* export) +- make --progress a common option +- increase DEFAULT_SEGMENTS_PER_DIR to 1000 +- chunker: fix invalid use of types (function only used by tests) +- chunker: don't do uint32_t >> 32 +- FUSE: + + - add instrumentation (--debug and SIGUSR1/SIGINFO) + - reduced memory usage for repository mounts by lazily instantiating archives + - improved archive load times +- info: use CacheSynchronizer & HashIndex.stats_against (better performance) +- docs: + + - init: document --encryption as required + - security: OpenSSL usage + - security: used implementations; note python libraries + - security: security track record of OpenSSL and msgpack + - patterns: document denial of service (regex, wildcards) + - init: note possible denial of service with "none" mode + - init: document SHA extension is supported in OpenSSL and thus SHA is + faster on AMD Ryzen than blake2b. + - book: use A4 format, new builder option format. + - book: create appendices + - data structures: explain repository compaction + - data structures: add chunk layout diagram + - data structures: integrity checking + - data structures: demingle cache and repo index + - Attic FAQ: separate section for attic stuff + - FAQ: I get an IntegrityError or similar - what now? 
+ - FAQ: Can I use Borg on SMR hard drives?, #2252 + - FAQ: specify "using inline shell scripts" + - add systemd warning regarding placeholders, #2543 + - xattr: document API + - add docs/misc/borg-data-flow data flow chart + - debugging facilities + - README: how to help the project, #2550 + - README: add bountysource badge, #2558 + - fresh new theme + tweaking + - logo: vectorized (PDF and SVG) versions + - frontends: use headlines - you can link to them + - mark --pattern, --patterns-from as experimental + - highlight experimental features in online docs + - remove regex based pattern examples, #2458 + - nanorst for "borg help TOPIC" and --help + - split deployment + - deployment: hosting repositories + - deployment: automated backups to a local hard drive + - development: vagrant, windows10 requirements + - development: update docs remarks + - split usage docs, #2627 + - usage: avoid bash highlight, [options] instead of + - usage: add benchmark page + - helpers: truncate_and_unlink doc + - don't suggest to leak BORG_PASSPHRASE + - internals: columnize rather long ToC [webkit fixup] + internals: manifest & feature flags + - internals: more HashIndex details + - internals: fix ASCII art equations + - internals: edited obj graph related sections a bit + - internals: layers image + description + - fix way too small figures in pdf + - index: disable syntax highlight (bash) + - improve options formatting, fix accidental block quotes + +- testing / checking: + + - add support for using coala, #1366 + - testsuite: add ArchiverCorruptionTestCase + - do not test logger name, #2504 + - call setup_logging after destroying logging config + - testsuite.archiver: normalise pytest.raises vs. assert_raises + - add test for preserved intermediate folder permissions, #2477 + - key: add round-trip test + - remove attic dependency of the tests, #2505 + - enable remote tests on cygwin + - tests: suppress tar's future timestamp warning + - cache sync: add more refcount tests + - repository: add tests, including corruption tests + +- vagrant: + + - control VM cpus and pytest workers via env vars VMCPUS and XDISTN + - update cleaning workdir + - fix openbsd shell + - add OpenIndiana + +- packaging: + + - binaries: don't bundle libssl + - setup.py clean to remove compiled files + - fail in borg package if version metadata is very broken (setuptools_scm) + +- repo / code structure: + + - create borg.algorithms and borg.crypto packages + - algorithms: rename crc32 to checksums + - move patterns to module, #2469 + - gitignore: complete paths for src/ excludes + - cache: extract CacheConfig class + - implement IntegrityCheckedFile + Detached variant, #2502 #1688 + - introduce popen_with_error_handling to handle common user errors + + +Version 1.1.0b5 (2017-04-30) +---------------------------- + +Compatibility notes: + +- BORG_HOSTNAME_IS_UNIQUE is now on by default. 
+- removed --compression-from feature +- recreate: add --recompress flag, unify --always-recompress and + --recompress + +Fixes: + +- catch exception for os.link when hardlinks are not supported, #2405 +- borg rename / recreate: expand placeholders, #2386 +- generic support for hardlinks (files, devices, FIFOs), #2324 +- extract: also create parent dir for device files, if needed, #2358 +- extract: if a hardlink master is not in the to-be-extracted subset, + the "x" status was not displayed for it, #2351 +- embrace y2038 issue to support 32bit platforms: clamp timestamps to int32, + #2347 +- verify_data: fix IntegrityError handling for defect chunks, #2442 +- allow excluding parent and including child, #2314 + +Other changes: + +- refactor compression decision stuff +- change global compression default to lz4 as well, to be consistent + with --compression defaults. +- placeholders: deny access to internals and other unspecified stuff +- clearer error message for unrecognized placeholder +- more clear exception if borg check does not help, #2427 +- vagrant: upgrade FUSE for macOS to 3.5.8, #2346 +- linux binary builds: get rid of glibc 2.13 dependency, #2430 +- docs: + + - placeholders: document escaping + - serve: env vars in original commands are ignored + - tell what kind of hardlinks we support + - more docs about compression + - LICENSE: use canonical formulation + ("copyright holders and contributors" instead of "author") + - document borg init behaviour via append-only borg serve, #2440 + - be clear about what buzhash is used for, #2390 + - add hint about chunker params, #2421 + - clarify borg upgrade docs, #2436 + - FAQ to explain warning when running borg check --repair, #2341 + - repository file system requirements, #2080 + - pre-install considerations + - misc. formatting / crossref fixes +- tests: + + - enhance travis setuptools_scm situation + - add extra test for the hashindex + - fix invalid param issue in benchmarks + +These belong to 1.1.0b4 release, but did not make it into changelog by then: + +- vagrant: increase memory for parallel testing +- lz4 compress: lower max. buffer size, exception handling +- add docstring to do_benchmark_crud +- patterns help: mention path full-match in intro + + +Version 1.1.0b4 (2017-03-27) +---------------------------- + +Compatibility notes: + +- init: the --encryption argument is mandatory now (there are several choices) +- moved "borg migrate-to-repokey" to "borg key migrate-to-repokey". +- "borg change-passphrase" is deprecated, use "borg key change-passphrase" + instead. +- the --exclude-if-present option now supports tagging a folder with any + filesystem object type (file, folder, etc), instead of expecting only files + as tags, #1999 +- the --keep-tag-files option has been deprecated in favor of the new + --keep-exclude-tags, to account for the change mentioned above. +- use lz4 compression by default, #2179 + +New features: + +- JSON API to make developing frontends and automation easier + (see :ref:`json_output`) + + - add JSON output to commands: `borg create/list/info --json ...`. + - add --log-json option for structured logging output. + - add JSON progress information, JSON support for confirmations (yes()). 
+- add two new options --pattern and --patterns-from as discussed in #1406 +- new path full match pattern style (pf:) for very fast matching, #2334 +- add 'debug dump-manifest' and 'debug dump-archive' commands +- add 'borg benchmark crud' command, #1788 +- new 'borg delete --force --force' to delete severely corrupted archives, #1975 +- info: show utilization of maximum archive size, #1452 +- list: add dsize and dcsize keys, #2164 +- paperkey.html: Add interactive html template for printing key backups. +- key export: add qr html export mode +- securely erase config file (which might have old encryption key), #2257 +- archived file items: add size to metadata, 'borg extract' and 'borg check' do + check the file size for consistency, FUSE uses precomputed size from Item. + +Fixes: + +- fix remote speed regression introduced in 1.1.0b3, #2185 +- fix regression handling timestamps beyond 2262 (revert bigint removal), + introduced in 1.1.0b3, #2321 +- clamp (nano)second values to unproblematic range, #2304 +- hashindex: rebuild hashtable if we have too little empty buckets + (performance fix), #2246 +- Location regex: fix bad parsing of wrong syntax +- ignore posix_fadvise errors in repository.py, #2095 +- borg rpc: use limited msgpack.Unpacker (security precaution), #2139 +- Manifest: Make sure manifest timestamp is strictly monotonically increasing. +- create: handle BackupOSError on a per-path level in one spot +- create: clarify -x option / meaning of "same filesystem" +- create: don't create hard link refs to failed files +- archive check: detect and fix missing all-zero replacement chunks, #2180 +- files cache: update inode number when --ignore-inode is used, #2226 +- fix decompression exceptions crashing ``check --verify-data`` and others + instead of reporting integrity error, #2224 #2221 +- extract: warning for unextracted big extended attributes, #2258, #2161 +- mount: umount on SIGINT/^C when in foreground +- mount: handle invalid hard link refs +- mount: fix huge RAM consumption when mounting a repository (saves number of + archives * 8 MiB), #2308 +- hashindex: detect mingw byte order #2073 +- hashindex: fix wrong skip_hint on hashindex_set when encountering tombstones, + the regression was introduced in #1748 +- fix ChunkIndex.__contains__ assertion for big-endian archs +- fix borg key/debug/benchmark crashing without subcommand, #2240 +- Location: accept //servername/share/path +- correct/refactor calculation of unique/non-unique chunks +- extract: fix missing call to ProgressIndicator.finish +- prune: fix error msg, it is --keep-within, not --within +- fix "auto" compression mode bug (not compressing), #2331 +- fix symlink item fs size computation, #2344 + +Other changes: + +- remote repository: improved async exception processing, #2255 #2225 +- with --compression auto,C, only use C if lz4 achieves at least 3% compression +- PatternMatcher: only normalize path once, #2338 +- hashindex: separate endian-dependent defs from endian detection +- migrate-to-repokey: ask using canonical_path() as we do everywhere else. 
+- SyncFile: fix use of fd object after close +- make LoggedIO.close_segment reentrant +- creating a new segment: use "xb" mode, #2099 +- redo key_creator, key_factory, centralise key knowledge, #2272 +- add return code functions, #2199 +- list: only load cache if needed +- list: files->items, clarifications +- list: add "name" key for consistency with info cmd +- ArchiveFormatter: add "start" key for compatibility with "info" +- RemoteRepository: account rx/tx bytes +- setup.py build_usage/build_man/build_api fixes +- Manifest.in: simplify, exclude .so, .dll and .orig, #2066 +- FUSE: get rid of chunk accounting, st_blocks = ceil(size / blocksize). +- tests: + + - help python development by testing 3.6-dev + - test for borg delete --force +- vagrant: + + - freebsd: some fixes, #2067 + - darwin64: use osxfuse 3.5.4 for tests / to build binaries + - darwin64: improve VM settings + - use python 3.5.3 to build binaries, #2078 + - upgrade pyinstaller from 3.1.1+ to 3.2.1 + - pyinstaller: use fixed AND freshly compiled bootloader, #2002 + - pyinstaller: automatically builds bootloader if missing +- docs: + + - create really nice man pages + - faq: mention --remote-ratelimit in bandwidth limit question + - fix caskroom link, #2299 + - docs/security: reiterate that RPC in Borg does no networking + - docs/security: counter tracking, #2266 + - docs/development: update merge remarks + - address SSH batch mode in docs, #2202 #2270 + - add warning about running build_usage on Python >3.4, #2123 + - one link per distro in the installation page + - improve --exclude-if-present and --keep-exclude-tags, #2268 + - improve automated backup script in doc, #2214 + - improve remote-path description + - update docs for create -C default change (lz4) + - document relative path usage, #1868 + - document snapshot usage, #2178 + - corrected some stuff in internals+security + - internals: move toctree to after the introduction text + - clarify metadata kind, manifest ops + - key enc: correct / clarify some stuff, link to internals/security + - datas: enc: 1.1.x has different MACs + - datas: enc: correct factual error -- no nonce involved there. + - make internals.rst an index page and edit it a bit + - add "Cryptography in Borg" and "Remote RPC protocol security" sections + - document BORG_HOSTNAME_IS_UNIQUE, #2087 + - FAQ by categories as proposed by @anarcat in #1802 + - FAQ: update Which file types, attributes, etc. are *not* preserved? + - development: new branching model for git repository + - development: define "ours" merge strategy for auto-generated files + - create: move --exclude note to main doc + - create: move item flags to main doc + - fix examples using borg init without -e/--encryption + - list: don't print key listings in fat (html + man) + - remove Python API docs (were very incomplete, build problems on RTFD) + - added FAQ section about backing up root partition + + +Version 1.0.10 (2017-02-13) +--------------------------- + +Bug fixes: + +- Manifest timestamps are now monotonically increasing, + this fixes issues when the system clock jumps backwards + or is set inconsistently across computers accessing the same repository, #2115 +- Fixed testing regression in 1.0.10rc1 that led to a hard dependency on + py.test >= 3.0, #2112 + +New features: + +- "key export" can now generate a printable HTML page with both a QR code and + a human-readable "paperkey" representation (and custom text) through the + ``--qr-html`` option.
+ + The same functionality is also available through `paperkey.html `_, + which is the same HTML page generated by ``--qr-html``. It works with existing + "key export" files and key files. + +Other changes: + +- docs: + + - language clarification - "borg create --one-file-system" option does not respect + mount points, but considers different file systems instead, #2141 +- setup.py: build_api: sort file list for determinism + + +Version 1.1.0b3 (2017-01-15) +---------------------------- + +Compatibility notes: + +- borg init: removed the default of "--encryption/-e", #1979 + This was done so users make an informed decision about -e mode. + +Bug fixes: + +- borg recreate: don't rechunkify unless explicitly told so +- borg info: fixed bug when called without arguments, #1914 +- borg init: fix free space check crashing if disk is full, #1821 +- borg debug delete/get obj: fix wrong reference to exception +- fix processing of remote ~/ and ~user/ paths (regressed since 1.1.0b1), #1759 +- posix platform module: only build / import on non-win32 platforms, #2041 + +New features: + +- new CRC32 implementations that are much faster than the zlib one used previously, #1970 +- add blake2b key modes (use blake2b as MAC). This links against system libb2, + if possible, otherwise uses bundled code +- automatically remove stale locks - set BORG_HOSTNAME_IS_UNIQUE env var + to enable stale lock killing. If set, stale locks in both cache and + repository are deleted. #562 #1253 +- borg info : print general repo information, #1680 +- borg check --first / --last / --sort / --prefix, #1663 +- borg mount --first / --last / --sort / --prefix, #1542 +- implement "health" item formatter key, #1749 +- BORG_SECURITY_DIR to remember security related infos outside the cache. + Key type, location and manifest timestamp checks now survive cache + deletion. This also means that you can now delete your cache and avoid + previous warnings, since Borg can still tell it's safe. +- implement BORG_NEW_PASSPHRASE, #1768 + +Other changes: + +- borg recreate: + + - remove special-cased --dry-run + - update --help + - remove bloat: interruption blah, autocommit blah, resuming blah + - re-use existing checkpoint functionality + - archiver tests: add check_cache tool - lints refcounts + +- fixed cache sync performance regression from 1.1.0b1 onwards, #1940 +- syncing the cache without chunks.archive.d (see :ref:`disable_archive_chunks`) + now avoids any merges and is thus faster, #1940 +- borg check --verify-data: faster due to linear on-disk-order scan +- borg debug-xxx commands removed, we use "debug xxx" subcommands now, #1627 +- improve metadata handling speed +- shortcut hashindex_set by having hashindex_lookup hint about address +- improve / add progress displays, #1721 +- check for index vs. segment files object count mismatch +- make RPC protocol more extensible: use named parameters. +- RemoteRepository: misc. code cleanups / refactors +- clarify cache/repository README file + +- docs: + + - quickstart: add a comment about other (remote) filesystems + - quickstart: only give one possible ssh url syntax, all others are + documented in usage chapter. + - mention file:// + - document repo URLs / archive location + - clarify borg diff help, #980 + - deployment: synthesize alternative --restrict-to-path example + - improve cache / index docs, esp. files cache docs, #1825 + - document using "git merge 1.0-maint -s recursive -X rename-threshold=20%" + for avoiding troubles when merging the 1.0-maint branch into master.
+ +- tests: + + - FUSE tests: catch ENOTSUP on freebsd + - FUSE tests: test troublesome xattrs last + - fix byte range error in test, #1740 + - use monkeypatch to set env vars, but only on pytest based tests. + - point XDG_*_HOME to temp dirs for tests, #1714 + - remove all BORG_* env vars from the outer environment + + +Version 1.0.10rc1 (2017-01-29) +------------------------------ + +Bug fixes: + +- borg serve: fix transmission data loss of pipe writes, #1268 + This affects only the cygwin platform (not Linux, BSD, OS X). +- Avoid triggering an ObjectiveFS bug in xattr retrieval, #1992 +- When running out of buffer memory when reading xattrs, only skip the + current file, #1993 +- Fixed "borg upgrade --tam" crashing with unencrypted repositories. Since + :ref:`the issue ` is not relevant for unencrypted repositories, + it now does nothing and prints an error, #1981. +- Fixed change-passphrase crashing with unencrypted repositories, #1978 +- Fixed "borg check repo::archive" indicating success if "archive" does not exist, #1997 +- borg check: print non-exit-code warning if --last or --prefix aren't fulfilled +- fix bad parsing of wrong repo location syntax +- create: don't create hard link refs to failed files, + mount: handle invalid hard link refs, #2092 +- detect mingw byte order, #2073 +- creating a new segment: use "xb" mode, #2099 +- mount: umount on SIGINT/^C when in foreground, #2082 + +Other changes: + +- binary: use fixed AND freshly compiled pyinstaller bootloader, #2002 +- xattr: ignore empty names returned by llistxattr(2) et al +- Enable the fault handler: install handlers for the SIGSEGV, SIGFPE, SIGABRT, + SIGBUS and SIGILL signals to dump the Python traceback. +- Also print a traceback on SIGUSR2. +- borg change-passphrase: print key location (simplify making a backup of it) +- officially support Python 3.6 (setup.py: add Python 3.6 qualifier) +- tests: + + - vagrant / travis / tox: add Python 3.6 based testing + - vagrant: fix openbsd repo, #2042 + - vagrant: fix the freebsd64 machine, #2037 #2067 + - vagrant: use python 3.5.3 to build binaries, #2078 + - vagrant: use osxfuse 3.5.4 for tests / to build binaries + vagrant: improve darwin64 VM settings + - travis: fix osxfuse install (fixes OS X testing on Travis CI) + - travis: require succeeding OS X tests, #2028 + - travis: use latest pythons for OS X based testing + - use pytest-xdist to parallelize testing + - fix xattr test race condition, #2047 + - setup.cfg: fix pytest deprecation warning, #2050 +- docs: + + - language clarification - VM backup FAQ + - borg create: document how to backup stdin, #2013 + - borg upgrade: fix incorrect title levels + - add CVE numbers for issues fixed in 1.0.9, #2106 +- fix typos (taken from Debian package patch) +- remote: include data hexdump in "unexpected RPC data" error message +- remote: log SSH command line at debug level +- API_VERSION: use numberspaces, #2023 +- remove .github from pypi package, #2051 +- add pip and setuptools to requirements file, #2030 +- SyncFile: fix use of fd object after close (cosmetic) +- Manifest.in: simplify, exclude \*.{so,dll,orig}, #2066 +- ignore posix_fadvise errors in repository.py, #2095 + (works around issues with docker on ARM) +- make LoggedIO.close_segment reentrant, avoid reentrance + + +Version 1.0.9 (2016-12-20) +-------------------------- + +Security fixes: + +- A flaw in the cryptographic authentication scheme in Borg allowed an attacker + to spoof the manifest. See :ref:`tam_vuln` above for the steps you should + take. 
+ + CVE-2016-10099 was assigned to this vulnerability. +- borg check: When rebuilding the manifest (which should only be needed very rarely) + duplicate archive names would be handled on a "first come first serve" basis, allowing + an attacker to apparently replace archives. + + CVE-2016-10100 was assigned to this vulnerability. + +Bug fixes: + +- borg check: + + - rebuild manifest if it's corrupted + - skip corrupted chunks during manifest rebuild +- fix TypeError in integrity error handler, #1903, #1894 +- fix location parser for archives with @ char (regression introduced in 1.0.8), #1930 +- fix wrong duration/timestamps if system clock jumped during a create +- fix progress display not updating if system clock jumps backwards +- fix checkpoint interval being incorrect if system clock jumps + +Other changes: + +- docs: + + - add python3-devel as a dependency for cygwin-based installation + - clarify extract is relative to current directory + - FAQ: fix link to changelog + - markup fixes +- tests: + + - test_get\_(cache|keys)_dir: clean env state, #1897 + - get back pytest's pretty assertion failures, #1938 +- setup.py build_usage: + + - fixed build_usage not processing all commands + - fixed build_usage not generating includes for debug commands + + +Version 1.0.9rc1 (2016-11-27) +----------------------------- + +Bug fixes: + +- files cache: fix determination of newest mtime in backup set (which is + used in cache cleanup and led to wrong "A" [added] status for unchanged + files in next backup), #1860. + +- borg check: + + - fix incorrectly reporting attic 0.13 and earlier archives as corrupt + - handle repo w/o objects gracefully and also bail out early if repo is + *completely* empty, #1815. +- fix tox/pybuild in 1.0-maint +- at xattr module import time, loggers are not initialized yet + +New features: + +- borg umount + exposed already existing umount code via the CLI api, so users can use it, + which is more consistent than using borg to mount and fusermount -u (or + umount) to un-mount, #1855. +- implement borg create --noatime --noctime, fixes #1853 + +Other changes: + +- docs: + + - display README correctly on PyPI + - improve cache / index docs, esp. files cache docs, fixes #1825 + - different pattern matching for --exclude, #1779 + - datetime formatting examples for {now} placeholder, #1822 + - clarify passphrase mode attic repo upgrade, #1854 + - clarify --umask usage, #1859 + - clarify how to choose PR target branch + - clarify prune behavior for different archive contents, #1824 + - fix PDF issues, add logo, fix authors, headings, TOC + - move security verification to support section + - fix links in standalone README (:ref: tags) + - add link to security contact in README + - add FAQ about security + - move fork differences to FAQ + - add more details about resource usage +- tests: skip remote tests on cygwin, #1268 +- travis: + + - allow OS X failures until the brew cask osxfuse issue is fixed + - caskroom osxfuse-beta gone, it's osxfuse now (3.5.3) +- vagrant: + + - upgrade OSXfuse / FUSE for macOS to 3.5.3 + - remove llfuse from tox.ini at a central place + - do not try to install llfuse on centos6 + - fix FUSE test for darwin, #1546 + - add windows virtual machine with cygwin + - Vagrantfile cleanup / code deduplication + +Version 1.1.0b2 (2016-10-01) +---------------------------- + +Bug fixes: + +- fix incorrect preservation of delete tags, leading to "object count mismatch" + on borg check, #1598. 
This only occurred with 1.1.0b1 (not with 1.0.x) and is + normally fixed by running another borg create/delete/prune. +- fix broken --progress for double-cell paths (e.g. CJK), #1624 +- borg recreate: also catch SIGHUP +- FUSE: + + - fix hardlinks in versions view, #1599 + - add parameter check to ItemCache.get to make potential failures more clear + +New features: + +- Archiver, RemoteRepository: add --remote-ratelimit (send data) +- borg help compression, #1582 +- borg check: delete chunks with integrity errors, #1575, so they can be + "repaired" immediately and maybe healed later. +- archives filters concept (refactoring/unifying older code) + + - covers --first/--last/--prefix/--sort-by options + - currently used for borg list/info/delete + +Other changes: + +- borg check --verify-data slightly tuned (use get_many()) +- change {utcnow} and {now} to ISO-8601 format ("T" date/time separator) +- repo check: log transaction IDs, improve object count mismatch diagnostic +- Vagrantfile: use TW's fresh-bootloader pyinstaller branch +- fix module names in api.rst +- hashindex: bump api_version + + +Version 1.1.0b1 (2016-08-28) +---------------------------- + +New features: + +- new commands: + + - borg recreate: re-create existing archives, #787 #686 #630 #70, also see + #757, #770. + + - selectively remove files/dirs from old archives + - re-compress data + - re-chunkify data, e.g. to have upgraded Attic / Borg 0.xx archives + deduplicate with Borg 1.x archives or to experiment with chunker-params. + - borg diff: show differences between archives + - borg with-lock: execute a command with the repository locked, #990 +- borg create: + + - Flexible compression with pattern matching on path/filename, + and LZ4 heuristic for deciding compressibility, #810, #1007 + - visit files in inode order (better speed, esp. for large directories and rotating disks) + - in-file checkpoints, #1217 + - increased default checkpoint interval to 30 minutes (was 5 minutes), #896 + - added uuid archive format tag, #1151 + - save mountpoint directories with --one-file-system, makes system restore easier, #1033 + - Linux: added support for some BSD flags, #1050 + - add 'x' status for excluded paths, #814 + + - also means files excluded via UF_NODUMP, #1080 +- borg check: + + - will not produce the "Checking segments" output unless new --progress option is passed, #824. + - --verify-data to verify data cryptographically on the client, #975 +- borg list, #751, #1179 - removed {formatkeys}, see "borg list --help" - --list-format is deprecated, use --format instead + - --format now also applies to listing archives, not only archive contents, #1179 - now supports the usual [PATH [PATHS…]] syntax and excludes - new keys: csize, num_chunks, unique_chunks, NUL - supports guaranteed_available hashlib hashes - (to avoid varying functionality depending on environment) -- prune: + (to avoid varying functionality depending on environment), + which includes the SHA1 and SHA2 family as well as MD5 +- borg prune: - to better visualize the "thinning out", we now list all archives in reverse time order. rephrase and reorder help text. - implement --keep-last N via --keep-secondly N, also --keep-minutely. 
assuming that there is not more than 1 backup archive made in 1s, --keep-last N and --keep-secondly N are equivalent, #537 -- borg comment: add archive comments, #842 -- provide "borgfs" wrapper for borg mount, enables usage via fstab, #743 -- create: add 'x' status for excluded paths, #814 -- --show-version: shows/logs the borg version, #725 + - cleanup checkpoints except the latest, #1008 +- borg extract: + + - added --progress, #1449 + - Linux: limited support for BSD flags, #1050 +- borg info: + + - output is now more similar to borg create --stats, #977 +- borg mount: + + - provide "borgfs" wrapper for borg mount, enables usage via fstab, #743 + - "versions" mount option - when used with a repository mount, this gives + a merged, versioned view of the files in all archives, #729 +- repository: + + - added progress information to commit/compaction phase (often takes some time when deleting/pruning), #1519 + - automatic recovery for some forms of repository inconsistency, #858 + - check free space before going forward with a commit, #1336 + - improved write performance (esp. for rotating media), #985 + + - new IO code for Linux + - raised default segment size to approx 512 MiB + - improved compaction performance, #1041 + - reduced client CPU load and improved performance for remote repositories, #940 + +- options that imply output (--show-rc, --show-version, --list, --stats, + --progress) don't need -v/--info to have that output displayed, #865 +- add archive comments (via borg (re)create --comment), #842 - borg list/prune/delete: also output archive id, #731 +- --show-version: shows/logs the borg version, #725 +- added --debug-topic for granular debug logging, #1447 +- use atomic file writing/updating for configuration and key files, #1377 +- BORG_KEY_FILE environment variable, #1001 +- self-testing module, #970 + Bug fixes: +- list: fixed default output being produced if --format is given with empty parameter, #1489 +- create: fixed overflowing progress line with CJK and similar characters, #1051 +- prune: fixed crash if --prefix resulted in no matches, #1029 - init: clean up partial repo if passphrase input is aborted, #850 - info: quote cmdline arguments that have spaces in them -- failing hashindex tests on netbsd, #804 -- fix links failing for extracting subtrees, #761 +- fix hardlinks failing in some cases for extracting subtrees, #761 Other changes: - replace stdlib hmac with OpenSSL, zero-copy decrypt (10-15% increase in performance of hash-lists and extract). +- improved chunker performance, #1021 +- open repository segment files in exclusive mode (fail-safe), #1134 +- improved error logging, #1440 - Source: - pass meta-data around, #765 - move some constants to new constants module - better readability and less errors with namedtuples, #823 + - moved source tree into src/ subdirectory, #1016 + - made borg.platform a package, #1113 + - removed dead crypto code, #1032 + - improved and ported parts of the test suite to py.test, #912 + - created data classes instead of passing dictionaries around, #981, #1158, #1161 + - cleaned up imports, #1112 - Docs: - better help texts and sphinx reproduction of usage help: @@ -116,6 +1067,8 @@ Other changes: - chunker: added some insights by "Voltara", #903 - clarify what "deduplicated size" means - fix / update / add package list entries + - added a SaltStack usage example, #956 + - expanded FAQ - new contributors in AUTHORS! 
- Tests: @@ -123,6 +1076,286 @@ Other changes: - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945 + +Version 1.0.8 (2016-10-29) +-------------------------- + +Bug fixes: + +- RemoteRepository: Fix busy wait in call_many, #940 + +New features: + +- implement borgmajor/borgminor/borgpatch placeholders, #1694 + {borgversion} was already there (full version string). With the new + placeholders you can now also get e.g. 1 or 1.0 or 1.0.8. + +Other changes: + +- avoid previous_location mismatch, #1741 + + due to the changed canonicalization for relative paths in PR #1711 / #1655 + (implement /./ relpath hack), there would be a changed repo location warning + and the user would be asked if this is ok. this would break automation and + require manual intervention, which is unwanted. + + thus, we automatically fix the previous_location config entry, if it only + changed in the expected way, but still means the same location. + +- docs: + + - deployment.rst: do not use bare variables in ansible snippet + - add clarification about append-only mode, #1689 + - setup.py: add comment about requiring llfuse, #1726 + - update usage.rst / api.rst + - repo url / archive location docs + typo fix + - quickstart: add a comment about other (remote) filesystems + +- vagrant / tests: + + - no chown when rsyncing (fixes boxes w/o vagrant group) + - fix FUSE permission issues on linux/freebsd, #1544 + - skip FUSE test for borg binary + fakeroot + - ignore security.selinux xattrs, fixes tests on centos, #1735 + + +Version 1.0.8rc1 (2016-10-17) +----------------------------- + +Bug fixes: + +- fix signal handling (SIGINT, SIGTERM, SIGHUP), #1620 #1593 + Fixes e.g. leftover lock files for quickly repeated signals (e.g. Ctrl-C + Ctrl-C) or lost connections or systemd sending SIGHUP. +- progress display: adapt formatting to narrow screens, do not crash, #1628 +- borg create --read-special - fix crash on broken symlink, #1584. + also correctly processes broken symlinks. before this regressed to a crash + (5b45385) a broken symlink would've been skipped. +- process_symlink: fix missing backup_io() + Fixes a chmod/chown/chgrp/unlink/rename/... crash race between getting + dirents and dispatching to process_symlink. +- yes(): abort on wrong answers, saying so, #1622 +- fixed exception borg serve raised when connection was closed before repository + was opened. Add an error message for this. +- fix read-from-closed-FD issue, #1551 + (this seems not to get triggered in 1.0.x, but was discovered in master) +- hashindex: fix iterators (always raise StopIteration when exhausted) + (this seems not to get triggered in 1.0.x, but was discovered in master) +- enable relative paths in ssh:// repo URLs, via /./relpath hack, #1655 +- allow repo paths with colons, #1705 +- update changed repo location immediately after acceptance, #1524 +- fix debug get-obj / delete-obj crash if object not found and remote repo, + #1684 +- pyinstaller: use a spec file to build borg.exe binary, exclude osxfuse dylib + on Mac OS X (avoids mismatch lib <-> driver), #1619 + +New features: + +- add "borg key export" / "borg key import" commands, #1555, so users are able + to backup / restore their encryption keys more easily. + + Supported formats are the keyfile format used by borg internally and a + special "paper" format with per-line checksums for printed backups. For the + paper format, the import is an interactive process which checks each line as + soon as it is input.
+- add "borg debug-refcount-obj" to determine a repo objects' referrer counts, + #1352 + +Other changes: + +- add "borg debug ..." subcommands + (borg debug-* still works, but will be removed in borg 1.1) +- setup.py: Add subcommand support to build_usage. +- remote: change exception message for unexpected RPC data format to indicate + dataflow direction. +- improved messages / error reporting: + + - IntegrityError: add placeholder for message, so that the message we give + appears not only in the traceback, but also in the (short) error message, + #1572 + - borg.key: include chunk id in exception msgs, #1571 + - better messages for cache newer than repo, #1700 +- vagrant (testing/build VMs): + + - upgrade OSXfuse / FUSE for macOS to 3.5.2 + - update Debian Wheezy boxes, #1686 + - openbsd / netbsd: use own boxes, fixes misc rsync installation and + FUSE/llfuse related testing issues, #1695 #1696 #1670 #1671 #1728 +- docs: + + - add docs for "key export" and "key import" commands, #1641 + - fix inconsistency in FAQ (pv-wrapper). + - fix second block in "Easy to use" section not showing on GitHub, #1576 + - add bestpractices badge + - link reference docs and faq about BORG_FILES_CACHE_TTL, #1561 + - improve borg info --help, explain size infos, #1532 + - add release signing key / security contact to README, #1560 + - add contribution guidelines for developers + - development.rst: add sphinx_rtd_theme to the sphinx install command + - adjust border color in borg.css + - add debug-info usage help file + - internals.rst: fix typos + - setup.py: fix build_usage to always process all commands + - added docs explaining multiple --restrict-to-path flags, #1602 + - add more specific warning about write-access debug commands, #1587 + - clarify FAQ regarding backup of virtual machines, #1672 +- tests: + + - work around FUSE xattr test issue with recent fakeroot + - simplify repo/hashindex tests + - travis: test FUSE-enabled borg, use trusty to have a recent FUSE + - re-enable FUSE tests for RemoteArchiver (no deadlocks any more) + - clean env for pytest based tests, #1714 + - fuse_mount contextmanager: accept any options + + +Version 1.0.7 (2016-08-19) +-------------------------- + +Security fixes: + +- borg serve: fix security issue with remote repository access, #1428 + If you used e.g. --restrict-to-path /path/client1/ (with or without trailing + slash does not make a difference), it acted like a path prefix match using + /path/client1 (note the missing trailing slash) - the code then also allowed + working in e.g. /path/client13 or /path/client1000. + + As this could accidentally lead to major security/privacy issues depending on + the paths you use, the behaviour was changed to be a strict directory match. + That means --restrict-to-path /path/client1 (with or without trailing slash + does not make a difference) now uses /path/client1/ internally (note the + trailing slash here!) for matching and allows precisely that path AND any + path below it. So, /path/client1 is allowed, /path/client1/repo1 is allowed, + but not /path/client13 or /path/client1000. + + If you willingly used the undocumented (dangerous) previous behaviour, you + may need to rearrange your --restrict-to-path paths now. We are sorry if + that causes work for you, but we did not want a potentially dangerous + behaviour in the software (not even using a for-backwards-compat option). + +Bug fixes: + +- fixed repeated LockTimeout exceptions when borg serve tried to write into + a already write-locked repo (e.g. 
by a borg mount), #502 part b) + This was solved by the fix for #1220 in 1.0.7rc1 already. +- fix cosmetics + file leftover for "not a valid borg repository", #1490 +- Cache: release lock if cache is invalid, #1501 +- borg extract --strip-components: fix leak of preloaded chunk contents +- Repository, when a InvalidRepository exception happens: + + - fix spurious, empty lock.roster + - fix repo not closed cleanly + +New features: + +- implement borg debug-info, fixes #1122 + (just calls already existing code via cli, same output as below tracebacks) + +Other changes: + +- skip the O_NOATIME test on GNU Hurd, fixes #1315 + (this is a very minor issue and the GNU Hurd project knows the bug) +- document using a clean repo to test / build the release + + +Version 1.0.7rc2 (2016-08-13) +----------------------------- + +Bug fixes: + +- do not write objects to repository that are bigger than the allowed size, + borg will reject reading them, #1451. + + Important: if you created archives with many millions of files or + directories, please verify if you can open them successfully, + e.g. try a "borg list REPO::ARCHIVE". +- lz4 compression: dynamically enlarge the (de)compression buffer, the static + buffer was not big enough for archives with extremely many items, #1453 +- larger item metadata stream chunks, raise archive item limit by 8x, #1452 +- fix untracked segments made by moved DELETEs, #1442 + + Impact: Previously (metadata) segments could become untracked when deleting data, + these would never be cleaned up. +- extended attributes (xattrs) related fixes: + + - fixed a race condition in xattrs querying that led to the entire file not + being backed up (while logging the error, exit code = 1), #1469 + - fixed a race condition in xattrs querying that led to a crash, #1462 + - raise OSError including the error message derived from errno, deal with + path being a integer FD + +Other changes: + +- print active env var override by default, #1467 +- xattr module: refactor code, deduplicate, clean up +- repository: split object size check into too small and too big +- add a transaction_id assertion, so borg init on a broken (inconsistent) + filesystem does not look like a coding error in borg, but points to the + real problem. +- explain confusing TypeError caused by compat support for old servers, #1456 +- add forgotten usage help file from build_usage +- refactor/unify buffer code into helpers.Buffer class, add tests +- docs: + + - document archive limitation, #1452 + - improve prune examples + + +Version 1.0.7rc1 (2016-08-05) +----------------------------- + +Bug fixes: + +- fix repo lock deadlocks (related to lock upgrade), #1220 +- catch unpacker exceptions, resync, #1351 +- fix borg break-lock ignoring BORG_REPO env var, #1324 +- files cache performance fixes (fixes unnecessary re-reading/chunking/ + hashing of unmodified files for some use cases): + + - fix unintended file cache eviction, #1430 + - implement BORG_FILES_CACHE_TTL, update FAQ, raise default TTL from 10 + to 20, #1338 +- FUSE: + + - cache partially read data chunks (performance), #965, #966 + - always create a root dir, #1125 +- use an OrderedDict for helptext, making the build reproducible, #1346 +- RemoteRepository init: always call close on exceptions, #1370 (cosmetic) +- ignore stdout/stderr broken pipe errors (cosmetic), #1116 + +New features: + +- better borg versions management support (useful esp. 
for borg servers + wanting to offer multiple borg versions and for clients wanting to choose + a specific server borg version), #1392: + + - add BORG_VERSION environment variable before executing "borg serve" via ssh + - add new placeholder {borgversion} + - substitute placeholders in --remote-path + +- borg init --append-only option (makes using the more secure append-only mode + more convenient. when used remotely, this requires 1.0.7+ also on the borg + server), #1291. + +Other changes: + +- Vagrantfile: + + - darwin64: upgrade to FUSE for macOS 3.4.1 (aka osxfuse), #1378 + - xenial64: use user "ubuntu", not "vagrant" (as usual), #1331 +- tests: + + - fix FUSE tests on OS X, #1433 +- docs: + + - FAQ: add backup using stable filesystem names recommendation + - FAQ about glibc compatibility added, #491, glibc-check improved + - FAQ: 'A' unchanged file; remove ambiguous entry age sentence. + - OS X: install pkg-config to build with FUSE support, fixes #1400 + - add notes about shell/sudo pitfalls with env. vars, #1380 + - added platform feature matrix +- implement borg debug-dump-repo-objs + + Version 1.0.6 (2016-07-12) -------------------------- @@ -172,7 +1405,7 @@ Other changes: - tests: - add more FUSE tests, #1284 - - deduplicate fuse (u)mount code + - deduplicate FUSE (u)mount code - fix borg binary test issues, #862 - docs: @@ -358,7 +1591,7 @@ Bug fixes: - add overflow and range checks for 1st (special) uint32 of the hashindex values, switch from int32 to uint32. - fix so that refcount will never overflow, but just stick to max. value after - a overflow would have occured. + a overflow would have occurred. - borg delete: fix --cache-only for broken caches, #874 Makes --cache-only idempotent: it won't fail if the cache is already deleted. @@ -457,7 +1690,7 @@ Compatibility notes: - disambiguate -p option, #563: - -p now is same as --progress - - -P now is same as --prefix + - -P now is same as --prefix - remove deprecated "borg verify", use "borg extract --dry-run" instead - cleanup environment variable semantics, #355 @@ -522,7 +1755,7 @@ New features: - format options for location: user, pid, fqdn, hostname, now, utcnow, user - borg list --list-format - borg prune -v --list enables the keep/prune list output, #658 - + Bug fixes: - fix _open_rb noatime handling, #657 @@ -540,14 +1773,14 @@ Other changes: - Vagrant: drop Ubuntu Precise (12.04) - does not have Python >= 3.4 - Vagrant: use pyinstaller v3.1.1 to build binaries - docs: - + - borg upgrade: add to docs that only LOCAL repos are supported - borg upgrade also handles borg 0.xx -> 1.0 - use pip extras or requirements file to install llfuse - fix order in release process - updated usage docs and other minor / cosmetic fixes - verified borg examples in docs, #644 - - freebsd dependency installation and fuse configuration, #649 + - freebsd dependency installation and FUSE configuration, #649 - add example how to restore a raw device, #671 - add a hint about the dev headers needed when installing from source - add examples for delete (and handle delete after list, before prune), #656 @@ -1392,7 +2625,7 @@ Version 0.7 - Ported to FreeBSD - Improved documentation -- Experimental: Archives mountable as fuse filesystems. +- Experimental: Archives mountable as FUSE filesystems. - The "user." 
prefix is no longer stripped from xattrs on Linux diff --git a/docs/conf.py b/docs/conf.py index 51670275..283a2c7a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,8 +19,6 @@ sys.path.insert(0, os.path.abspath('../src')) from borg import __version__ as sw_version -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' - # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. @@ -44,14 +42,15 @@ master_doc = 'index' # General information about the project. project = 'Borg - Deduplicating Archiver' -copyright = '2010-2014 Jonas Borgström, 2015-2016 The Borg Collective (see AUTHORS file)' +copyright = '2010-2014 Jonas Borgström, 2015-2017 The Borg Collective (see AUTHORS file)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = sw_version.split('-')[0] +split_char = '+' if '+' in sw_version else '-' +version = sw_version.split(split_char)[0] # The full version, including alpha/beta/rc tags. release = version @@ -74,6 +73,10 @@ exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None +# The Borg docs contain no or very little Python docs. +# Thus, the primary domain is rst. +primary_domain = 'rst' + # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True @@ -96,25 +99,29 @@ pygments_style = 'sphinx' # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -#html_theme = '' -if not on_rtd: # only import and set the theme if we're building docs locally - import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - html_style = 'css/borg.css' -else: - html_context = { - 'css_files': [ - 'https://media.readthedocs.org/css/sphinx_rtd_theme.css', - 'https://media.readthedocs.org/css/readthedocs-doc-embed.css', - '_static/css/borg.css', - ], - } +import guzzle_sphinx_theme + +html_theme_path = guzzle_sphinx_theme.html_theme_path() +html_theme = 'guzzle_sphinx_theme' + + +def set_rst_settings(app): + app.env.settings.update({ + 'field_name_limit': 0, + 'option_limit': 0, + }) + + +def setup(app): + app.add_stylesheet('css/borg.css') + app.connect('builder-inited', set_rst_settings) # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +html_theme_options = { + 'project_nav_name': 'Borg %s' % version, +} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = ['_themes'] @@ -128,7 +135,7 @@ else: # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = '_static/logo.png' +html_logo = '_static/logo.svg' # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 @@ -140,19 +147,21 @@ html_favicon = '_static/favicon.ico' # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['borg_theme'] +html_extra_path = ['../src/borg/paperkey.html'] + # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
html_last_updated_fmt = '%Y-%m-%d' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +html_use_smartypants = True # Custom sidebar templates, maps document names to template names. html_sidebars = { - 'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'], - '**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html'] + '**': ['logo-text.html', 'searchbox.html', 'globaltoc.html'], } + # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} @@ -189,22 +198,22 @@ htmlhelp_basename = 'borgdoc' # -- Options for LaTeX output -------------------------------------------------- -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'Borg.tex', 'Borg Documentation', - 'see "AUTHORS" file', 'manual'), + ('book', 'Borg.tex', 'Borg Documentation', + 'The Borg Collective', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +latex_logo = '_static/logo.pdf' + +latex_elements = { + 'papersize': 'a4paper', + 'pointsize': '10pt', + 'figure_align': 'H', +} # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. @@ -214,13 +223,18 @@ latex_documents = [ #latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +latex_show_urls = 'footnote' # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +latex_appendices = [ + 'support', + 'resources', + 'changes', + 'authors', +] # If false, no module index is generated. #latex_domain_indices = True @@ -237,7 +251,13 @@ man_pages = [ 1), ] -extensions = ['sphinx.ext.extlinks', 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] +extensions = [ + 'sphinx.ext.extlinks', + 'sphinx.ext.autodoc', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', +] extlinks = { 'issue': ('https://github.com/borgbackup/borg/issues/%s', '#'), diff --git a/docs/deployment.rst b/docs/deployment.rst index a29d09cd..7b1caf92 100644 --- a/docs/deployment.rst +++ b/docs/deployment.rst @@ -1,221 +1,14 @@ .. include:: global.rst.inc .. highlight:: none -.. _deployment: Deployment ========== -This chapter will give an example how to setup a borg repository server for multiple -clients. +This chapter details deployment strategies for the following scenarios. -Machines --------- +.. toctree:: + :titlesonly: -There are multiple machines used in this chapter and will further be named by their -respective fully qualified domain name (fqdn). - -* The backup server: `backup01.srv.local` -* The clients: - - - John Doe's desktop: `johndoe.clnt.local` - - Webserver 01: `web01.srv.local` - - Application server 01: `app01.srv.local` - -User and group --------------- - -The repository server needs to have only one UNIX user for all the clients. 
-Recommended user and group with additional settings: - -* User: `backup` -* Group: `backup` -* Shell: `/bin/bash` (or other capable to run the `borg serve` command) -* Home: `/home/backup` - -Most clients shall initiate a backup from the root user to catch all -users, groups and permissions (e.g. when backing up `/home`). - -Folders -------- - -The following folder tree layout is suggested on the repository server: - -* User home directory, /home/backup -* Repositories path (storage pool): /home/backup/repos -* Clients restricted paths (`/home/backup/repos/`): - - - johndoe.clnt.local: `/home/backup/repos/johndoe.clnt.local` - - web01.srv.local: `/home/backup/repos/web01.srv.local` - - app01.srv.local: `/home/backup/repos/app01.srv.local` - -Restrictions ------------- - -Borg is instructed to restrict clients into their own paths: -``borg serve --restrict-to-path /home/backup/repos/`` - -There is only one ssh key per client allowed. Keys are added for ``johndoe.clnt.local``, ``web01.srv.local`` and -``app01.srv.local``. But they will access the backup under only one UNIX user account as: -``backup@backup01.srv.local``. Every key in ``$HOME/.ssh/authorized_keys`` has a -forced command and restrictions applied as shown below: - -:: - - command="cd /home/backup/repos/; - borg serve --restrict-to-path /home/backup/repos/", - no-port-forwarding,no-X11-forwarding,no-pty, - no-agent-forwarding,no-user-rc - -.. note:: The text shown above needs to be written on a single line! - -The options which are added to the key will perform the following: - -1. Change working directory -2. Run ``borg serve`` restricted to the client base path -3. Restrict ssh and do not allow stuff which imposes a security risk - -Due to the ``cd`` command we use, the server automatically changes the current -working directory. Then client doesn't need to have knowledge of the absolute -or relative remote repository path and can directly access the repositories at -``@:``. - -.. note:: The setup above ignores all client given commandline parameters - which are normally appended to the `borg serve` command. - -Client ------- - -The client needs to initialize the `pictures` repository like this: - - borg init backup@backup01.srv.local:pictures - -Or with the full path (should actually never be used, as only for demonstrational purposes). -The server should automatically change the current working directory to the `` folder. - - borg init backup@backup01.srv.local:/home/backup/repos/johndoe.clnt.local/pictures - -When `johndoe.clnt.local` tries to access a not restricted path the following error is raised. -John Doe tries to backup into the Web 01 path: - - borg init backup@backup01.srv.local:/home/backup/repos/web01.srv.local/pictures - -:: - - ~~~ SNIP ~~~ - Remote: borg.remote.PathNotAllowed: /home/backup/repos/web01.srv.local/pictures - ~~~ SNIP ~~~ - Repository path not allowed - -Ansible -------- - -Ansible takes care of all the system-specific commands to add the user, create the -folder. Even when the configuration is changed the repository server configuration is -satisfied and reproducible. - -Automate setting up an repository server with the user, group, folders and -permissions a Ansible playbook could be used. Keep in mind the playbook -uses the Arch Linux `pacman `_ -package manager to install and keep borg up-to-date. 
- -:: - - - hosts: backup01.srv.local - vars: - user: backup - group: backup - home: /home/backup - pool: "{{ home }}/repos" - auth_users: - - host: johndoe.clnt.local - key: "{{ lookup('file', '/path/to/keys/johndoe.clnt.local.pub') }}" - - host: web01.clnt.local - key: "{{ lookup('file', '/path/to/keys/web01.clnt.local.pub') }}" - - host: app01.clnt.local - key: "{{ lookup('file', '/path/to/keys/app01.clnt.local.pub') }}" - tasks: - - pacman: name=borg state=latest update_cache=yes - - group: name="{{ group }}" state=present - - user: name="{{ user }}" shell=/bin/bash home="{{ home }}" createhome=yes group="{{ group }}" groups= state=present - - file: path="{{ home }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory - - file: path="{{ home }}/.ssh" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory - - file: path="{{ pool }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory - - authorized_key: user="{{ user }}" - key="{{ item.key }}" - key_options='command="cd {{ pool }}/{{ item.host }};borg serve --restrict-to-path {{ pool }}/{{ item.host }}",no-port-forwarding,no-X11-forwarding,no-pty,no-agent-forwarding,no-user-rc' - with_items: auth_users - - file: path="{{ home }}/.ssh/authorized_keys" owner="{{ user }}" group="{{ group }}" mode=0600 state=file - - file: path="{{ pool }}/{{ item.host }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory - with_items: auth_users - -Salt ----- - -This is a configuration similar to the one above, configured to be deployed with -Salt running on a Debian system. - -:: - - Install borg backup from pip: - pkg.installed: - - pkgs: - - python3 - - python3-dev - - python3-pip - - python-virtualenv - - libssl-dev - - openssl - - libacl1-dev - - libacl1 - - liblz4-dev - - liblz4-1 - - build-essential - - libfuse-dev - - fuse - - pkg-config - pip.installed: - - pkgs: ["borgbackup"] - - bin_env: /usr/bin/pip3 - - Setup backup user: - user.present: - - name: backup - - fullname: Backup User - - home: /home/backup - - shell: /bin/bash - # CAUTION! - # If you change the ssh command= option below, it won't necessarily get pushed to the backup - # server correctly unless you delete the ~/.ssh/authorized_keys file and re-create it! - {% for host in backupclients %} - Give backup access to {{host}}: - ssh_auth.present: - - user: backup - - source: salt://conf/ssh-pubkeys/{{host}}-backup.id_ecdsa.pub - - options: - - command="cd /home/backup/repos/{{host}}; borg serve --restrict-to-path /home/backup/repos/{{host}}" - - no-port-forwarding - - no-X11-forwarding - - no-pty - - no-agent-forwarding - - no-user-rc - {% endfor %} - - -Enhancements ------------- - -As this chapter only describes a simple and effective setup it could be further -enhanced when supporting (a limited set) of client supplied commands. A wrapper -for starting `borg serve` could be written. Or borg itself could be enhanced to -autodetect it runs under SSH by checking the `SSH_ORIGINAL_COMMAND` environment -variable. This is left open for future improvements. - -When extending ssh autodetection in borg no external wrapper script is necessary -and no other interpreter or application has to be deployed. 
- -See also --------- - -* `SSH Daemon manpage `_ -* `Ansible `_ -* `Salt `_ + deployment/central-backup-server + deployment/hosting-repositories + deployment/automated-local diff --git a/docs/deployment/automated-local.rst b/docs/deployment/automated-local.rst new file mode 100644 index 00000000..a64cec23 --- /dev/null +++ b/docs/deployment/automated-local.rst @@ -0,0 +1,231 @@ +.. include:: ../global.rst.inc +.. highlight:: none + +Automated backups to a local hard drive +======================================= + +This guide shows how to automate backups to a hard drive directly connected +to your computer. If a backup hard drive is connected, backups are automatically +started, and the drive shut-down and disconnected when they are done. + +This guide is written for a Linux-based operating system and makes use of +systemd and udev. + +Overview +-------- + +An udev rule is created to trigger on the addition of block devices. The rule contains a tag +that triggers systemd to start a oneshot service. The oneshot service executes a script in +the standard systemd service environment, which automatically captures stdout/stderr and +logs it to the journal. + +The script mounts the added block device, if it is a registered backup drive, and creates +backups on it. When done, it optionally unmounts the file system and spins the drive down, +so that it may be physically disconnected. + +Configuring the system +---------------------- + +First, create the ``/etc/backups`` directory (as root). +All configuration goes into this directory. + +Then, create ``etc/backups/40-backup.rules`` with the following content (all on one line):: + + ACTION=="add", SUBSYSTEM=="bdi", DEVPATH=="/devices/virtual/bdi/*", + TAG+="systemd", ENV{SYSTEMD_WANTS}="automatic-backup.service" + +.. topic:: Finding a more precise udev rule + + If you always connect the drive(s) to the same physical hardware path, e.g. the same + eSATA port, then you can make a more precise udev rule. + + Execute ``udevadm monitor`` and connect a drive to the port you intend to use. + You should see a flurry of events, find those regarding the `block` subsystem. + Pick the event whose device path ends in something similar to a device file name, + typically`sdX/sdXY`. Use the event's device path and replace `sdX/sdXY` after the + `/block/` part in the path with a star (\*). For example: + `DEVPATH=="/devices/pci0000:00/0000:00:11.0/ata3/host2/target2:0:0/2:0:0:0/block/*"`. + + Reboot a few times to ensure that the hardware path does not change: on some motherboards + components of it can be random. In these cases you cannot use a more accurate rule, + or need to insert additional stars for matching the path. + +The "systemd" tag in conjunction with the SYSTEMD_WANTS environment variable has systemd +launch the "automatic-backup" service, which we will create next, as the +``/etc/backups/automatic-backup.service`` file: + +.. code-block:: ini + + [Service] + Type=oneshot + ExecStart=/etc/backups/run.sh + +Now, create the main backup script, ``/etc/backups/run.sh``. Below is a template, +modify it to suit your needs (e.g. more backup sets, dumping databases etc.). + +.. code-block:: bash + + #!/bin/bash -ue + + # The udev rule is not terribly accurate and may trigger our service before + # the kernel has finished probing partitions. Sleep for a bit to ensure + # the kernel is done. + # + # This can be avoided by using a more precise udev rule, e.g. matching + # a specific hardware path and partition. 
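+    #
+    # (A possible alternative, not used here: "udevadm settle" blocks until the
+    # udev event queue has been processed, which can be more robust than a fixed
+    # sleep on drives that are slow to probe.)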
+ sleep 5 + + # + # Script configuration + # + + # The backup partition is mounted there + MOUNTPOINT=/mnt/backup + + # This is the location of the Borg repository + TARGET=$MOUNTPOINT/borg-backups/backup.borg + + # Archive name schema + DATE=$(date --iso-8601)-$(hostname) + + # This is the file that will later contain UUIDs of registered backup drives + DISKS=/etc/backups/backup.disks + + # Find whether the connected block device is a backup drive + for uuid in $(lsblk --noheadings --list --output uuid) + do + if grep --quiet --fixed-strings $uuid $DISKS; then + break + fi + uuid= + done + + if [ ! $uuid ]; then + echo "No backup disk found, exiting" + exit 0 + fi + + echo "Disk $uuid is a backup disk" + partition_path=/dev/disk/by-uuid/$uuid + # Mount file system if not already done. This assumes that if something is already + # mounted at $MOUNTPOINT, it is the backup drive. It won't find the drive if + # it was mounted somewhere else. + (mount | grep $MOUNTPOINT) || mount $partition_path $MOUNTPOINT + drive=$(lsblk --inverse --noheadings --list --paths --output name $partition_path | head --lines 1) + echo "Drive path: $drive" + + # + # Create backups + # + + # Options for borg create + BORG_OPTS="--stats --one-file-system --compression lz4 --checkpoint-interval 86400" + + # Set BORG_PASSPHRASE or BORG_PASSCOMMAND somewhere around here, using export, + # if encryption is used. + + # No one can answer if Borg asks these questions, it is better to just fail quickly + # instead of hanging. + export BORG_RELOCATED_REPO_ACCESS_IS_OK=no + export BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no + + # Log Borg version + borg --version + + echo "Starting backup for $DATE" + + # This is just an example, change it however you see fit + borg create $BORG_OPTS \ + --exclude /root/.cache \ + --exclude /var/cache \ + --exclude /var/lib/docker/devicemapper \ + $TARGET::$DATE-$$-system \ + / /boot + + # /home is often a separate partition / file system. + # Even if it isn't (add --exclude /home above), it probably makes sense + # to have /home in a separate archive. + borg create $BORG_OPTS \ + --exclude 'sh:/home/*/.cache' \ + $TARGET::$DATE-$$-home \ + /home/ + + echo "Completed backup for $DATE" + + # Just to be completely paranoid + sync + + if [ -f /etc/backups/autoeject ]; then + umount $MOUNTPOINT + hdparm -Y $drive + fi + + if [ -f /etc/backups/backup-suspend ]; then + systemctl suspend + fi + +Create the ``/etc/backups/autoeject`` file to have the script automatically eject the drive +after creating the backup. Rename the file to something else (e.g. ``/etc/backup/autoeject-no``) +when you want to do something with the drive after creating backups (e.g running check). + +Create the ``/etc/backups/backup-suspend`` file if the machine should suspend after completing +the backup. Don't forget to physically disconnect the device before resuming, +otherwise you'll enter a cycle. You can also add an option to power down instead. + +Create an empty ``/etc/backups/backup.disks`` file, you'll register your backup drives +there. + +The last part is to actually enable the udev rules and services: + +.. code-block:: bash + + ln -s /etc/backups/40-backup.rules /etc/udev/rules.d/40-backup.rules + ln -s /etc/backups/automatic-backup.service /etc/systemd/system/automatic-backup.service + systemctl daemon-reload + udevadm control --reload + +Adding backup hard drives +------------------------- + +Connect your backup hard drive. Format it, if not done already. 
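+
+For example, a minimal sketch of formatting a new drive (this assumes the drive
+shows up as ``/dev/sdX``; double-check the device name with ``lsblk`` first, since
+these commands destroy all data on it)::
+
+    # one GPT partition spanning the drive, formatted as ext4
+    parted --script /dev/sdX mklabel gpt mkpart backup ext4 0% 100%
+    mkfs.ext4 -L backup /dev/sdX1
+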
+Find the UUID of the file system that backups should be stored on::
+
+    lsblk -o+uuid,label
+
+Add the UUID to the ``/etc/backups/backup.disks`` file.
+
+Mount the drive at /mnt/backup.
+
+Initialize a Borg repository at the location indicated by ``TARGET``::
+
+    borg init --encryption ... /mnt/backup/borg-backups/backup.borg
+
+Unmount and reconnect the drive, or manually start the ``automatic-backup`` service
+to start the first backup::
+
+    systemctl start --no-block automatic-backup
+
+See backup logs using journalctl::
+
+    journalctl -fu automatic-backup [-n number-of-lines]
+
+Security considerations
+-----------------------
+
+The script as shown above will mount any file system with a UUID listed in
+``/etc/backups/backup.disks``. The UUID check is a safety / annoyance-reduction
+mechanism to keep the script from blowing up whenever a random USB thumb drive is connected.
+It is not meant as a security mechanism. Mounting file systems and reading repository
+data exposes additional attack surfaces (kernel file system drivers,
+possibly user space services and Borg itself). On the other hand, someone
+standing right next to your computer can attempt a lot of attacks, most of which
+are easier to do than e.g. exploiting file systems (installing a physical key logger,
+DMA attacks, stealing the machine, ...).
+
+Borg ensures that backups are not created on random drives that "just happen"
+to contain a Borg repository. If an unknown unencrypted repository is encountered,
+then the script aborts (BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no).
+
+Backups are only created on hard drives that contain a Borg repository that is
+either already known (by ID) to your machine, or that is encrypted with a
+passphrase matching the passphrase supplied to Borg.
diff --git a/docs/deployment/central-backup-server.rst b/docs/deployment/central-backup-server.rst
new file mode 100644
index 00000000..26e71898
--- /dev/null
+++ b/docs/deployment/central-backup-server.rst
@@ -0,0 +1,221 @@
+.. include:: ../global.rst.inc
+.. highlight:: none
+
+Central repository server with Ansible or Salt
+==============================================
+
+This section gives an example of how to set up a borg repository server for multiple
+clients.
+
+Machines
+--------
+
+Multiple machines are used in this section; they will be referred to by their
+respective fully qualified domain names (FQDN).
+
+* The backup server: `backup01.srv.local`
+* The clients:
+
+  - John Doe's desktop: `johndoe.clnt.local`
+  - Webserver 01: `web01.srv.local`
+  - Application server 01: `app01.srv.local`
+
+User and group
+--------------
+
+The repository server needs only one UNIX user for all the clients.
+Recommended user and group with additional settings:
+
+* User: `backup`
+* Group: `backup`
+* Shell: `/bin/bash` (or another shell capable of running the `borg serve` command)
+* Home: `/home/backup`
+
+Most clients should initiate a backup as the root user to catch all
+users, groups and permissions (e.g. when backing up `/home`).
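+
+If you set the server up by hand rather than with the Ansible or Salt examples
+below, creating this user could look like the following sketch (assuming the
+shadow-utils ``groupadd``/``useradd`` tools; adjust to your distribution)::
+
+    groupadd backup
+    useradd --gid backup --create-home --home-dir /home/backup --shell /bin/bash backup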
+
+Folders
+-------
+
+The following folder tree layout is suggested on the repository server:
+
+* User home directory, /home/backup
+* Repositories path (storage pool): /home/backup/repos
+* Clients restricted paths (`/home/backup/repos/`):
+
+  - johndoe.clnt.local: `/home/backup/repos/johndoe.clnt.local`
+  - web01.srv.local: `/home/backup/repos/web01.srv.local`
+  - app01.srv.local: `/home/backup/repos/app01.srv.local`
+
+Restrictions
+------------
+
+Borg is instructed to restrict clients to their own paths:
+``borg serve --restrict-to-path /home/backup/repos/``
+
+The client will be able to access any file or subdirectory inside of ``/home/backup/repos/``
+but no other directories. You can allow a client to access several separate directories by passing multiple
+``--restrict-to-path`` flags, for instance: ``borg serve --restrict-to-path /home/backup/repos/ --restrict-to-path /home/backup/repos/``,
+which could make sense if multiple machines belong to one person who should then have access to
+all of their machines' backups.
+
+Only one SSH key per client is allowed. Keys are added for ``johndoe.clnt.local``, ``web01.srv.local`` and
+``app01.srv.local``, but all of them access the backups under a single UNIX user account:
+``backup@backup01.srv.local``. Every key in ``$HOME/.ssh/authorized_keys`` has a
+forced command and restrictions applied as shown below:
+
+::
+
+  command="cd /home/backup/repos/;
+  borg serve --restrict-to-path /home/backup/repos/",
+  restrict
+
+.. note:: The text shown above needs to be written on a single line!
+
+The options which are added to the key will perform the following:
+
+1. Change the working directory
+2. Run ``borg serve`` restricted to the client base path
+3. Restrict SSH and disallow anything that imposes a security risk
+
+Due to the ``cd`` command we use, the server automatically changes the current
+working directory. The client then does not need to know the absolute
+or relative remote repository path and can directly access the repositories at
+``@:``.
+
+.. note:: The setup above ignores all client-given command line parameters
+          which are normally appended to the `borg serve` command.
+
+Client
+------
+
+The client needs to initialize the `pictures` repository like this::
+
+    borg init backup@backup01.srv.local:pictures
+
+Or with the full path (which should never actually be needed and is shown here only
+for demonstration purposes); the server automatically changes the current working
+directory to the `` folder::
+
+    borg init backup@backup01.srv.local:/home/backup/repos/johndoe.clnt.local/pictures
+
+When `johndoe.clnt.local` tries to access a path it is not restricted to, the following
+error is raised. Here, John Doe tries to back up into the Web 01 path::
+
+    borg init backup@backup01.srv.local:/home/backup/repos/web01.srv.local/pictures
+
+::
+
+  ~~~ SNIP ~~~
+  Remote: borg.remote.PathNotAllowed: /home/backup/repos/web01.srv.local/pictures
+  ~~~ SNIP ~~~
+  Repository path not allowed
+
+Ansible
+-------
+
+Ansible takes care of all the system-specific commands to add the user and create the
+folders. Even when the configuration changes, the repository server configuration stays
+satisfied and reproducible.
+
+To automate setting up a repository server with the user, group, folders and
+permissions, an Ansible playbook could be used. Keep in mind that the playbook
+uses the Arch Linux `pacman `_
+package manager to install borg and keep it up to date.
+ +:: + + - hosts: backup01.srv.local + vars: + user: backup + group: backup + home: /home/backup + pool: "{{ home }}/repos" + auth_users: + - host: johndoe.clnt.local + key: "{{ lookup('file', '/path/to/keys/johndoe.clnt.local.pub') }}" + - host: web01.clnt.local + key: "{{ lookup('file', '/path/to/keys/web01.clnt.local.pub') }}" + - host: app01.clnt.local + key: "{{ lookup('file', '/path/to/keys/app01.clnt.local.pub') }}" + tasks: + - pacman: name=borg state=latest update_cache=yes + - group: name="{{ group }}" state=present + - user: name="{{ user }}" shell=/bin/bash home="{{ home }}" createhome=yes group="{{ group }}" groups= state=present + - file: path="{{ home }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + - file: path="{{ home }}/.ssh" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + - file: path="{{ pool }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + - authorized_key: user="{{ user }}" + key="{{ item.key }}" + key_options='command="cd {{ pool }}/{{ item.host }};borg serve --restrict-to-path {{ pool }}/{{ item.host }}",restrict' + with_items: "{{ auth_users }}" + - file: path="{{ home }}/.ssh/authorized_keys" owner="{{ user }}" group="{{ group }}" mode=0600 state=file + - file: path="{{ pool }}/{{ item.host }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + with_items: "{{ auth_users }}" + +Salt +---- + +This is a configuration similar to the one above, configured to be deployed with +Salt running on a Debian system. + +:: + + Install borg backup from pip: + pkg.installed: + - pkgs: + - python3 + - python3-dev + - python3-pip + - python-virtualenv + - libssl-dev + - openssl + - libacl1-dev + - libacl1 + - liblz4-dev + - liblz4-1 + - build-essential + - libfuse-dev + - fuse + - pkg-config + pip.installed: + - pkgs: ["borgbackup"] + - bin_env: /usr/bin/pip3 + + Setup backup user: + user.present: + - name: backup + - fullname: Backup User + - home: /home/backup + - shell: /bin/bash + # CAUTION! + # If you change the ssh command= option below, it won't necessarily get pushed to the backup + # server correctly unless you delete the ~/.ssh/authorized_keys file and re-create it! + {% for host in backupclients %} + Give backup access to {{host}}: + ssh_auth.present: + - user: backup + - source: salt://conf/ssh-pubkeys/{{host}}-backup.id_ecdsa.pub + - options: + - command="cd /home/backup/repos/{{host}}; borg serve --restrict-to-path /home/backup/repos/{{host}}" + - restrict + {% endfor %} + + +Enhancements +------------ + +As this section only describes a simple and effective setup it could be further +enhanced when supporting (a limited set) of client supplied commands. A wrapper +for starting `borg serve` could be written. Or borg itself could be enhanced to +autodetect it runs under SSH by checking the `SSH_ORIGINAL_COMMAND` environment +variable. This is left open for future improvements. + +When extending ssh autodetection in borg no external wrapper script is necessary +and no other interpreter or application has to be deployed. + +See also +-------- + +* `SSH Daemon manpage `_ +* `Ansible `_ +* `Salt `_ diff --git a/docs/deployment/hosting-repositories.rst b/docs/deployment/hosting-repositories.rst new file mode 100644 index 00000000..6e3161ee --- /dev/null +++ b/docs/deployment/hosting-repositories.rst @@ -0,0 +1,73 @@ +.. include:: ../global.rst.inc +.. highlight:: none + +Hosting repositories +==================== + +This sections shows how to securely provide repository storage for users. 
+Optionally, each user can have a storage quota. + +Repositories are accessed through SSH. Each user of the service should +have her own login which is only able to access the user's files. +Technically it would be possible to have multiple users share one login, +however, separating them is better. Separate logins increase isolation +and are thus an additional layer of security and safety for both the +provider and the users. + +For example, if a user manages to breach ``borg serve`` then she can +only damage her own data (assuming that the system does not have further +vulnerabilities). + +Use the standard directory structure of the operating system. Each user +is assigned a home directory and repositories of the user reside in her +home directory. + +The following ``~user/.ssh/authorized_keys`` file is the most important +piece for a correct deployment. It allows the user to login via +their public key (which must be provided by the user), and restricts +SSH access to safe operations only. + +:: + + command="borg serve --restrict-to-repository /home//repository",restrict + + +.. note:: The text shown above needs to be written on a **single** line! + +.. warning:: + + If this file should be automatically updated (e.g. by a web console), + pay **utmost attention** to sanitizing user input. Strip all whitespace + around the user-supplied key, ensure that it **only** contains ASCII + with no control characters and that it consists of three parts separated + by a single space. Ensure that no newlines are contained within the key. + +The `restrict` keyword enables all restrictions, i.e. disables port, agent +and X11 forwarding, as well as disabling PTY allocation and execution of ~/.ssh/rc. +If any future restriction capabilities are added to authorized_keys +files they will be included in this set. + +The `command` keyword forces execution of the specified command line +upon login. This must be ``borg serve``. The `--restrict-to-repository` +option permits access to exactly **one** repository. It can be given +multiple times to permit access to more than one repository. + +The repository may not exist yet; it can be initialized by the user, +which allows for encryption. + +**Storage quotas** can be enabled by adding the ``--storage-quota`` option +to the ``borg serve`` command line:: + + restrict,command="borg serve --storage-quota 20G ..." ... + +The storage quotas of repositories are completely independent. If a +client is able to access multiple repositories, each repository +can be filled to the specified quota. + +If storage quotas are used, ensure that all deployed Borg releases +support storage quotas. + +Refer to :ref:`internals_storage_quota` for more details on storage quotas. + +Refer to the `sshd(8) `_ +man page for more details on SSH options. diff --git a/docs/development.rst b/docs/development.rst index 77dcb8b8..98e8d162 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -10,6 +10,115 @@ This chapter will get you started with |project_name| development. |project_name| is written in Python (with a little bit of Cython and C for the performance critical parts). +Contributions +------------- + +... are welcome! + +Some guidance for contributors: + +- discuss changes on the GitHub issue tracker, on IRC or on the mailing list + +- make your PRs on the ``master`` branch (see `Branching Model`_ for details) + +- do clean changesets: + + - focus on some topic, resist changing anything else. + - do not do style changes mixed with functional changes. 
+ - try to avoid refactorings mixed with functional changes. + - if you need to fix something after commit/push: + + - if there are ongoing reviews: do a fixup commit you can + squash into the bad commit later. + - if there are no ongoing reviews or you did not push the + bad commit yet: amend the commit to include your fix or + merge the fixup commit before pushing. + - have a nice, clear, typo-free commit comment + - if you fixed an issue, refer to it in your commit comment + - follow the style guide (see below) + +- if you write new code, please add tests and docs for it + +- run the tests, fix any issues that come up + +- make a pull request on GitHub + +- wait for review by other developers + +Branching model +--------------- + +Borg development happens on the ``master`` branch and uses GitHub pull +requests (if you don't have GitHub or don't want to use it you can +send smaller patches via the borgbackup :ref:`mailing_list` to the maintainers). + +Stable releases are maintained on maintenance branches named ``x.y-maint``, eg. +the maintenance branch of the 1.0.x series is ``1.0-maint``. + +Most PRs should be filed against the ``master`` branch. Only if an +issue affects **only** a particular maintenance branch a PR should be +filed against it directly. + +While discussing / reviewing a PR it will be decided whether the +change should be applied to maintenance branches. Each maintenance +branch has a corresponding *backport/x.y-maint* label, which will then +be applied. + +Changes that are typically considered for backporting: + +- Data loss, corruption and inaccessibility fixes +- Security fixes +- Forward-compatibility improvements +- Documentation corrections + +.. rubric:: Maintainer part + +From time to time a maintainer will backport the changes for a +maintenance branch, typically before a release or if enough changes +were collected: + +1. Notify others that you're doing this to avoid duplicate work. +2. Branch a backporting branch off the maintenance branch. +3. Cherry pick and backport the changes from each labelled PR, remove + the label for each PR you've backported. + + To preserve authorship metadata, do not follow the ``git cherry-pick`` + instructions to use ``git commit`` after resolving conflicts. Instead, + stage conflict resolutions and run ``git cherry-pick --continue``, + much like using ``git rebase``. + + To avoid merge issues (a cherry pick is a form of merge), use + these options (similar to the ``git merge`` options used previously, + the ``-x`` option adds a reference to the original commit):: + + git cherry-pick --strategy recursive -X rename-threshold=5% -x + +4. Make a PR of the backporting branch against the maintenance branch + for backport review. Mention the backported PRs in this PR, e.g.: + + Includes changes from #2055 #2057 #2381 + + This way GitHub will automatically show in these PRs where they + were backported. + +.. rubric:: Historic model + +Previously (until release 1.0.10) Borg used a `"merge upwards" +`_ model where +most minor changes and fixes where committed to a maintenance branch +(eg. 1.0-maint), and the maintenance branch(es) were regularly merged +back into the main development branch. This became more and more +troublesome due to merges growing more conflict-heavy and error-prone. + +Code and issues +--------------- + +Code is stored on GitHub, in the `Borgbackup organization +`_. `Issues +`_ and `pull requests +`_ should be sent there as +well. See also the :ref:`support` section for more details. 
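+
+For example, to get a development checkout to work on (a sketch; this is the public
+clone URL of the main repository in that organization)::
+
+    git clone https://github.com/borgbackup/borg.git
+    cd borg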
+ Style guide ----------- @@ -19,6 +128,17 @@ instead of 79. We do *not* use form-feed (``^L``) characters to separate sections either. Compliance is tested automatically when you run the tests. +Continuous Integration +---------------------- + +All pull requests go through Travis-CI_, which runs the tests on Linux +and Mac OS X as well as the flake8 style checker. Windows builds run on AppVeyor_, +while additional Unix-like platforms are tested on Golem_. + +.. _AppVeyor: https://ci.appveyor.com/project/borgbackup/borg/ +.. _Golem: https://golem.enkore.de/view/Borg/ +.. _Travis-CI: https://travis-ci.org/borgbackup/borg + Output and Logging ------------------ When writing logger calls, always use correct log level (debug only for @@ -76,7 +196,7 @@ Some more advanced examples:: # verify a changed tox.ini (run this after any change to tox.ini): fakeroot -u tox --recreate - fakeroot -u tox -e py34 # run all tests, but only on python 3.4 + fakeroot -u tox -e py35 # run all tests, but only on python 3.5 fakeroot -u tox borg.testsuite.locking # only run 1 test module @@ -92,31 +212,52 @@ As tox doesn't run on Windows you have to manually run command:: py.test --cov=borg --cov-config=.coveragerc --benchmark-skip --pyargs borg/testsuite -Regenerate usage files ----------------------- +Running more checks using coala +------------------------------- -Usage and API documentation is currently committed directly to git, -although those files are generated automatically from the source -tree. +First install coala and some checkers ("bears"): -When a new module is added, the ``docs/api.rst`` file needs to be -regenerated:: + pip install -r requirements.d/coala.txt - ./setup.py build_api +You can now run coala from the toplevel directory; it will read its settings +from ``.coafile`` there: -When a command is added, a commandline flag changed, added or removed, + coala + +Some bears have additional requirements and they usually tell you about +them in case they are missing. + +Documentation +------------- + +Generated files +~~~~~~~~~~~~~~~ + +Usage documentation (found in ``docs/usage/``) and man pages +(``docs/man/``) are generated automatically from the command line +parsers declared in the program and their documentation, which is +embedded in the program (see archiver.py). These are committed to git +for easier use by packagers downstream. + +When a command is added, a command line flag changed, added or removed, the usage docs need to be rebuilt as well:: - ./setup.py build_usage + python setup.py build_usage + python setup.py build_man + +However, we prefer to do this as part of our :ref:`releasing` +preparations, so it is generally not necessary to update these when +submitting patches that change something about the command line. Building the docs with Sphinx ------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The documentation (in reStructuredText format, .rst) is in docs/. -To build the html version of it, you need to have sphinx installed:: +To build the html version of it, you need to have Sphinx installed +(in your Borg virtualenv with Python 3):: - pip3 install sphinx # important: this will install sphinx with Python 3 + pip install -r requirements.d/docs.txt Now run:: @@ -125,7 +266,7 @@ Now run:: Then point a web browser at docs/_build/html/index.html. -The website is updated automatically through Github web hooks on the +The website is updated automatically by ReadTheDocs through GitHub web hooks on the main repository. 
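+
+For local builds, if you prefer to browse the result over HTTP instead of opening
+the files directly, a quick sketch using Python's built-in static file server (any
+other static file server works just as well)::
+
+    cd docs/_build/html && python3 -m http.server 8000
+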
Using Vagrant @@ -137,10 +278,14 @@ standalone binaries for various platforms. For better security, there is no automatic sync in the VM to host direction. The plugin `vagrant-scp` is useful to copy stuff from the VMs to the host. +The "windows10" box requires the `reload` plugin (``vagrant plugin install vagrant-reload``). + Usage:: # To create and provision the VM: vagrant up OS + # same, but use 6 VM cpus and 12 workers for pytest: + VMCPUS=6 XDISTN=12 vagrant up OS # To create an ssh session to the VM: vagrant ssh OS # To execute a command via ssh in the VM: @@ -175,6 +320,8 @@ On Windows use `python buildwin32.py` to build standalone executable in `win32ex with all necessary files to run. +.. _releasing: + Creating a new release ---------------------- @@ -187,25 +334,36 @@ Checklist: - update ``CHANGES.rst``, based on ``git log $PREVIOUS_RELEASE..`` - check version number of upcoming release in ``CHANGES.rst`` - verify that ``MANIFEST.in`` and ``setup.py`` are complete -- ``python setup.py build_api ; python setup.py build_usage`` and commit +- ``python setup.py build_usage ; python setup.py build_man`` and + commit (be sure to build with Python 3.5 as Python 3.6 added `more + guaranteed hashing algorithms + `_) - tag the release:: git tag -s -m "tagged/signed release X.Y.Z" X.Y.Z +- create a clean repo and use it for the following steps:: + + git clone borg borg-clean + + This makes sure no uncommitted files get into the release archive. + It will also reveal uncommitted required files. + Moreover, it makes sure the vagrant machines only get committed files and + do a fresh start based on that. - run tox and/or binary builds on all supported platforms via vagrant, check for test failures - create a release on PyPi:: python setup.py register sdist upload --identity="Thomas Waldmann" --sign -- close release milestone on Github +- close the release milestone on GitHub - announce on: - Mailing list - - Twitter (follow @ThomasJWaldmann for these tweets) + - Twitter - IRC channel (change ``/topic``) -- create a Github release, include: +- create a GitHub release, include: * standalone binaries (see above for how to create them) diff --git a/docs/faq.rst b/docs/faq.rst index 3eee339a..442df6f6 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -5,6 +5,9 @@ Frequently asked questions ========================== +Usage & Limitations +################### + Can I backup VM disk images? ---------------------------- @@ -12,6 +15,39 @@ Yes, the `deduplication`_ technique used by |project_name| makes sure only the modified parts of the file are stored. Also, we have optional simple sparse file support for extract. +If you use non-snapshotting backup tools like Borg to back up virtual machines, +then the VMs should be turned off for the duration of the backup. Backing up live VMs can (and will) +result in corrupted or inconsistent backup contents: a VM image is just a regular file to +Borg with the same issues as regular files when it comes to concurrent reading and writing from +the same file. + +For backing up live VMs use file system snapshots on the VM host, which establishes +crash-consistency for the VM images. This means that with most file systems +(that are journaling) the FS will always be fine in the backup (but may need a +journal replay to become accessible). + +Usually this does not mean that file *contents* on the VM are consistent, since file +contents are normally not journaled. Notable exceptions are ext4 in data=journal mode, +ZFS and btrfs (unless nodatacow is used). 
+ +Applications designed with crash-consistency in mind (most relational databases +like PostgreSQL, SQLite etc. but also for example Borg repositories) should always +be able to recover to a consistent state from a backup created with +crash-consistent snapshots (even on ext4 with data=writeback or XFS). + +Hypervisor snapshots capturing most of the VM's state can also be used for backups +and can be a better alternative to pure file system based snapshots of the VM's disk, +since no state is lost. Depending on the application this can be the easiest and most +reliable way to create application-consistent backups. + +Other applications may require a lot of work to reach application-consistency: +It's a broad and complex issue that cannot be explained in entirety here. + +Borg doesn't intend to address these issues due to their huge complexity +and platform/software dependency. Combining Borg with the mechanisms provided +by the platform (snapshots, hypervisor features) will be the best approach +to start tackling them. + Can I backup from multiple servers into a single repository? ------------------------------------------------------------ @@ -31,7 +67,7 @@ Can I copy or synchronize my repo to another location? ------------------------------------------------------ Yes, you could just copy all the files. Make sure you do that while no -backup is running. So what you get here is this: +backup is running (use `borg with-lock ...`). So what you get here is this: - client machine ---borg create---> repo1 - repo1 ---copy---> repo2 @@ -47,25 +83,6 @@ If you want to have 2 independent backups, it is better to do it like this: - client machine ---borg create---> repo1 - client machine ---borg create---> repo2 -Which file types, attributes, etc. are preserved? -------------------------------------------------- - - * Directories - * Regular files - * Hardlinks (considering all files in the same archive) - * Symlinks (stored as symlink, the symlink is not followed) - * Character and block device files - * FIFOs ("named pipes") - * Name - * Contents - * Timestamps in nanosecond precision: mtime, atime, ctime - * IDs of owning user and owning group - * Names of owning user and owning group (if the IDs can be resolved) - * Unix Mode/Permissions (u/g/o permissions, suid, sgid, sticky) - * Extended Attributes (xattrs) on Linux, OS X and FreeBSD - * Access Control Lists (ACL_) on Linux, OS X and FreeBSD - * BSD flags on OS X and FreeBSD - Which file types, attributes, etc. are *not* preserved? ------------------------------------------------------- @@ -79,34 +96,215 @@ Which file types, attributes, etc. are *not* preserved? backed up as (deduplicated and compressed) runs of zero bytes. Archive extraction has optional support to extract all-zero chunks as holes in a sparse file. - * filesystem specific attributes, like ext4 immutable bit, see :issue:`618`. + * Some filesystem specific attributes, like btrfs NOCOW, see :ref:`platforms`. + * For hardlinked symlinks, the hardlinking can not be archived (and thus, + the hardlinking will not be done at extraction time). The symlinks will + be archived and extracted as non-hardlinked symlinks, see :issue:`2379`. -Why is my backup bigger than with attic? Why doesn't |project_name| do compression by default? ----------------------------------------------------------------------------------------------- +Are there other known limitations? 
+---------------------------------- -Attic was rather unflexible when it comes to compression, it always -compressed using zlib level 6 (no way to switch compression off or -adjust the level or algorithm). +- A single archive can only reference a limited volume of file/dir metadata, + usually corresponding to tens or hundreds of millions of files/dirs. + When trying to go beyond that limit, you will get a fatal IntegrityError + exception telling that the (archive) object is too big. + An easy workaround is to create multiple archives with less items each. + See also the :ref:`archive_limitation` and :issue:`1452`. -|project_name| offers a lot of different compression algorithms and -levels. Which of them is the best for you pretty much depends on your -use case, your data, your hardware -- so you need to do an informed -decision about whether you want to use compression, which algorithm -and which level you want to use. This is why compression defaults to -none. + :ref:`borg_info` shows how large (relative to the maximum size) existing + archives are. + +.. _checkpoints_parts: + +If a backup stops mid-way, does the already-backed-up data stay there? +---------------------------------------------------------------------- + +Yes, |project_name| supports resuming backups. + +During a backup a special checkpoint archive named ``.checkpoint`` +is saved every checkpoint interval (the default value for this is 30 +minutes) containing all the data backed-up until that point. + +This checkpoint archive is a valid archive, +but it is only a partial backup (not all files that you wanted to backup are +contained in it). Having it in the repo until a successful, full backup is +completed is useful because it references all the transmitted chunks up +to the checkpoint. This means that in case of an interruption, you only need to +retransfer the data since the last checkpoint. + +If a backup was interrupted, you do not need to do any special considerations, +just invoke ``borg create`` as you always do. You may use the same archive name +as in previous attempt or a different one (e.g. if you always include the current +datetime), it does not matter. + +|project_name| always does full single-pass backups, so it will start again +from the beginning - but it will be much faster, because some of the data was +already stored into the repo (and is still referenced by the checkpoint +archive), so it does not need to get transmitted and stored again. + +Once your backup has finished successfully, you can delete all +``.checkpoint`` archives. If you run ``borg prune``, it will +also care for deleting unneeded checkpoints. + +Note: the checkpointing mechanism creates hidden, partial files in an archive, +so that checkpoints even work while a big file is being processed. +They are named ``.borg_part_`` and all operations usually ignore +these files, but you can make them considered by giving the option +``--consider-part-files``. You usually only need that option if you are +really desperate (e.g. if you have no completed backup of that file and you'ld +rather get a partial file extracted than nothing). You do **not** want to give +that option under any normal circumstances. + +How can I backup huge file(s) over a unstable connection? +--------------------------------------------------------- + +This is not a problem anymore. + +For more details, see :ref:`checkpoints_parts`. + +How can I restore huge file(s) over an unstable connection? 
+----------------------------------------------------------- + +If you cannot manage to extract the whole big file in one go, you can extract +all the part files and manually concatenate them together. + +For more details, see :ref:`checkpoints_parts`. + +Can |project_name| add redundancy to the backup data to deal with hardware malfunction? +--------------------------------------------------------------------------------------- + +No, it can't. While that at first sounds like a good idea to defend against +some defect HDD sectors or SSD flash blocks, dealing with this in a +reliable way needs a lot of low-level storage layout information and +control which we do not have (and also can't get, even if we wanted). + +So, if you need that, consider RAID or a filesystem that offers redundant +storage or just make backups to different locations / different hardware. + +See also :issue:`225`. + +Can |project_name| verify data integrity of a backup archive? +------------------------------------------------------------- + +Yes, if you want to detect accidental data damage (like bit rot), use the +``check`` operation. It will notice corruption using CRCs and hashes. +If you want to be able to detect malicious tampering also, use an encrypted +repo. It will then be able to check using CRCs and HMACs. + +Can I use Borg on SMR hard drives? +---------------------------------- + +SMR (shingled magnetic recording) hard drives are very different from +regular hard drives. Applications have to behave in certain ways or +performance will be heavily degraded. + +Borg 1.1 ships with default settings suitable for SMR drives, +and has been successfully tested on *Seagate Archive v2* drives +using the ext4 file system. + +Some Linux kernel versions between 3.19 and 4.5 had various bugs +handling device-managed SMR drives, leading to IO errors, unresponsive +drives and unreliable operation in general. + +For more details, refer to :issue:`2252`. + +.. _faq-integrityerror: + +I get an IntegrityError or similar - what now? +---------------------------------------------- + +A single error does not necessarily indicate bad hardware or a Borg +bug. All hardware exhibits a bit error rate (BER). Hard drives are typically +specified as exhibiting less than one error every 12 to 120 TB +(one bit error in 10e14 to 10e15 bits). The specification is often called +*unrecoverable read error rate* (URE rate). + +Apart from these very rare errors there are two main causes of errors: + +(i) Defective hardware: described below. +(ii) Bugs in software (Borg, operating system, libraries): + Ensure software is up to date. + Check whether the issue is caused by any fixed bugs described in :ref:`important_notes`. + + +.. rubric:: Finding defective hardware + +.. note:: + + Hardware diagnostics are operating system dependent and do not + apply universally. The commands shown apply for popular Unix-like + systems. Refer to your operating system's manual. + +Checking hard drives + Find the drive containing the repository and use *findmnt*, *mount* or *lsblk* + to learn the device path (typically */dev/...*) of the drive. + Then, smartmontools can retrieve self-diagnostics of the drive in question:: + + # smartctl -a /dev/sdSomething + + The *Offline_Uncorrectable*, *Current_Pending_Sector* and *Reported_Uncorrect* + attributes indicate data corruption. A high *UDMA_CRC_Error_Count* usually + indicates a bad cable. + + I/O errors logged by the system (refer to the system journal or + dmesg) can point to issues as well. 
I/O errors only affecting the + file system easily go unnoticed, since they are not reported to + applications (e.g. Borg), while these errors can still corrupt data. + + Drives can corrupt some sectors in one event, while remaining + reliable otherwise. Conversely, drives can fail completely with no + advance warning. If in doubt, copy all data from the drive in + question to another drive -- just in case it fails completely. + + If any of these are suspicious, a self-test is recommended:: + + # smartctl -t long /dev/sdSomething + + Running ``fsck`` if not done already might yield further insights. + +Checking memory + Intermittent issues, such as ``borg check`` finding errors + inconsistently between runs, are frequently caused by bad memory. + + Run memtest86+ (or an equivalent memory tester) to verify that + the memory subsystem is operating correctly. + +Checking processors + Processors rarely cause errors. If they do, they are usually overclocked + or otherwise operated outside their specifications. We do not recommend to + operate hardware outside its specifications for productive use. + + Tools to verify correct processor operation include Prime95 (mprime), linpack, + and the `Intel Processor Diagnostic Tool + `_ + (applies only to Intel processors). + +.. rubric:: Repairing a damaged repository + +With any defective hardware found and replaced, the damage done to the repository +needs to be ascertained and fixed. + +:ref:`borg_check` provides diagnostics and ``--repair`` options for repositories with +issues. We recommend to first run without ``--repair`` to assess the situation. +If the found issues and proposed repairs seem right, re-run "check" with ``--repair`` enabled. + +Security +######## How can I specify the encryption passphrase programmatically? ------------------------------------------------------------- -The encryption passphrase can be specified programmatically using the -`BORG_PASSPHRASE` environment variable. This is convenient when setting up -automated encrypted backups. Another option is to use -key file based encryption with a blank passphrase. See -:ref:`encrypted_repos` for more details. +There are several ways to specify a passphrase without human intervention: -.. _password_env: -.. note:: Be careful how you set the environment; using the ``env`` +Setting ``BORG_PASSPHRASE`` + The passphrase can be specified using the ``BORG_PASSPHRASE`` enviroment variable. + This is often the simplest option, but can be insecure if the script that sets it + is world-readable. + + .. _password_env: + .. note:: Be careful how you set the environment; using the ``env`` command, a ``system()`` call or using inline shell scripts + (e.g. ``BORG_PASSPHRASE=hunter2 borg ...``) might expose the credentials in the process list directly and they will be readable to all users on a system. Using ``export`` in a shell script file should be safe, however, as @@ -114,6 +312,84 @@ key file based encryption with a blank passphrase. See user `_. +Using ``BORG_PASSCOMMAND`` with a properly permissioned file + Another option is to create a file with a password in it in your home + directory and use permissions to keep anyone else from reading it. For + example, first create a key:: + + head -c 1024 /dev/urandom | base64 > ~/.borg-passphrase + chmod 400 ~/.borg-passphrase + + Then in an automated script one can put:: + + export BORG_PASSCOMMAND="cat ~/.borg-passphrase" + + and Borg will automatically use that passphrase. 
+ +Using keyfile-based encryption with a blank passphrase + It is possible to encrypt your repository in ``keyfile`` mode instead of the default + ``repokey`` mode and use a blank passphrase for the key file (simply press Enter twice + when ``borg init`` asks for the password). See :ref:`encrypted_repos` + for more details. + +Using ``BORG_PASSCOMMAND`` with macOS Keychain + macOS has a native manager for secrets (such as passphrases) which is safer + than just using a file as it is encrypted at rest and unlocked manually + (fortunately, the login keyring automatically unlocks when you login). With + the built-in ``security`` command, you can access it from the command line, + making it useful for ``BORG_PASSCOMMAND``. + + First generate a passphrase and use ``security`` to save it to your login + (default) keychain:: + + security add-generic-password -D secret -U -a $USER -s borg-passphrase -w $(head -c 1024 /dev/urandom | base64) + + In your backup script retrieve it in the ``BORG_PASSCOMMAND``:: + + export BORG_PASSCOMMAND="security find-generic-password -a $USER -s borg-passphrase -w" + +Using ``BORG_PASSCOMMAND`` with GNOME Keyring + GNOME also has a keyring daemon that can be used to store a Borg passphrase. + First ensure ``libsecret-tools``, ``gnome-keyring`` and ``libpam-gnome-keyring`` + are installed. If ``libpam-gnome-keyring`` wasn't already installed, ensure it + runs on login:: + + sudo sh -c "echo session optional pam_gnome_keyring.so auto_start >> /etc/pam.d/login" + sudo sh -c "echo password optional pam_gnome_keyring.so >> /etc/pam.d/passwd" + # you may need to relogin afterwards to activate the login keyring + + Then add a secret to the login keyring:: + + head -c 1024 /dev/urandom | base64 | secret-tool store borg-repository repo-name --label="Borg Passphrase" + + If a dialog box pops up prompting you to pick a password for a new keychain, use your + login password. If there is a checkbox for automatically unlocking on login, check it + to allow backups without any user intervention whatsoever. + + Once the secret is saved, retrieve it in a backup script using ``BORG_PASSCOMMAND``:: + + export BORG_PASSCOMMAND="secret-tool lookup borg-repository repo-name" + + .. note:: For this to automatically unlock the keychain it must be run + in the ``dbus`` session of an unlocked terminal; for example, running a backup + script as a ``cron`` job might not work unless you also ``export DISPLAY=:0`` + so ``secret-tool`` can pick up your open session. `It gets even more complicated`__ + when you are running the tool as a different user (e.g. running a backup as root + with the password stored in the user keyring). + +__ https://github.com/borgbackup/borg/pull/2837#discussion_r127641330 + +Using ``BORG_PASSCOMMAND`` with KWallet + KDE also has a keychain feature in the form of KWallet. The command-line tool + ``kwalletcli`` can be used to store and retrieve secrets. Ensure ``kwalletcli`` + is installed, generate a passphrase, and store it in your "wallet":: + + head -c 1024 /dev/urandom | base64 | kwalletcli -Pe borg-passphrase -f Passwords + + Once the secret is saved, retrieve it in a backup script using ``BORG_PASSCOMMAND``:: + + export BORG_PASSCOMMAND="kwalletcli -e borg-passphrase -f Passwords" + When backing up to remote encrypted repos, is encryption done locally? 
---------------------------------------------------------------------- @@ -178,6 +454,16 @@ Thus: - have media at another place - have a relatively recent backup on your media +How do I report a security issue with Borg? +------------------------------------------- + +Send a private email to the :ref:`security contact ` +if you think you have discovered a security issue. +Please disclose security issues responsibly. + +Common issues +############# + Why do I get "connection closed by remote" after a while? --------------------------------------------------------- @@ -201,10 +487,12 @@ yet noticed on the server. Try these settings: ClientAliveCountMax 3 If you have multiple borg create ... ; borg create ... commands in a already -serialized way in a single script, you need to give them --lock-wait N (with N +serialized way in a single script, you need to give them ``--lock-wait N`` (with N being a bit more than the time the server needs to terminate broken down connections and release the lock). +.. _disable_archive_chunks: + The borg cache eats way too much disk space, what can I do? ----------------------------------------------------------- @@ -235,60 +523,13 @@ This has some pros and cons, though: The long term plan to improve this is called "borgception", see :issue:`474`. -If a backup stops mid-way, does the already-backed-up data stay there? ----------------------------------------------------------------------- +Can I backup my root partition (/) with Borg? +--------------------------------------------- -Yes, |project_name| supports resuming backups. - -During a backup a special checkpoint archive named ``.checkpoint`` -is saved every checkpoint interval (the default value for this is 5 -minutes) containing all the data backed-up until that point. - -Checkpoints only happen between files (so they don't help for interruptions -happening while a very large file is being processed). - -This checkpoint archive is a valid archive (all files in it are valid and complete), -but it is only a partial backup (not all files that you wanted to backup are -contained in it). Having it in the repo until a successful, full backup is -completed is useful because it references all the transmitted chunks up -to the checkpoint. This means that in case of an interruption, you only need to -retransfer the data since the last checkpoint. - -If a backup was interrupted, you do not need to do any special considerations, -just invoke ``borg create`` as you always do. You may use the same archive name -as in previous attempt or a different one (e.g. if you always include the current -datetime), it does not matter. - -|project_name| always does full single-pass backups, so it will start again -from the beginning - but it will be much faster, because some of the data was -already stored into the repo (and is still referenced by the checkpoint -archive), so it does not need to get transmitted and stored again. - -Once your backup has finished successfully, you can delete all -``.checkpoint`` archives. If you run ``borg prune``, it will -also care for deleting unneeded checkpoints. - -How can I backup huge file(s) over a unstable connection? ---------------------------------------------------------- - -You can use this "split trick" as a workaround for the in-between-files-only -checkpoints (see above), huge files and a instable connection to the repository: - -Split the huge file(s) into parts of manageable size (e.g. 100MB) and create -a temporary archive of them. 
Borg will create checkpoints now more frequently -than if you try to backup the files in their original form (e.g. 100GB). - -After that, you can remove the parts again and backup the huge file(s) in -their original form. This will now work a lot faster as a lot of content chunks -are already in the repository. - -After you have successfully backed up the huge original file(s), you can remove -the temporary archive you made from the parts. - -We realize that this is just a better-than-nothing workaround, see :issue:`1198` -for a potential solution. - -Please note that this workaround only helps you for backup, not for restore. +Backing up your entire root partition works just fine, but remember to +exclude directories that make no sense to backup, such as /dev, /proc, +/sys, /tmp and /run, and to use ``--one-file-system`` if you only want to +backup the root partition (and not any mounted devices e.g.). If it crashes with a UnicodeError, what can I do? ------------------------------------------------- @@ -309,39 +550,15 @@ If you run into that, try this: the parent directory (or even everything) - mount the repo using FUSE and use some file manager -Can |project_name| add redundancy to the backup data to deal with hardware malfunction? ---------------------------------------------------------------------------------------- - -No, it can't. While that at first sounds like a good idea to defend against -some defect HDD sectors or SSD flash blocks, dealing with this in a -reliable way needs a lot of low-level storage layout information and -control which we do not have (and also can't get, even if we wanted). - -So, if you need that, consider RAID or a filesystem that offers redundant -storage or just make backups to different locations / different hardware. - -See also :issue:`225`. - -Can |project_name| verify data integrity of a backup archive? -------------------------------------------------------------- - -Yes, if you want to detect accidental data damage (like bit rot), use the -``check`` operation. It will notice corruption using CRCs and hashes. -If you want to be able to detect malicious tampering also, use an encrypted -repo. It will then be able to check using CRCs and HMACs. - .. _a_status_oddity: -I am seeing 'A' (added) status for a unchanged file!? ------------------------------------------------------ +I am seeing 'A' (added) status for an unchanged file!? +------------------------------------------------------ The files cache is used to determine whether |project_name| already "knows" / has backed up a file and if so, to skip the file from -chunking. It does intentionally *not* contain files that: - -- have >= 10 as "entry age" (|project_name| has not seen this file for a while) -- have a modification time (mtime) same as the newest mtime in the created - archive +chunking. It does intentionally *not* contain files that have a modification +time (mtime) same as the newest mtime in the created archive. So, if you see an 'A' status for unchanged file(s), they are likely the files with the most recent mtime in that archive. @@ -368,23 +585,58 @@ those files are reported as being added when, really, chunks are already used. +.. _always_chunking: + +It always chunks all my files, even unchanged ones! +--------------------------------------------------- + +|project_name| maintains a files cache where it remembers the mtime, size and +inode of files. 
When |project_name| does a new backup and starts processing a +file, it first looks whether the file has changed (compared to the values +stored in the files cache). If the values are the same, the file is assumed +unchanged and thus its contents won't get chunked (again). + +|project_name| can't keep an infinite history of files of course, thus entries +in the files cache have a "maximum time to live" which is set via the +environment variable BORG_FILES_CACHE_TTL (and defaults to 20). +Every time you do a backup (on the same machine, using the same user), the +cache entries' ttl values of files that were not "seen" are incremented by 1 +and if they reach BORG_FILES_CACHE_TTL, the entry is removed from the cache. + +So, for example, if you do daily backups of 26 different data sets A, B, +C, ..., Z on one machine (using the default TTL), the files from A will be +already forgotten when you repeat the same backups on the next day and it +will be slow because it would chunk all the files each time. If you set +BORG_FILES_CACHE_TTL to at least 26 (or maybe even a small multiple of that), +it would be much faster. + +Another possible reason is that files don't always have the same path, for +example if you mount a filesystem without stable mount points for each backup or if you are running the backup from a filesystem snapshot whose name is not stable. +If the directory where you mount a filesystem is different every time, +|project_name| assume they are different files. + + Is there a way to limit bandwidth with |project_name|? ------------------------------------------------------ -There is no command line option to limit bandwidth with |project_name|, but -bandwidth limiting can be accomplished with pipeviewer_: +To limit upload (i.e. :ref:`borg_create`) bandwidth, use the +``--remote-ratelimit`` option. + +There is no built-in way to limit *download* +(i.e. :ref:`borg_extract`) bandwidth, but limiting download bandwidth +can be accomplished with pipeviewer_: Create a wrapper script: /usr/local/bin/pv-wrapper :: - #!/bin/bash + #!/bin/sh ## -q, --quiet do not output any transfer information at all ## -L, --rate-limit RATE limit transfer to RATE bytes per second - export RATE=307200 + RATE=307200 pv -q -L $RATE | "$@" Add BORG_RSH environment variable to use pipeviewer wrapper script with ssh. :: - export BORG_RSH='/usr/local/bin/pv-wrapper.sh ssh' + export BORG_RSH='/usr/local/bin/pv-wrapper ssh' Now |project_name| will be bandwidth limited. Nice thing about pv is that you can change rate-limit on the fly: :: @@ -413,6 +665,74 @@ If you can reproduce the issue with the proven filesystem, please file an issue in the |project_name| issue tracker about that. +Why does running 'borg check --repair' warn about data loss? +------------------------------------------------------------ + +Repair usually works for recovering data in a corrupted archive. However, +it's impossible to predict all modes of corruption. In some very rare +instances, such as malfunctioning storage hardware, additional repo +corruption may occur. If you can't afford to lose the repo, it's strongly +recommended that you perform repair on a copy of the repo. 
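+
+A copy can be made with any ordinary file copy tool before attempting a repair.
+As a minimal sketch using only the Python standard library (the repository
+paths below are placeholders, not real paths):
+
+.. code-block:: python
+
+    import shutil
+
+    # duplicate the repository directory, preserving timestamps and permissions,
+    # so that ``borg check --repair`` can be tried on the copy first
+    shutil.copytree('/path/to/repo', '/path/to/repo.copy')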
+ +In other words, the warning is there to emphasize that |project_name|: + - Will perform automated routines that modify your backup repository + - Might not actually fix the problem you are experiencing + - Might, in very rare cases, further corrupt your repository + +In the case of malfunctioning hardware, such as a drive or USB hub +corrupting data when read or written, it's best to diagnose and fix the +cause of the initial corruption before attempting to repair the repo. If +the corruption is caused by a one time event such as a power outage, +running `borg check --repair` will fix most problems. + + +Why isn't there more progress / ETA information displayed? +---------------------------------------------------------- + +Some borg runs take quite a bit, so it would be nice to see a progress display, +maybe even including a ETA (expected time of "arrival" [here rather "completion"]). + +For some functionality, this can be done: if the total amount of work is more or +less known, we can display progress. So check if there is a ``--progress`` option. + +But sometimes, the total amount is unknown (e.g. for ``borg create`` we just do +a single pass over the filesystem, so we do not know the total file count or data +volume before reaching the end). Adding another pass just to determine that would +take additional time and could be incorrect, if the filesystem is changing. + +Even if the fs does not change and we knew count and size of all files, we still +could not compute the ``borg create`` ETA as we do not know the amount of changed +chunks, how the bandwidth of source and destination or system performance might +fluctuate. + +You see, trying to display ETA would be futile. The borg developers prefer to +rather not implement progress / ETA display than doing futile attempts. + +See also: https://xkcd.com/612/ + + +Miscellaneous +############# + +Requirements for the borg single-file binary, esp. (g)libc? +----------------------------------------------------------- + +We try to build the binary on old, but still supported systems - to keep the +minimum requirement for the (g)libc low. The (g)libc can't be bundled into +the binary as it needs to fit your kernel and OS, but Python and all other +required libraries will be bundled into the binary. + +If your system fulfills the minimum (g)libc requirement (see the README that +is released with the binary), there should be no problem. If you are slightly +below the required version, maybe just try. Due to the dynamic loading (or not +loading) of some shared libraries, it might still work depending on what +libraries are actually loaded and used. + +In the borg git repository, there is scripts/glibc_check.py that can determine +(based on the symbols' versions they want to link to) whether a set of given +(Linux) binaries works with a given glibc version. + + Why was Borg forked from Attic? ------------------------------- @@ -441,3 +761,80 @@ Borg intends to be: * do not break compatibility accidentally, without a good reason or without warning. allow compatibility breaking for other cases. * if major version number changes, it may have incompatible changes + +Migrating from Attic +#################### + +What are the differences between Attic and Borg? +------------------------------------------------ + +Borg is a fork of `Attic`_ and maintained by "`The Borg collective`_". + +.. _Attic: https://github.com/jborg/attic +.. 
_The Borg collective: https://borgbackup.readthedocs.org/en/latest/authors.html + +Here's a (incomplete) list of some major changes: + +* lots of attic issues fixed (see `issue #5 `_), + including critical data corruption bugs and security issues. +* more open, faster paced development (see `issue #1 `_) +* less chunk management overhead (less memory and disk usage for chunks index) +* faster remote cache resync (useful when backing up multiple machines into same repo) +* compression: no, lz4, zlib or lzma compression, adjustable compression levels +* repokey replaces problematic passphrase mode (you can't change the passphrase nor the pbkdf2 iteration count in "passphrase" mode) +* simple sparse file support, great for virtual machine disk files +* can read special files (e.g. block devices) or from stdin, write to stdout +* mkdir-based locking is more compatible than attic's posix locking +* uses fadvise to not spoil / blow up the fs cache +* better error messages / exception handling +* better logging, screen output, progress indication +* tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, macOS + +Please read the :ref:`changelog` (or ``docs/changes.rst`` in the source distribution) for more +information. + +Borg is not compatible with original Attic (but there is a one-way conversion). + +How do I migrate from Attic to Borg? +------------------------------------ + +Use :ref:`borg_upgrade`. This is a one-way process that cannot be reversed. + +There are some caveats: + +- The upgrade can only be performed on local repositories. + It cannot be performed on remote repositories. + +- If the repository is in "keyfile" encryption mode, the keyfile must + exist locally or it must be manually moved after performing the upgrade: + + 1. Locate the repository ID, contained in the ``config`` file in the repository. + 2. Locate the attic key file at ``~/.attic/keys/``. The correct key for the + repository starts with the line ``ATTIC_KEY ``. + 3. Copy the attic key file to ``~/.config/borg/keys/`` + 4. Change the first line from ``ATTIC_KEY ...`` to ``BORG_KEY ...``. + 5. Verify that the repository is now accessible (e.g. ``borg list ``). +- Attic and Borg use different :ref:`"chunker params" `. + This means that data added by Borg won't deduplicate with the existing data + stored by Attic. The effect is lessened if the files cache is used with Borg. +- Repositories in "passphrase" mode *must* be migrated to "repokey" mode using + :ref:`borg_key_migrate-to-repokey`. Borg does not support the "passphrase" mode + any other way. + +Why is my backup bigger than with attic? +---------------------------------------- + +Attic was rather unflexible when it comes to compression, it always +compressed using zlib level 6 (no way to switch compression off or +adjust the level or algorithm). + +The default in Borg is lz4, which is fast enough to not use significant CPU time +in most cases, but can only achieve modest compression. It still compresses +easily compressed data fairly well. + +zlib compression with all levels (1-9) as well as LZMA (1-6) are available +as well, for cases where they are worth it. + +Which choice is the best option depends on a number of factors, like +bandwidth to the repository, how well the data compresses, available CPU +power and so on. diff --git a/docs/global.rst.inc b/docs/global.rst.inc index d34f0965..b6b98ab7 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -15,6 +15,8 @@ .. _libacl: https://savannah.nongnu.org/projects/acl/ .. 
_libattr: https://savannah.nongnu.org/projects/attr/ .. _liblz4: https://github.com/Cyan4973/lz4 +.. _libb2: https://github.com/BLAKE2/libb2 +.. _ZeroMQ: http://zeromq.org/ .. _OpenSSL: https://www.openssl.org/ .. _`Python 3`: https://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash diff --git a/docs/index.rst b/docs/index.rst index 89a907de..96afc1f0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,11 +1,13 @@ .. include:: global.rst.inc - +.. highlight:: none Borg Documentation ================== .. include:: ../README.rst +.. when you add an element here, do not forget to add it to book.rst + .. toctree:: :maxdepth: 2 diff --git a/docs/installation.rst b/docs/installation.rst index 82a0cba6..561ce910 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -42,28 +42,25 @@ package which can be installed with the package manager. Distribution Source Command ============ ============================================= ======= Arch Linux `[community]`_ ``pacman -S borg`` -Debian `jessie-backports`_, `stretch`_, `sid`_ ``apt install borgbackup`` +Debian `Debian packages`_ ``apt install borgbackup`` Gentoo `ebuild`_ ``emerge borgbackup`` GNU Guix `GNU Guix`_ ``guix package --install borg`` -Fedora/RHEL `Fedora official repository`_, `EPEL`_ ``dnf install borgbackup`` +Fedora/RHEL `Fedora official repository`_ ``dnf install borgbackup`` FreeBSD `FreeBSD ports`_ ``cd /usr/ports/archivers/py-borgbackup && make install clean`` Mageia `cauldron`_ ``urpmi borgbackup`` NetBSD `pkgsrc`_ ``pkg_add py-borgbackup`` -NixOS `.nix file`_ N/A +NixOS `.nix file`_ ``nix-env -i borgbackup`` OpenBSD `OpenBSD ports`_ ``pkg_add borgbackup`` OpenIndiana `OpenIndiana hipster repository`_ ``pkg install borg`` -openSUSE `openSUSE official repository`_ ``zypper in python3-borgbackup`` +openSUSE `openSUSE official repository`_ ``zypper in borgbackup`` OS X `Brew cask`_ ``brew cask install borgbackup`` Raspbian `Raspbian testing`_ ``apt install borgbackup`` -Ubuntu `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbackup`` +Ubuntu `Ubuntu packages`_, `Ubuntu PPA`_ ``apt install borgbackup`` ============ ============================================= ======= .. _[community]: https://www.archlinux.org/packages/?name=borg -.. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup -.. _stretch: https://packages.debian.org/stretch/borgbackup -.. _sid: https://packages.debian.org/sid/borgbackup +.. _Debian packages: https://packages.debian.org/search?keywords=borgbackup&searchon=names&exact=1&suite=all§ion=all .. _Fedora official repository: https://apps.fedoraproject.org/packages/borgbackup -.. _EPEL: https://admin.fedoraproject.org/pkgdb/package/rpms/borgbackup/ .. _FreeBSD ports: http://www.freshports.org/archivers/py-borgbackup/ .. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup .. _GNU Guix: https://www.gnu.org/software/guix/package-list.html#borg @@ -73,11 +70,10 @@ Ubuntu `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac .. _OpenBSD ports: http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/sysutils/borgbackup/ .. _OpenIndiana hipster repository: http://pkg.openindiana.org/hipster/en/search.shtml?token=borg&action=Search .. _openSUSE official repository: http://software.opensuse.org/package/borgbackup -.. _Brew cask: http://caskroom.io/ +.. _Brew cask: https://caskroom.github.io/ .. _Raspbian testing: http://archive.raspbian.org/raspbian/pool/main/b/borgbackup/ -.. 
_16.04: https://launchpad.net/ubuntu/xenial/+source/borgbackup -.. _15.10: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup -.. _14.04: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup +.. _Ubuntu packages: http://packages.ubuntu.com/xenial/borgbackup +.. _Ubuntu PPA: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup Please ask package maintainers to build a package or, if you can package / submit it yourself, please help us with that! See :issue:`105` on @@ -88,11 +84,14 @@ github to followup on packaging efforts. Standalone Binary ----------------- +.. note:: Releases are signed with an OpenPGP key, see + :ref:`security-contact` for more instructions. + |project_name| binaries (generated with `pyinstaller`_) are available on the releases_ page for the following platforms: -* **Linux**: glibc >= 2.13 (ok for most supported Linux releases). Maybe older - glibc versions also work, if they are compatible to 2.13. +* **Linux**: glibc >= 2.13 (ok for most supported Linux releases). + Older glibc releases are untested and may not work. * **Mac OS X**: 10.10 (does not work with older OS X releases) * **FreeBSD**: 10.2 (unknown whether it works for older releases) @@ -129,24 +128,35 @@ To install on Windows just extract the zip anywhere and add the bin directory to From Source ----------- +.. note:: + + Some older Linux systems (like RHEL/CentOS 5) and Python interpreter binaries + compiled to be able to run on such systems (like Python installed via Anaconda) + might miss functions required by Borg. + + This issue will be detected early and Borg will abort with a fatal error. + Dependencies ~~~~~~~~~~~~ To install |project_name| from a source package (including pip), you have to install the following dependencies first: -* `Python 3`_ >= 3.4.0, plus development headers. Even though Python 3 is not +* `Python 3`_ >= 3.5.0, plus development headers. Even though Python 3 is not the default Python version on most systems, it is usually available as an optional install. * OpenSSL_ >= 1.0.0, plus development headers. -* libacl_ (that pulls in libattr_ also), both plus development headers. +* libacl_ (which depends on libattr_), both plus development headers. * liblz4_, plus development headers. +* ZeroMQ_ >= 4.0.0, plus development headers. * some Python dependencies, pip will automatically install them for you * optionally, the llfuse_ Python package is required if you wish to mount an archive as a FUSE filesystem. See setup.py about the version requirements. +* optionally libb2_. If it is not found a bundled implementation is used instead. If you have troubles finding the right package names, have a look at the -distribution specific sections below and also at the Vagrantfile in our repo. +distribution specific sections below or the Vagrantfile in the git repository, +which contains installation scripts for a number of operating systems. In the following, the steps needed to install the dependencies are listed for a selection of platforms. If your distribution is not covered by these @@ -166,6 +176,7 @@ Install the dependencies with development headers:: libssl-dev openssl \ libacl1-dev libacl1 \ liblz4-dev liblz4-1 \ + libzmq3-dev libzmq3 \ build-essential sudo apt-get install libfuse-dev fuse pkg-config # optional, for FUSE support @@ -176,6 +187,8 @@ group, log out and log in again. Fedora / Korora +++++++++++++++ +.. 
todo:: Add zeromq, use python 3.5 or 3.6 + Install the dependencies with development headers:: sudo dnf install python3 python3-devel python3-pip python3-virtualenv @@ -186,14 +199,33 @@ Install the dependencies with development headers:: sudo dnf install redhat-rpm-config # not needed in Korora sudo dnf install fuse-devel fuse pkgconfig # optional, for FUSE support +openSUSE Tumbleweed / Leap +++++++++++++++++++++++++++ + +.. todo:: Add zeromq, use python 3.5 or 3.6 + +Install the dependencies automatically using zypper:: + + sudo zypper source-install --build-deps-only borgbackup + +Alternatively, you can enumerate all build dependencies in the command line:: + + sudo zypper install python3 python3-devel \ + libacl-devel liblz4-devel openssl-devel \ + python3-Cython python3-Sphinx python3-msgpack-python \ + python3-pytest python3-setuptools python3-setuptools_scm \ + python3-sphinx_rtd_theme python3-llfuse gcc gcc-c++ Mac OS X ++++++++ +.. todo:: Add zeromq, use python 3.5 or 3.6 + Assuming you have installed homebrew_, the following steps will install all the dependencies:: brew install python3 lz4 openssl + brew install pkg-config # optional, for FUSE support pip3 install virtualenv For FUSE support to mount the backup archives, you need at least version 3.0 of @@ -204,8 +236,11 @@ FUSE for OS X, which is available as a pre-release_. FreeBSD ++++++++ -Listed below are packages you will need to install |project_name|, its dependencies, -and commands to make fuse work for using the mount command. + +.. todo:: Add zeromq, use python 3.5 or 3.6 + +Listed below are packages you will need to install Borg, its dependencies, +and commands to make FUSE work for using the mount command. :: @@ -217,7 +252,16 @@ and commands to make fuse work for using the mount command. echo 'vfs.usermount=1' >> /etc/sysctl.conf kldload fuse sysctl vfs.usermount=1 - + + +Windows 10's Linux Subsystem +++++++++++++++++++++++++++++ + +.. note:: + Running under Windows 10's Linux Subsystem is experimental and has not been tested much yet. + +Just follow the Ubuntu Linux installation steps. You can omit the FUSE stuff, it won't work anyway. + Windows +++++++ @@ -232,11 +276,13 @@ Cygwin .. note:: Running under Cygwin is experimental and has only been tested with Cygwin - (x86-64) v2.5.2. + (x86-64) v2.5.2. Remote repositories are known broken, local repositories should work. + +.. todo:: Add zeromq, use python 3.5 or 3.6 Use the Cygwin installer to install the dependencies:: - python3 python3-setuptools + python3 python3-devel python3-setuptools binutils gcc-g++ libopenssl openssl-devel liblz4_1 liblz4-devel @@ -293,9 +339,9 @@ While we try not to break master, there are no guarantees on anything. :: source borg-env/bin/activate # always before using! # install borg + dependencies into virtualenv - pip install sphinx # optional, to build the docs cd borg pip install -r requirements.d/development.txt + pip install -r requirements.d/docs.txt # optional, to build the docs pip install -r requirements.d/fuse.txt # optional, for FUSE support pip install -e . # in-place editable mode diff --git a/docs/internals.rst b/docs/internals.rst index 61d84589..786125d0 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -1,451 +1,45 @@ .. include:: global.rst.inc -.. highlight:: none .. _internals: Internals ========= -This page documents the internal data structures and storage -mechanisms of |project_name|. It is partly based on `mailing list -discussion about internals`_ and also on static code analysis. 
- - -Repository and Archives ------------------------ - -|project_name| stores its data in a `Repository`. Each repository can -hold multiple `Archives`, which represent individual backups that -contain a full archive of the files specified when the backup was -performed. Deduplication is performed across multiple backups, both on -data and metadata, using `Chunks` created by the chunker using the Buzhash_ -algorithm. - -Each repository has the following file structure: - -README - simple text file telling that this is a |project_name| repository - -config - repository configuration - -data/ - directory where the actual data is stored - -hints.%d - hints for repository compaction - -index.%d - repository index - -lock.roster and lock.exclusive/* - used by the locking system to manage shared and exclusive locks - - -Lock files ----------- - -|project_name| uses locks to get (exclusive or shared) access to the cache and -the repository. - -The locking system is based on creating a directory `lock.exclusive` (for -exclusive locks). Inside the lock directory, there is a file indication -hostname, process id and thread id of the lock holder. - -There is also a json file `lock.roster` that keeps a directory of all shared -and exclusive lockers. - -If the process can create the `lock.exclusive` directory for a resource, it has -the lock for it. If creation fails (because the directory has already been -created by some other process), lock acquisition fails. - -The cache lock is usually in `~/.cache/borg/REPOID/lock.*`. -The repository lock is in `repository/lock.*`. - -In case you run into troubles with the locks, you can use the ``borg break-lock`` -command after you first have made sure that no |project_name| process is -running on any machine that accesses this resource. Be very careful, the cache -or repository might get damaged if multiple processes use it at the same time. - - -Config file ------------ - -Each repository has a ``config`` file which which is a ``INI``-style file -and looks like this:: - - [repository] - version = 1 - segments_per_dir = 10000 - max_segment_size = 5242880 - id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 - -This is where the ``repository.id`` is stored. It is a unique -identifier for repositories. It will not change if you move the -repository around so you can make a local transfer then decide to move -the repository to another (even remote) location at a later time. - - -Keys ----- -The key to address the key/value store is usually computed like this: - -key = id = id_hash(unencrypted_data) - -The id_hash function is: - -* sha256 (no encryption keys available) -* hmac-sha256 (encryption keys available) - - -Segments and archives ---------------------- - -A |project_name| repository is a filesystem based transactional key/value -store. It makes extensive use of msgpack_ to store data and, unless -otherwise noted, data is stored in msgpack_ encoded files. - -Objects referenced by a key are stored inline in files (`segments`) of approx. -5MB size in numbered subdirectories of ``repo/data``. - -They contain: - -* header size -* crc -* size -* tag -* key -* data - -Segments are built locally, and then uploaded. Those files are -strictly append-only and modified only once. - -Tag is either ``PUT``, ``DELETE``, or ``COMMIT``. A segment file is -basically a transaction log where each repository operation is -appended to the file. 
So if an object is written to the repository a -``PUT`` tag is written to the file followed by the object id and -data. If an object is deleted a ``DELETE`` tag is appended -followed by the object id. A ``COMMIT`` tag is written when a -repository transaction is committed. When a repository is opened any -``PUT`` or ``DELETE`` operations not followed by a ``COMMIT`` tag are -discarded since they are part of a partial/uncommitted transaction. - - -The manifest ------------- - -The manifest is an object with an all-zero key that references all the -archives. -It contains: - -* version -* list of archive infos -* timestamp -* config - -Each archive info contains: - -* name -* id -* time - -It is the last object stored, in the last segment, and is replaced -each time. - -The Archive ------------ - -The archive metadata does not contain the file items directly. Only -references to other objects that contain that data. An archive is an -object that contains: - -* version -* name -* list of chunks containing item metadata -* cmdline -* hostname -* username -* time - - -The Item --------- - -Each item represents a file, directory or other fs item and is stored as an -``item`` dictionary that contains: - -* path -* list of data chunks -* user -* group -* uid -* gid -* mode (item type + permissions) -* source (for links) -* rdev (for devices) -* mtime, atime, ctime in nanoseconds -* xattrs -* acl -* bsdfiles - -All items are serialized using msgpack and the resulting byte stream -is fed into the same chunker algorithm as used for regular file data -and turned into deduplicated chunks. The reference to these chunks is then added -to the archive metadata. To achieve a finer granularity on this metadata -stream, we use different chunker params for this chunker, which result in -smaller chunks. - -A chunk is stored as an object as well, of course. - -.. _chunker_details: - -Chunks ------- - -The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorithm. -It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero, -producing chunks of 2^HASH_MASK_BITS Bytes on average. - -``borg create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE`` -can be used to tune the chunker parameters, the default is: - -- CHUNK_MIN_EXP = 19 (minimum chunk size = 2^19 B = 512 kiB) -- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB) -- HASH_MASK_BITS = 21 (statistical medium chunk size ~= 2^21 B = 2 MiB) -- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) - -The buzhash table is altered by XORing it with a seed randomly generated once -for the archive, and stored encrypted in the keyfile. This is to prevent chunk -size based fingerprinting attacks on your encrypted repo contents (to guess -what files you have based on a specific set of chunk sizes). - -For some more general usage hints see also ``--chunker-params``. - - -Indexes / Caches ----------------- - -The **files cache** is stored in ``cache/files`` and is indexed on the -``file path hash``. At backup time, it is used to quickly determine whether we -need to chunk a given file (or whether it is unchanged and we already have all -its pieces). -It contains: - -* age -* file inode number -* file size -* file mtime_ns -* file content chunk hashes - -The inode number is stored to make sure we distinguish between -different files, as a single path may not be unique across different -archives in different setups. 
- -The files cache is stored as a python associative array storing -python objects, which generates a lot of overhead. - -The **chunks cache** is stored in ``cache/chunks`` and is indexed on the -``chunk id_hash``. It is used to determine whether we already have a specific -chunk, to count references to it and also for statistics. -It contains: - -* reference count -* size -* encrypted/compressed size - -The **repository index** is stored in ``repo/index.%d`` and is indexed on the -``chunk id_hash``. It is used to determine a chunk's location in the repository. -It contains: - -* segment (that contains the chunk) -* offset (where the chunk is located in the segment) - -The repository index file is random access. - -Hints are stored in a file (``repo/hints.%d``). -It contains: - -* version -* list of segments -* compact - -hints and index can be recreated if damaged or lost using ``check --repair``. - -The chunks cache and the repository index are stored as hash tables, with -only one slot per bucket, but that spreads the collisions to the following -buckets. As a consequence the hash is just a start position for a linear -search, and if the element is not in the table the index is linearly crossed -until an empty bucket is found. - -When the hash table is filled to 75%, its size is grown. When it's -emptied to 25%, its size is shrinked. So operations on it have a variable -complexity between constant and linear with low factor, and memory overhead -varies between 33% and 300%. - -.. _cache-memory-usage: - -Indexes / Caches memory usage ------------------------------ - -Here is the estimated memory usage of |project_name|: - - chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS - - repo_index_usage = chunk_count * 40 - - chunks_cache_usage = chunk_count * 44 - - files_cache_usage = total_file_count * 240 + chunk_count * 80 - - mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage - = chunk_count * 164 + total_file_count * 240 - -All units are Bytes. - -It is assuming every chunk is referenced exactly once (if you have a lot of -duplicate chunks, you will have less chunks than estimated above). - -It is also assuming that typical chunk size is 2^HASH_MASK_BITS (if you have -a lot of files smaller than this statistical medium chunk size, you will have -more chunks than estimated above, because 1 file is at least 1 chunk). - -If a remote repository is used the repo index will be allocated on the remote side. - -E.g. backing up a total count of 1 Mi (IEC binary prefix e.g. 2^20) files with a total size of 1TiB. - -a) with ``create --chunker-params 10,23,16,4095`` (custom, like borg < 1.0 or attic): - - mem_usage = 2.8GiB - -b) with ``create --chunker-params 19,23,21,4095`` (default): - - mem_usage = 0.31GiB - -.. note:: There is also the ``--no-files-cache`` option to switch off the files cache. - You'll save some memory, but it will need to read / chunk all the files as - it can not skip unmodified files then. - -Encryption ----------- - -AES_-256 is used in CTR mode (so no need for padding). A 64bit initialization -vector is used, a `HMAC-SHA256`_ is computed on the encrypted chunk with a -random 64bit nonce and both are stored in the chunk. -The header of each chunk is: ``TYPE(1)`` + ``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``. -Encryption and HMAC use two different keys. - -In AES CTR mode you can think of the IV as the start value for the counter. -The counter itself is incremented by one after each 16 byte block. 
-The IV/counter is not required to be random but it must NEVER be reused. -So to accomplish this |project_name| initializes the encryption counter to be -higher than any previously used counter value before encrypting new data. - -To reduce payload size, only 8 bytes of the 16 bytes nonce is saved in the -payload, the first 8 bytes are always zeros. This does not affect security but -limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes). - -Encryption keys (and other secrets) are kept either in a key file on the client -('keyfile' mode) or in the repository config on the server ('repokey' mode). -In both cases, the secrets are generated from random and then encrypted by a -key derived from your passphrase (this happens on the client before the key -is stored into the keyfile or as repokey). - -The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable -or prompted for interactive usage. - - -Key files ---------- - -When initialized with the ``init -e keyfile`` command, |project_name| -needs an associated file in ``$HOME/.config/borg/keys`` to read and write -the repository. The format is based on msgpack_, base64 encoding and -PBKDF2_ SHA256 hashing, which is then encoded again in a msgpack_. - -The internal data structure is as follows: - -version - currently always an integer, 1 - -repository_id - the ``id`` field in the ``config`` ``INI`` file of the repository. - -enc_key - the key used to encrypt data with AES (256 bits) - -enc_hmac_key - the key used to HMAC the encrypted data (256 bits) - -id_key - the key used to HMAC the plaintext chunk data to compute the chunk's id - -chunk_seed - the seed for the buzhash chunking table (signed 32 bit integer) - -Those fields are processed using msgpack_. The utf-8 encoded passphrase -is processed with PBKDF2_ (SHA256_, 100000 iterations, random 256 bit salt) -to give us a derived key. The derived key is 256 bits long. -A `HMAC-SHA256`_ checksum of the above fields is generated with the derived -key, then the derived key is also used to encrypt the above pack of fields. -Then the result is stored in a another msgpack_ formatted as follows: - -version - currently always an integer, 1 - -salt - random 256 bits salt used to process the passphrase - -iterations - number of iterations used to process the passphrase (currently 100000) - -algorithm - the hashing algorithm used to process the passphrase and do the HMAC - checksum (currently the string ``sha256``) - -hash - the HMAC of the encrypted derived key - -data - the derived key, encrypted with AES over a PBKDF2_ SHA256 key - described above - -The resulting msgpack_ is then encoded using base64 and written to the -key file, wrapped using the standard ``textwrap`` module with a header. -The header is a single line with a MAGIC string, a space and a hexadecimal -representation of the repository id. - - -Compression ------------ - -|project_name| supports the following compression methods: - -- none (no compression, pass through data 1:1) -- lz4 (low compression, but super fast) -- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead], - level 1 is low, level 9 is high compression) -- lzma (level 0-9, level 0 is low, level 9 is high compression). - -Speed: none > lz4 > zlib > lzma -Compression: lzma > zlib > lz4 > none - -Be careful, higher zlib and especially lzma compression levels might take a -lot of resources (CPU and memory). - -The overall speed of course also depends on the speed of your target storage. 
-If that is slow, using a higher compression level might yield better overall -performance. You need to experiment a bit. Maybe just watch your CPU load, if -that is relatively low, increase compression until 1 core is 70-100% loaded. - -Even if your target storage is rather fast, you might see interesting effects: -while doing no compression at all (none) is a operation that takes no time, it -likely will need to store more data to the storage compared to using lz4. -The time needed to transfer and store the additional data might be much more -than if you had used lz4 (which is super fast, but still might compress your -data about 2:1). This is assuming your data is compressible (if you backup -already compressed data, trying to compress them at backup time is usually -pointless). - -Compression is applied after deduplication, thus using different compression -methods in one repo does not influence deduplication. - -See ``borg create --help`` about how to specify the compression level and its default. +The internals chapter describes and analyses most of the inner workings +of Borg. + +Borg uses a low-level, key-value store, the :ref:`repository`, and +implements a more complex data structure on top of it, which is made +up of the :ref:`manifest `, :ref:`archives `, +:ref:`items ` and data :ref:`chunks`. + +Each repository can hold multiple :ref:`archives `, which +represent individual backups that contain a full archive of the files +specified when the backup was performed. + +Deduplication is performed globally across all data in the repository +(multiple backups and even multiple hosts), both on data and file +metadata, using :ref:`chunks` created by the chunker using the +Buzhash_ algorithm. + +To actually perform the repository-wide deduplication, a hash of each +chunk is checked against the :ref:`chunks cache `, which is a +hash-table of all chunks that already exist. + +.. figure:: internals/structure.png + :figwidth: 100% + :width: 100% + + Layers in Borg. On the very top commands are implemented, using + a data access layer provided by the Archive and Item classes. + The "key" object provides both compression and authenticated + encryption used by the data access layer. The "key" object represents + the sole trust boundary in Borg. + The lowest layer is the repository, either accessed directly + (Repository) or remotely (RemoteRepository). + +.. toctree:: + :caption: Internals contents + + internals/security + internals/data-structures + internals/frontends diff --git a/docs/internals/compaction.png b/docs/internals/compaction.png new file mode 100644 index 00000000..927ae0b2 Binary files /dev/null and b/docs/internals/compaction.png differ diff --git a/docs/internals/compaction.vsd b/docs/internals/compaction.vsd new file mode 100644 index 00000000..73cc0b06 Binary files /dev/null and b/docs/internals/compaction.vsd differ diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst new file mode 100644 index 00000000..7c6d46b6 --- /dev/null +++ b/docs/internals/data-structures.rst @@ -0,0 +1,1153 @@ +.. include:: ../global.rst.inc +.. highlight:: none + +.. _data-structures: + +Data structures and file formats +================================ + +This page documents the internal data structures and storage +mechanisms of Borg. It is partly based on `mailing list +discussion about internals`_ and also on static code analysis. + +.. todo:: Clarify terms, perhaps create a glossary. + ID (client?) vs. key (repository?), + chunks (blob of data in repo?) vs. 
object (blob of data in repo, referred to from another object?), + +.. _repository: + +Repository +---------- + +.. Some parts of this description were taken from the Repository docstring + +Borg stores its data in a `Repository`, which is a file system based +transactional key-value store. Thus the repository does not know about +the concept of archives or items. + +Each repository has the following file structure: + +README + simple text file telling that this is a |project_name| repository + +config + repository configuration + +data/ + directory where the actual data is stored + +hints.%d + hints for repository compaction + +index.%d + repository index + +lock.roster and lock.exclusive/* + used by the locking system to manage shared and exclusive locks + +Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files +called segments_. Each segment is a series of log entries. The segment number together with the offset of each +entry relative to its segment start establishes an ordering of the log entries. This is the "definition" of +time for the purposes of the log. + +.. _config-file: + +Config file +~~~~~~~~~~~ + +Each repository has a ``config`` file which which is a ``INI``-style file +and looks like this:: + + [repository] + version = 1 + segments_per_dir = 1000 + max_segment_size = 524288000 + id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 + +This is where the ``repository.id`` is stored. It is a unique +identifier for repositories. It will not change if you move the +repository around so you can make a local transfer then decide to move +the repository to another (even remote) location at a later time. + +Keys +~~~~ + +Repository keys are byte-strings of fixed length (32 bytes), they +don't have a particular meaning (except for the Manifest_). + +Normally the keys are computed like this:: + + key = id = id_hash(unencrypted_data) + +The id_hash function depends on the :ref:`encryption mode `. + +As the id / key is used for deduplication, id_hash must be a cryptographically +strong hash or MAC. + +Segments +~~~~~~~~ + +Objects referenced by a key are stored inline in files (`segments`) of approx. +500 MB size in numbered subdirectories of ``repo/data``. The number of segments +per directory is controlled by the value of ``segments_per_dir``. If you change +this value in a non-empty repository, you may also need to relocate the segment +files manually. + +A segment starts with a magic number (``BORG_SEG`` as an eight byte ASCII string), +followed by a number of log entries. Each log entry consists of: + +* 32-bit size of the entry +* CRC32 of the entire entry (for a PUT this includes the data) +* entry tag: PUT, DELETE or COMMIT +* PUT and DELETE follow this with the 32 byte key +* PUT follow the key with the data + +Those files are strictly append-only and modified only once. + +Tag is either ``PUT``, ``DELETE``, or ``COMMIT``. + +When an object is written to the repository a ``PUT`` entry is written +to the file containing the object id and data. If an object is deleted +a ``DELETE`` entry is appended with the object id. + +A ``COMMIT`` tag is written when a repository transaction is +committed. The segment number of the segment containing +a commit is the **transaction ID**. + +When a repository is opened any ``PUT`` or ``DELETE`` operations not +followed by a ``COMMIT`` tag are discarded since they are part of a +partial/uncommitted transaction. 
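+
+The following is a minimal reading sketch, not Borg's actual implementation.
+It assumes the fields appear in the order listed above, little-endian
+encoding, a size field that covers the whole entry, and made-up numeric tag
+values; the real on-disk layout may differ in these details:
+
+.. code-block:: python
+
+    import struct
+
+    MAGIC = b'BORG_SEG'
+    TAG_PUT, TAG_DELETE, TAG_COMMIT = 0, 1, 2   # tag values assumed for illustration
+
+    def iter_entries(path):
+        """Yield (tag, key, data) tuples from one segment file (simplified)."""
+        with open(path, 'rb') as f:
+            if f.read(8) != MAGIC:
+                raise ValueError('not a segment file')
+            while True:
+                header = f.read(9)                # 32-bit size, 32-bit CRC32, 1-byte tag
+                if len(header) < 9:
+                    break                         # end of segment
+                size, crc32, tag = struct.unpack('<IIB', header)
+                rest = f.read(size - 9)           # assumes size includes the 9-byte header
+                key = rest[:32] if tag in (TAG_PUT, TAG_DELETE) else None
+                data = rest[32:] if tag == TAG_PUT else None
+                # a real reader would recompute and verify the CRC32 before trusting the entry
+                yield tag, key, data
+
+Because every entry is appended and checksummed, a reader can detect a damaged
+or truncated tail and apply the rule above: anything after the last ``COMMIT``
+is treated as an uncommitted transaction and discarded.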
+ +The size of individual segments is limited to 4 GiB, since the offset of entries +within segments is stored in a 32-bit unsigned integer in the repository index. + +Index, hints and integrity +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The **repository index** is stored in ``index.`` and is used to +determine an object's location in the repository. It is a HashIndex_, +a hash table using open addressing. It maps object keys_ to two +unsigned 32-bit integers; the first integer gives the segment number, +the second indicates the offset of the object's entry within the segment. + +The **hints file** is a msgpacked file named ``hints.``. +It contains: + +* version +* list of segments +* compact + +The **integrity file** is a msgpacked file named ``integrity.``. +It contains checksums of the index and hints files and is described in the +:ref:`Checksumming data structures ` section below. + +If the index or hints are corrupted, they are re-generated automatically. +If they are outdated, segments are replayed from the index state to the currently +committed transaction. + +Compaction +~~~~~~~~~~ + +For a given key only the last entry regarding the key, which is called current (all other entries are called +superseded), is relevant: If there is no entry or the last entry is a DELETE then the key does not exist. +Otherwise the last PUT defines the value of the key. + +By superseding a PUT (with either another PUT or a DELETE) the log entry becomes obsolete. A segment containing +such obsolete entries is called sparse, while a segment containing no such entries is called compact. + +Since writing a ``DELETE`` tag does not actually delete any data and +thus does not free disk space any log-based data store will need a +compaction strategy (somewhat analogous to a garbage collector). +Borg uses a simple forward compacting algorithm, +which avoids modifying existing segments. +Compaction runs when a commit is issued (unless the :ref:`append_only_mode` is active). +One client transaction can manifest as multiple physical transactions, +since compaction is transacted, too, and Borg does not distinguish between the two:: + + Perspective| Time --> + -----------+-------------- + Client | Begin transaction - Modify Data - Commit | (done) + Repository | Begin transaction - Modify Data - Commit | Compact segments - Commit | (done) + +The compaction algorithm requires two inputs in addition to the segments themselves: + +(i) Which segments are sparse, to avoid scanning all segments (impractical). + Further, Borg uses a conditional compaction strategy: Only those + segments that exceed a threshold sparsity are compacted. + + To implement the threshold condition efficiently, the sparsity has + to be stored as well. Therefore, Borg stores a mapping ``(segment + id,) -> (number of sparse bytes,)``. + + The 1.0.x series used a simpler non-conditional algorithm, + which only required the list of sparse segments. Thus, + it only stored a list, not the mapping described above. +(ii) Each segment's reference count, which indicates how many live objects are in a segment. + This is not strictly required to perform the algorithm. Rather, it is used to validate + that a segment is unused before deleting it. If the algorithm is incorrect, or the reference + count was not accounted correctly, then an assertion failure occurs. + +These two pieces of information are stored in the hints file (`hints.N`) +next to the index (`index.N`). + +When loading a hints file, Borg checks the version contained in the file. 
+The 1.0.x series writes version 1 of the format (with the segments list instead +of the mapping, mentioned above). Since Borg 1.0.4, version 2 is read as well. +The 1.1.x series writes version 2 of the format and reads either version. +When reading a version 1 hints file, Borg 1.1.x will +read all sparse segments to determine their sparsity. + +This process may take some time if a repository is kept in the append-only mode, +which causes the number of sparse segments to grow. Repositories not in append-only +mode have no sparse segments in 1.0.x, since compaction is unconditional. + +Compaction processes sparse segments from oldest to newest; sparse segments +which don't contain enough deleted data to justify compaction are skipped. This +avoids doing e.g. 500 MB of writing current data to a new segment when only +a couple kB were deleted in a segment. + +Segments that are compacted are read in entirety. Current entries are written to +a new segment, while superseded entries are omitted. After each segment an intermediary +commit is written to the new segment. Then, the old segment is deleted +(asserting that the reference count diminished to zero), freeing disk space. + +A simplified example (excluding conditional compaction and with simpler +commit logic) showing the principal operation of compaction: + +.. figure:: compaction.png + :figwidth: 100% + :width: 100% + +(The actual algorithm is more complex to avoid various consistency issues, refer to +the ``borg.repository`` module for more comments and documentation on these issues.) + +.. _internals_storage_quota: + +Storage quotas +~~~~~~~~~~~~~~ + +Quotas are implemented at the Repository level. The active quota of a repository +is determined by the ``storage_quota`` `config` entry or a run-time override (via :ref:`borg_serve`). +The currently used quota is stored in the hints file. Operations (PUT and DELETE) during +a transaction modify the currently used quota: + +- A PUT adds the size of the *log entry* to the quota, + i.e. the length of the data plus the 41 byte header. +- A DELETE subtracts the size of the deleted log entry from the quota, + which includes the header. + +Thus, PUT and DELETE are symmetric and cancel each other out precisely. + +The quota does not track on-disk size overheads (due to conditional compaction +or append-only mode). In normal operation the inclusion of the log entry headers +in the quota act as a faithful proxy for index and hints overheads. + +By tracking effective content size, the client can *always* recover from a full quota +by deleting archives. This would not be possible if the quota tracked on-disk size, +since journaling DELETEs requires extra disk space before space is freed. +Tracking effective size on the other hand accounts DELETEs immediately as freeing quota. + +.. rubric:: Enforcing the quota + +The storage quota is meant as a robust mechanism for service providers, therefore +:ref:`borg_serve` has to enforce it without loopholes (e.g. modified clients). +The following sections refer to using quotas on remotely accessed repositories. +For local access, consider *client* and *serve* the same. +Accordingly, quotas cannot be enforced with local access, +since the quota can be changed in the repository config. + +The quota is enforcible only if *all* :ref:`borg_serve` versions +accessible to clients support quotas (see next section). Further, quota is +per repository. 
Therefore, ensure clients can only access a defined set of repositories +with their quotas set, using ``--restrict-to-repository``. + +If the client exceeds the storage quota the ``StorageQuotaExceeded`` exception is +raised. Normally a client could ignore such an exception and just send a ``commit()`` +command anyway, circumventing the quota. However, when ``StorageQuotaExceeded`` is raised, +it is stored in the ``transaction_doomed`` attribute of the repository. +If the transaction is doomed, then commit will re-raise this exception, aborting the commit. + +The transaction_doomed indicator is reset on a rollback (which erases the quota-exceeding +state). + +.. rubric:: Compatibility with older servers and enabling quota after-the-fact + +If no quota data is stored in the hints file, Borg assumes zero quota is used. +Thus, if a repository with an enabled quota is written to with an older ``borg serve`` +version that does not understand quotas, then the quota usage will be erased. + +The client version is irrelevant to the storage quota and has no part in it. +The form of error messages due to exceeding quota varies with client versions. + +A similar situation arises when upgrading from a Borg release that did not have quotas. +Borg will start tracking quota use from the time of the upgrade, starting at zero. + +If the quota shall be enforced accurately in these cases, either + +- delete the ``index.N`` and ``hints.N`` files, forcing Borg to rebuild both, + re-acquiring quota data in the process, or +- edit the msgpacked ``hints.N`` file (not recommended and thus not + documented further). + +The object graph +---------------- + +On top of the simple key-value store offered by the Repository_, +Borg builds a much more sophisticated data structure that is essentially +a completely encrypted object graph. Objects, such as archives_, are referenced +by their chunk ID, which is cryptographically derived from their contents. +More on how this helps security in :ref:`security_structural_auth`. + +.. figure:: object-graph.png + :figwidth: 100% + :width: 100% + +.. _manifest: + +The manifest +~~~~~~~~~~~~ + +The manifest is the root of the object hierarchy. It references +all archives in a repository, and thus all data in it. +Since no object references it, it cannot be stored under its ID key. +Instead, the manifest has a fixed all-zero key. + +The manifest is rewritten each time an archive is created, deleted, +or modified. It looks like this: + +.. code-block:: python + + { + b'version': 1, + b'timestamp': b'2017-05-05T12:42:23.042864', + b'item_keys': [b'acl_access', b'acl_default', ...], + b'config': {}, + b'archives': { + b'2017-05-05-system-backup': { + b'id': b'<32 byte binary object ID>', + b'time': b'2017-05-05T12:42:22.942864', + }, + }, + b'tam': ..., + } + +The *version* field can be either 1 or 2. The versions differ in the +way feature flags are handled, described below. + +The *timestamp* field is used to avoid logical replay attacks where +the server just resets the repository to a previous state. + +*item_keys* is a list containing all Item_ keys that may be encountered in +the repository. It is used by *borg check*, which verifies that all keys +in all items are a subset of these keys. Thus, an older version of *borg check* +supporting this mechanism can correctly detect keys introduced in later versions. 
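+
+A minimal sketch of reading the manifest from its fixed all-zero key follows; the
+``repository`` and ``key`` objects and their methods are assumed interfaces here,
+and decryption details, error handling and the TAM verification are glossed over:
+
+.. code-block:: python
+
+    import msgpack
+
+    MANIFEST_ID = bytes(32)   # the fixed all-zero object ID of the manifest
+
+    def load_manifest(repository, key):
+        data = key.decrypt(MANIFEST_ID, repository.get(MANIFEST_ID))
+        manifest = msgpack.unpackb(data)
+        archives = manifest[b'archives']          # archive name -> {b'id': ..., b'time': ...}
+        item_keys = set(manifest[b'item_keys'])   # keys that may appear in item dictionaries
+        return manifest, archives, item_keys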
+ +The *tam* key is part of the :ref:`tertiary authentication mechanism ` +(formerly known as "tertiary authentication for metadata") and authenticates +the manifest, since an ID check is not possible. + +*config* is a general-purpose location for additional metadata. All versions +of Borg preserve its contents (it may have been a better place for *item_keys*, +which is not preserved by unaware Borg versions, releases predating 1.0.4). + +Feature flags ++++++++++++++ + +Feature flags are used to add features to data structures without causing +corruption if older versions are used to access or modify them. The main issues +to consider for a feature flag oriented design are flag granularity, +flag storage, and cache_ invalidation. + +Feature flags are divided in approximately three categories, detailed below. +Due to the nature of ID-based deduplication, write (i.e. creating archives) and +read access are not symmetric; it is possible to create archives referencing +chunks that are not readable with the current feature set. The third +category are operations that require accurate reference counts, for example +archive deletion and check. + +As the manifest is always updated and always read, it is the ideal place to store +feature flags, comparable to the super-block of a file system. The only problem +is to recover from a lost manifest, i.e. how is it possible to detect which feature +flags are enabled, if there is no manifest to tell. This issue is left open at this time, +but is not expected to be a major hurdle; it doesn't have to be handled efficiently, it just +needs to be handled. + +Lastly, cache_ invalidation is handled by noting which feature +flags were and which were not understood while manipulating a cache. +This allows to detect whether the cache needs to be invalidated, +i.e. rebuilt from scratch. See `Cache feature flags`_ below. + +The *config* key stores the feature flags enabled on a repository: + +.. code-block:: python + + config = { + b'feature_flags': { + b'read': { + b'mandatory': [b'some_feature'], + }, + b'check': { + b'mandatory': [b'other_feature'], + } + b'write': ..., + b'delete': ... + }, + } + +The top-level distinction for feature flags is the operation the client intends +to perform, + +| the *read* operation includes extraction and listing of archives, +| the *write* operation includes creating new archives, +| the *delete* (archives) operation, +| the *check* operation requires full understanding of everything in the repository. +| + +These are weakly set-ordered; *check* will include everything required for *delete*, +*delete* will likely include *write* and *read*. However, *read* may require more +features than *write* (due to ID-based deduplication, *write* does not necessarily +require reading/understanding repository contents). + +Each operation can contain several sets of feature flags. Only one set, +the *mandatory* set is currently defined. + +Upon reading the manifest, the Borg client has already determined which operation +should be performed. If feature flags are found in the manifest, the set +of feature flags supported by the client is compared to the mandatory set +found in the manifest. If any unsupported flags are found (i.e. the mandatory set is +not a subset of the features supported by the Borg client used), the operation +is aborted with a *MandatoryFeatureUnsupported* error: + + Unsupported repository feature(s) {'some_feature'}. A newer version of borg is required to access this repository. 
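+
+The check itself boils down to a set-subset test. A simplified sketch (the names and
+the shape of ``SUPPORTED_FEATURES`` are illustrative, not Borg's actual code):
+
+.. code-block:: python
+
+    class MandatoryFeatureUnsupported(Exception):
+        pass
+
+    # features this client understands, per operation (illustrative: nothing defined yet)
+    SUPPORTED_FEATURES = {'read': set(), 'write': set(), 'delete': set(), 'check': set()}
+
+    def check_feature_flags(config, operation):
+        flags = config.get(b'feature_flags', {})
+        mandatory = set(flags.get(operation.encode(), {}).get(b'mandatory', []))
+        supported = {name.encode() for name in SUPPORTED_FEATURES[operation]}
+        if not mandatory <= supported:
+            raise MandatoryFeatureUnsupported(sorted(mandatory - supported))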
+ +Older Borg releases do not have this concept and do not perform feature flags checks. +These can be locked out with manifest version 2. Thus, the only difference between +manifest versions 1 and 2 is that the latter is only accepted by Borg releases +implementing feature flags. + +Therefore, as soon as any mandatory feature flag is enabled in a repository, +the manifest version must be switched to version 2 in order to lock out all +Borg releases unaware of feature flags. + +.. _Cache feature flags: +.. rubric:: Cache feature flags + +`The cache`_ does not have its separate set of feature flags. Instead, Borg stores +which flags were used to create or modify a cache. + +All mandatory manifest features from all operations are gathered in one set. +Then, two sets of features are computed; + +- those features that are supported by the client and mandated by the manifest + are added to the *mandatory_features* set, +- the *ignored_features* set comprised of those features mandated by the manifest, + but not supported by the client. + +Because the client previously checked compliance with the mandatory set of features +required for the particular operation it is executing, the *mandatory_features* set +will contain all necessary features required for using the cache safely. + +Conversely, the *ignored_features* set contains only those features which were not +relevant to operating the cache. Otherwise, the client would not pass the feature +set test against the manifest. + +When opening a cache and the *mandatory_features* set is not a subset of the features +supported by the client, the cache is wiped out and rebuilt, +since a client not supporting a mandatory feature that the cache was built with +would be unable to update it correctly. +The assumption behind this behaviour is that any of the unsupported features could have +been reflected in the cache and there is no way for the client to discern whether +that is the case. +Meanwhile, it may not be practical for every feature to have clients using it track +whether the feature had an impact on the cache. +Therefore, the cache is wiped. + +When opening a cache and the intersection of *ignored_features* and the features +supported by the client contains any elements, i.e. the client possesses features +that the previous client did not have and those new features are enabled in the repository, +the cache is wiped out and rebuilt. + +While the former condition likely requires no tweaks, the latter condition is formulated +in an especially conservative way to play it safe. It seems likely that specific features +might be exempted from the latter condition. + +.. rubric:: Defined feature flags + +Currently no feature flags are defined. + +From currently planned features, some examples follow, +these may/may not be implemented and purely serve as examples. + +- A mandatory *read* feature could be using a different encryption scheme (e.g. session keys). + This may not be mandatory for the *write* operation - reading data is not strictly required for + creating an archive. +- Any additions to the way chunks are referenced (e.g. to support larger archives) would + become a mandatory *delete* and *check* feature; *delete* implies knowing correct + reference counts, so all object references need to be understood. *check* must + discover the entire object graph as well, otherwise the "orphan chunks check" + could delete data still in use. + +.. _archive: + +Archives +~~~~~~~~ + +Each archive is an object referenced by the manifest. 
The archive object +itself does not store any of the data contained in the archive it describes. + +Instead, it contains a list of chunks which form a msgpacked stream of items_. +The archive object itself further contains some metadata: + +* *version* +* *name*, which might differ from the name set in the manifest. + When :ref:`borg_check` rebuilds the manifest (e.g. if it was corrupted) and finds + more than one archive object with the same name, it adds a counter to the name + in the manifest, but leaves the *name* field of the archives as it was. +* *items*, a list of chunk IDs containing item metadata (size: count * ~34B) +* *cmdline*, the command line which was used to create the archive +* *hostname* +* *username* +* *time* and *time_end* are the start and end timestamps, respectively +* *comment*, a user-specified archive comment +* *chunker_params* are the :ref:`chunker-params ` used for creating the archive. + This is used by :ref:`borg_recreate` to determine whether a given archive needs rechunking. +* Some other pieces of information related to recreate. + +.. _archive_limitation: + +.. rubric:: Note about archive limitations + +The archive is currently stored as a single object in the repository +and thus limited in size to MAX_OBJECT_SIZE (20MiB). + +As one chunk list entry is ~40B, that means we can reference ~500,000 item +metadata stream chunks per archive. + +Each item metadata stream chunk is ~128kiB (see hardcoded ITEMS_CHUNKER_PARAMS). + +So that means the whole item metadata stream is limited to ~64GiB. +If compression is used, the amount of storable metadata is bigger - by the +compression factor. + +If the average size of an item entry is 100B (small size file, no ACLs/xattrs), +that means a limit of ~640 million files/directories per archive. + +If the average size of an item entry is 2kB (~100MB size files or more +ACLs/xattrs), the limit will be ~32 million files/directories per archive. + +If one tries to create an archive object bigger than MAX_OBJECT_SIZE, a fatal +IntegrityError will be raised. + +A workaround is to create multiple archives with fewer items each, see +also :issue:`1452`. + +.. _item: + +Items +~~~~~ + +Each item represents a file, directory or other file system item and is stored as a +dictionary created by the ``Item`` class that contains: + +* path +* list of data chunks (size: count * ~40B) +* user +* group +* uid +* gid +* mode (item type + permissions) +* source (for symlinks, and for hardlinks within one archive) +* rdev (for device files) +* mtime, atime, ctime in nanoseconds +* xattrs +* acl (various OS-dependent fields) +* bsdflags + +All items are serialized using msgpack and the resulting byte stream +is fed into the same chunker algorithm as used for regular file data +and turned into deduplicated chunks. The reference to these chunks is then added +to the archive metadata. To achieve a finer granularity on this metadata +stream, we use different chunker params for this chunker, which result in +smaller chunks. + +A chunk is stored as an object as well, of course. + +.. _chunks: +.. _chunker_details: + +Chunks +~~~~~~ + +The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorithm. +It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero, +producing chunks of 2^HASH_MASK_BITS Bytes on average.
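+
+A much simplified content-defined chunker, to make the trigger condition concrete.
+It is **not** Borg's buzhash: the rolling hash below is a toy that runs over the whole
+prefix instead of a fixed window, CHUNK_MIN_EXP/CHUNK_MAX_EXP clamping is omitted, and
+the mask is smaller than the default so that the example cuts often:
+
+.. code-block:: python
+
+    import random
+
+    HASH_MASK_BITS = 16                     # Borg's default is 21
+    MASK = (1 << HASH_MASK_BITS) - 1
+    random.seed(0)
+    TABLE = [random.getrandbits(32) for _ in range(256)]   # stand-in for the buzhash table
+
+    def toy_chunker(data):
+        chunks, start, h = [], 0, 0
+        for i, byte in enumerate(data):
+            # rotate-and-xor update; unlike buzhash, old bytes are never removed
+            h = (((h << 1) | (h >> 31)) & 0xffffffff) ^ TABLE[byte]
+            if h & MASK == 0:               # low HASH_MASK_BITS bits are zero -> cut here
+                chunks.append(data[start:i + 1])
+                start = i + 1
+        if start < len(data):
+            chunks.append(data[start:])
+        return chunks
+
+    # average chunk size comes out near 2**HASH_MASK_BITS bytes on random input
+    chunks = toy_chunker(bytes(random.getrandbits(8) for _ in range(1 << 20)))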
+ +Buzhash is **only** used for cutting the chunks at places defined by the +content, the buzhash value is **not** used as the deduplication criteria (we +use a cryptographically strong hash/MAC over the chunk contents for this, the +id_hash). + +``borg create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE`` +can be used to tune the chunker parameters, the default is: + +- CHUNK_MIN_EXP = 19 (minimum chunk size = 2^19 B = 512 kiB) +- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB) +- HASH_MASK_BITS = 21 (statistical medium chunk size ~= 2^21 B = 2 MiB) +- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) + +The buzhash table is altered by XORing it with a seed randomly generated once +for the archive, and stored encrypted in the keyfile. This is to prevent chunk +size based fingerprinting attacks on your encrypted repo contents (to guess +what files you have based on a specific set of chunk sizes). + +For some more general usage hints see also ``--chunker-params``. + +.. _cache: + +The cache +--------- + +The **files cache** is stored in ``cache/files`` and is used at backup time to +quickly determine whether a given file is unchanged and we have all its chunks. + +In memory, the files cache is a key -> value mapping (a Python *dict*) and contains: + +* key: id_hash of the encoded, absolute file path +* value: + + - file inode number + - file size + - file mtime_ns + - age (0 [newest], 1, 2, 3, ..., BORG_FILES_CACHE_TTL - 1) + - list of chunk ids representing the file's contents + +To determine whether a file has not changed, cached values are looked up via +the key in the mapping and compared to the current file attribute values. + +If the file's size, mtime_ns and inode number is still the same, it is +considered to not have changed. In that case, we check that all file content +chunks are (still) present in the repository (we check that via the chunks +cache). + +If everything is matching and all chunks are present, the file is not read / +chunked / hashed again (but still a file metadata item is written to the +archive, made from fresh file metadata read from the filesystem). This is +what makes borg so fast when processing unchanged files. + +If there is a mismatch or a chunk is missing, the file is read / chunked / +hashed. Chunks already present in repo won't be transferred to repo again. + +The inode number is stored and compared to make sure we distinguish between +different files, as a single path may not be unique across different +archives in different setups. + +Not all filesystems have stable inode numbers. If that is the case, borg can +be told to ignore the inode number in the check via --ignore-inode. + +The age value is used for cache management. If a file is "seen" in a backup +run, its age is reset to 0, otherwise its age is incremented by one. +If a file was not seen in BORG_FILES_CACHE_TTL backups, its cache entry is +removed. See also: :ref:`always_chunking` and :ref:`a_status_oddity` + +The files cache is a python dictionary, storing python objects, which +generates a lot of overhead. + +Borg can also work without using the files cache (saves memory if you have a +lot of files or not much RAM free), then all files are assumed to have changed. +This is usually much slower than with files cache. + +The on-disk format of the files cache is a stream of msgpacked tuples (key, value). +Loading the files cache involves reading the file, one msgpack object at a time, +unpacking it, and msgpacking the value (in an effort to save memory). 
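+
+The files cache lookup described above can be sketched like this (the tuple layout
+and names are illustrative; the real logic and exact comparison rules live in
+``borg.cache``):
+
+.. code-block:: python
+
+    def file_known_and_unchanged(files_cache, chunk_index, path_hash, st):
+        """Return the cached chunk ids if the file looks unchanged, else None."""
+        entry = files_cache.get(path_hash)          # path_hash = id_hash(encoded absolute path)
+        if entry is None:
+            return None
+        inode, size, mtime_ns, age, chunk_ids = entry
+        if (size, mtime_ns, inode) != (st.st_size, st.st_mtime_ns, st.st_ino):
+            return None                             # metadata changed -> read/chunk/hash again
+        if not all(cid in chunk_index for cid in chunk_ids):
+            return None                             # a content chunk is missing from the repo
+        files_cache[path_hash] = (inode, size, mtime_ns, 0, chunk_ids)   # "seen": reset age to 0
+        return chunk_ids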
+ +The **chunks cache** is stored in ``cache/chunks`` and is used to determine +whether we already have a specific chunk, to count references to it and also +for statistics. + +The chunks cache is a key -> value mapping and contains: + +* key: + + - chunk id_hash +* value: + + - reference count + - size + - encrypted/compressed size + +The chunks cache is a HashIndex_. Due to some restrictions of HashIndex, +the reference count of each given chunk is limited to a constant, MAX_VALUE +(introduced below in HashIndex_), approximately 2**32. +If a reference count hits MAX_VALUE, decrementing it yields MAX_VALUE again, +i.e. the reference count is pinned to MAX_VALUE. + +.. _cache-memory-usage: + +Indexes / Caches memory usage +----------------------------- + +Here is the estimated memory usage of |project_name| - it's complicated:: + + chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS + + repo_index_usage = chunk_count * 40 + + chunks_cache_usage = chunk_count * 44 + + files_cache_usage = total_file_count * 240 + chunk_count * 80 + + mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage + = chunk_count * 164 + total_file_count * 240 + +Due to the hashtables, the best/usual/worst cases for memory allocation can +be estimated like that:: + + mem_allocation = mem_usage / load_factor # l_f = 0.25 .. 0.75 + + mem_allocation_peak = mem_allocation * (1 + growth_factor) # g_f = 1.1 .. 2 + +All units are Bytes. + +It is assuming every chunk is referenced exactly once (if you have a lot of +duplicate chunks, you will have less chunks than estimated above). + +It is also assuming that typical chunk size is 2^HASH_MASK_BITS (if you have +a lot of files smaller than this statistical medium chunk size, you will have +more chunks than estimated above, because 1 file is at least 1 chunk). + +If a remote repository is used the repo index will be allocated on the remote side. + +The chunks cache, files cache and the repo index are all implemented as hash +tables. A hash table must have a significant amount of unused entries to be +fast - the so-called load factor gives the used/unused elements ratio. + +When a hash table gets full (load factor getting too high), it needs to be +grown (allocate new, bigger hash table, copy all elements over to it, free old +hash table) - this will lead to short-time peaks in memory usage each time this +happens. Usually does not happen for all hashtables at the same time, though. +For small hash tables, we start with a growth factor of 2, which comes down to +~1.1x for big hash tables. + +E.g. backing up a total count of 1 Mi (IEC binary prefix i.e. 2^20) files with a total size of 1TiB. + +a) with ``create --chunker-params 10,23,16,4095`` (custom, like borg < 1.0 or attic): + + mem_usage = 2.8GiB + +b) with ``create --chunker-params 19,23,21,4095`` (default): + + mem_usage = 0.31GiB + +.. note:: There is also the ``--files-cache=disabled`` option to disable the files cache. + You'll save some memory, but it will need to read / chunk all the files as + it can not skip unmodified files then. + +HashIndex +--------- + +The chunks cache and the repository index are stored as hash tables, with +only one slot per bucket, spreading hash collisions to the following +buckets. As a consequence the hash is just a start position for a linear +search. If a key is looked up that is not in the table, then the hash table +is searched from the start position (the hash) until the first empty +bucket is reached. 
+ +This particular mode of operation is open addressing with linear probing. + +When the hash table is filled to 75%, its size is grown. When it's +emptied to 25%, its size is shrunk. Operations on it have a variable +complexity between constant and linear with low factor, and memory overhead +varies between 33% and 300%. + +If an element is deleted, and the slot behind the deleted element is not empty, +then the element will leave a tombstone, a bucket marked as deleted. Tombstones +are only removed by insertions using the tombstone's bucket, or by resizing +the table. They present the same load to the hash table as a real entry, +but do not count towards the regular load factor. + +Thus, if the number of empty slots becomes too low (recall that linear probing +for an element not in the index stops at the first empty slot), the hash table +is rebuilt. The maximum *effective* load factor, i.e. including tombstones, is 93%. + +Data in a HashIndex is always stored in little-endian format, which increases +efficiency for almost everyone, since basically no one uses big-endian processors +any more. + +HashIndex does not use a hashing function, because all keys (save for the manifest key) are +outputs of a cryptographic hash or MAC and thus already have excellent distribution. +Thus, HashIndex simply uses the first 32 bits of the key as its "hash". + +The format is easy to read and write, because the buckets array has the same layout +in memory and on disk. Only the header formats differ. The on-disk header is +``struct HashHeader``: + +- First, the HashIndex magic, the eight byte ASCII string "BORG_IDX". +- Second, the signed 32-bit number of entries (i.e. buckets which are not deleted and not empty). +- Third, the signed 32-bit number of buckets, i.e. the length of the buckets array + contained in the file, and the modulus for index calculation. +- Fourth, the signed 8-bit length of keys. +- Fifth, the signed 8-bit length of values. This has to be at least four bytes. + +All fields are packed. + +The HashIndex is *not* a general purpose data structure. +The value size must be at least 4 bytes, and these first bytes are used for in-band +signalling in the data structure itself. + +The constant MAX_VALUE (defined as 2**32-1025 = 4294966271) defines the valid range for +these 4 bytes when interpreted as a uint32_t: 0 to MAX_VALUE (inclusive). +The following reserved values beyond MAX_VALUE are currently in use (byte order is LE): + +- 0xffffffff marks empty buckets in the hash table +- 0xfffffffe marks deleted buckets in the hash table + +HashIndex is implemented in C and wrapped with Cython in a class-based interface. +The Cython wrapper checks every passed value against these reserved values and +raises an AssertionError if they are used. + +Encryption +---------- + +.. seealso:: The :ref:`borgcrypto` section for an in-depth review. + +AES_-256 is used in CTR mode (so no need for padding). A 64 bit initialization +vector is used, a MAC is computed over the encrypted chunk, +and both are stored in the chunk. Encryption and MAC use two different keys. +Each chunk consists of ``TYPE(1)`` + ``MAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``: + +.. figure:: encryption.png + :figwidth: 100% + :width: 100% + +In AES-CTR mode you can think of the IV as the start value for the counter. +The counter itself is incremented by one after each 16 byte block. +The IV/counter is not required to be random but it must NEVER be reused.
+So to accomplish this |project_name| initializes the encryption counter to be +higher than any previously used counter value before encrypting new data. + +To reduce payload size, only 8 bytes of the 16-byte nonce are saved in the +payload; the first 8 bytes are always zeros. This does not affect security but +limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes). + +Encryption keys (and other secrets) are kept either in a key file on the client +('keyfile' mode) or in the repository config on the server ('repokey' mode). +In both cases, the secrets are generated from random data and then encrypted by a +key derived from your passphrase (this happens on the client before the key +is stored into the keyfile or as repokey). + +The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable +or prompted for interactive usage. + +.. _key_files: + +Key files +--------- + +.. seealso:: The :ref:`key_encryption` section for an in-depth review of the key encryption. + +When initialized with the ``init -e keyfile`` command, |project_name| +needs an associated file in ``$HOME/.config/borg/keys`` to read and write +the repository. The format is based on msgpack_, base64 encoding and +PBKDF2_ SHA256 hashing, which is then encoded again in a msgpack_. + +The same data structure is also used in the "repokey" modes, which store +it in the repository in the configuration file. + +The internal data structure is as follows: + +version + currently always an integer, 1 + +repository_id + the ``id`` field in the ``config`` ``INI`` file of the repository. + +enc_key + the key used to encrypt data with AES (256 bits) + +enc_hmac_key + the key used to HMAC the encrypted data (256 bits) + +id_key + the key used to HMAC the plaintext chunk data to compute the chunk's id + +chunk_seed + the seed for the buzhash chunking table (signed 32 bit integer) + +These fields are packed using msgpack_. The utf-8 encoded passphrase +is processed with PBKDF2_ (SHA256_, 100000 iterations, random 256 bit salt) +to derive a 256 bit key encryption key (KEK). + +A `HMAC-SHA256`_ checksum of the packed fields is generated with the KEK, +then the KEK is also used to encrypt the same packed fields using AES-CTR. + +The result is stored in another msgpack_ formatted as follows: + +version + currently always an integer, 1 + +salt + random 256 bits salt used to process the passphrase + +iterations + number of iterations used to process the passphrase (currently 100000) + +algorithm + the hashing algorithm used to process the passphrase and do the HMAC + checksum (currently the string ``sha256``) + +hash + HMAC-SHA256 of the *plaintext* of the packed fields. + +data + The encrypted, packed fields. + +The resulting msgpack_ is then encoded using base64 and written to the +key file, wrapped using the standard ``textwrap`` module with a header. +The header is a single line with a MAGIC string, a space and a hexadecimal +representation of the repository id. + +Compression +----------- + +|project_name| supports the following compression methods (a usage example +follows the list): + +- none (no compression, pass through data 1:1) +- lz4 (low compression, but super fast) +- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead], + level 1 is low, level 9 is high compression) +- lzma (level 0-9, level 0 is low, level 9 is high compression).
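+
+For example, the compression method and level are selected per invocation of
+``borg create`` (these example command lines assume a local repository path ``repo``;
+see ``borg create --help`` for the authoritative syntax and the current default)::
+
+    borg create --compression lz4 repo::archive ~/data
+    borg create --compression zlib,6 repo::archive ~/data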
+ +Speed: none > lz4 > zlib > lzma +Compression: lzma > zlib > lz4 > none + +Be careful, higher zlib and especially lzma compression levels might take a +lot of resources (CPU and memory). + +The overall speed of course also depends on the speed of your target storage. +If that is slow, using a higher compression level might yield better overall +performance. You need to experiment a bit. Maybe just watch your CPU load, if +that is relatively low, increase compression until 1 core is 70-100% loaded. + +Even if your target storage is rather fast, you might see interesting effects: +while doing no compression at all (none) is a operation that takes no time, it +likely will need to store more data to the storage compared to using lz4. +The time needed to transfer and store the additional data might be much more +than if you had used lz4 (which is super fast, but still might compress your +data about 2:1). This is assuming your data is compressible (if you backup +already compressed data, trying to compress them at backup time is usually +pointless). + +Compression is applied after deduplication, thus using different compression +methods in one repo does not influence deduplication. + +See ``borg create --help`` about how to specify the compression level and its default. + +Lock files +---------- + +|project_name| uses locks to get (exclusive or shared) access to the cache and +the repository. + +The locking system is based on creating a directory `lock.exclusive` (for +exclusive locks). Inside the lock directory, there is a file indicating +hostname, process id and thread id of the lock holder. + +There is also a json file `lock.roster` that keeps a directory of all shared +and exclusive lockers. + +If the process can create the `lock.exclusive` directory for a resource, it has +the lock for it. If creation fails (because the directory has already been +created by some other process), lock acquisition fails. + +The cache lock is usually in `~/.cache/borg/REPOID/lock.*`. +The repository lock is in `repository/lock.*`. + +In case you run into troubles with the locks, you can use the ``borg break-lock`` +command after you first have made sure that no |project_name| process is +running on any machine that accesses this resource. Be very careful, the cache +or repository might get damaged if multiple processes use it at the same time. + +Checksumming data structures +---------------------------- + +As detailed in the previous sections, Borg generates and stores various files +containing important meta data, such as the repository index, repository hints, +chunks caches and files cache. + +Data corruption in these files can damage the archive data in a repository, +e.g. due to wrong reference counts in the chunks cache. Only some parts of Borg +were designed to handle corrupted data structures, so a corrupted files cache +may cause crashes or write incorrect archives. + +Therefore, Borg calculates checksums when writing these files and tests checksums +when reading them. Checksums are generally 64-bit XXH64 hashes. +The canonical xxHash representation is used, i.e. big-endian. +Checksums are stored as hexadecimal ASCII strings. + +For compatibility, checksums are not required and absent checksums do not trigger errors. +The mechanisms have been designed to avoid false-positives when various Borg +versions are used alternately on the same repositories. + +Checksums are a data safety mechanism. They are not a security mechanism. + +.. 
rubric:: Choice of algorithm + +XXH64 has been chosen for its high speed on all platforms, which avoids performance +degradation in CPU-limited parts (e.g. cache synchronization). +Unlike CRC32, it neither requires hardware support (crc32c or CLMUL) +nor vectorized code nor large, cache-unfriendly lookup tables to achieve good performance. +This simplifies deployment of it considerably (cf. src/borg/algorithms/crc32...). + +Further, XXH64 is a non-linear hash function and thus has a "more or less" good +chance to detect larger burst errors, unlike linear CRCs where the probability +of detection decreases with error size. + +The 64-bit checksum length is considered sufficient for the file sizes typically +checksummed (individual files up to a few GB, usually less). +xxHash was expressly designed for data blocks of these sizes. + +Lower layer — file_integrity +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To accommodate the different transaction models used for the cache and repository, +there is a lower layer (borg.crypto.file_integrity.IntegrityCheckedFile) +wrapping a file-like object, performing streaming calculation and comparison of checksums. +Checksum errors are signalled by raising an exception (borg.crypto.file_integrity.FileIntegrityError) +at the earliest possible moment. + +.. rubric:: Calculating checksums + +Before feeding the checksum algorithm any data, the file name (i.e. without any path) +is mixed into the checksum, since the name encodes the context of the data for Borg. + +The various indices used by Borg have separate header and main data parts. +IntegrityCheckedFile allows to checksum them independently, which avoids +even reading the data when the header is corrupted. When a part is signalled, +the length of the part name is mixed into the checksum state first (encoded +as an ASCII string via `%10d` printf format), then the name of the part +is mixed in as an UTF-8 string. Lastly, the current position (length) +in the file is mixed in as well. + +The checksum state is not reset at part boundaries. + +A final checksum is always calculated in the same way as the parts described above, +after seeking to the end of the file. The final checksum cannot prevent code +from processing corrupted data during reading, however, it prevents use of the +corrupted data. + +.. rubric:: Serializing checksums + +All checksums are compiled into a simple JSON structure called *integrity data*: + +.. code-block:: json + + { + "algorithm": "XXH64", + "digests": { + "HashHeader": "eab6802590ba39e3", + "final": "e2a7f132fc2e8b24" + } + } + +The *algorithm* key notes the used algorithm. When reading, integrity data containing +an unknown algorithm is not inspected further. + +The *digests* key contains a mapping of part names to their digests. + +Integrity data is generally stored by the upper layers, introduced below. An exception +is the DetachedIntegrityCheckedFile, which automatically writes and reads it from +a ".integrity" file next to the data file. +It is used for archive chunks indexes in chunks.archive.d. + +Upper layer +~~~~~~~~~~~ + +Storage of integrity data depends on the component using it, since they have +different transaction mechanisms, and integrity data needs to be +transacted with the data it is supposed to protect. + +.. rubric:: Main cache files: chunks and files cache + +The integrity data of the ``chunks`` and ``files`` caches is stored in the +cache ``config``, since all three are transacted together. + +The ``[integrity]`` section is used: + +.. 
code-block:: ini + + [cache] + version = 1 + repository = 3c4...e59 + manifest = 10e...21c + timestamp = 2017-06-01T21:31:39.699514 + key_type = 2 + previous_location = /path/to/repo + + [integrity] + manifest = 10e...21c + chunks = {"algorithm": "XXH64", "digests": {"HashHeader": "eab...39e3", "final": "e2a...b24"}} + +The manifest ID is duplicated in the integrity section due to the way all Borg +versions handle the config file. Instead of creating a "new" config file from +an internal representation containing only the data understood by Borg, +the config file is read in its entirety (using the Python ConfigParser) and modified. +This preserves all sections and values not understood by the Borg version +modifying it. + +Thus, if an older version uses a cache with integrity data, it would preserve +the integrity section and its contents. If an integrity-aware Borg version +were to read this cache, it would incorrectly report checksum errors, since +the older version did not update the checksums. + +However, by duplicating the manifest ID in the integrity section, it is +easy to tell whether the checksums concern the current state of the cache. + +Integrity errors are fatal in these files, terminating the program, +and are not automatically corrected at this time. + +.. rubric:: chunks.archive.d + +Indices in chunks.archive.d are not transacted and use DetachedIntegrityCheckedFile, +which writes the integrity data to a separate ".integrity" file. + +Integrity errors result in deleting the affected index and rebuilding it. +This logs a warning and increases the exit code to WARNING (1). + +.. _integrity_repo: + +.. rubric:: Repository index and hints + +The repository associates index and hints files with a transaction by including the +transaction ID in the file names. Integrity data is stored in a third file +("integrity."). Like the hints file, it is msgpacked: + +.. code-block:: python + + { + b'version': 2, + b'hints': b'{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}', + b'index': b'{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}' + } + +The *version* key started at 2, the same version used for the hints. Since Borg has +many versioned file formats, this keeps the number of different versions in use +a bit lower. + +The other keys map an auxiliary file, like *index* or *hints*, to their integrity data. +Note that the JSON is stored as-is, and not as part of the msgpack structure. + +Integrity errors result in deleting the affected file(s) (index/hints) and rebuilding the index, +which is the same action taken when corruption is noticed in other ways (e.g. HashIndex can +detect most corrupted headers, but not data corruption). A warning is logged as well. +The exit code is not influenced, since remote repositories cannot perform that action. +Raising the exit code would be possible for local repositories, but is not implemented. + +Unlike the cache design, this mechanism can have false positives whenever an older version +*rewrites* the auxiliary files for a transaction created by a newer version, +since that might result in a different index (due to hash-table resizing) or hints file +(hash ordering, or the older version 1 format), while not invalidating the integrity file. + +For example, using 1.1 on a repository, noticing corruption or similar issues and then running +``borg-1.0 check --repair``, which rewrites the index and hints, results in this situation.
+Borg 1.1 would erroneously report checksum errors in the hints and/or index files and trigger +an automatic rebuild of these files. diff --git a/docs/internals/encryption.png b/docs/internals/encryption.png new file mode 100644 index 00000000..e3512074 Binary files /dev/null and b/docs/internals/encryption.png differ diff --git a/docs/internals/encryption.vsd b/docs/internals/encryption.vsd new file mode 100644 index 00000000..22507a17 Binary files /dev/null and b/docs/internals/encryption.vsd differ diff --git a/docs/internals/frontends.rst b/docs/internals/frontends.rst new file mode 100644 index 00000000..75831c95 --- /dev/null +++ b/docs/internals/frontends.rst @@ -0,0 +1,551 @@ +.. include:: ../global.rst.inc +.. highlight:: json + +.. _json_output: + +All about JSON: How to develop frontends +======================================== + +Borg does not have a public API on the Python level. That does not keep you from writing :code:`import borg`, +but does mean that there are no release-to-release guarantees on what you might find in that package, not +even for point releases (1.1.x), and there is no documentation beyond the code and the internals documents. + +Borg does on the other hand provide an API on a command-line level. In other words, a frontend that wants to +(for example) create a backup archive should just invoke :ref:`borg_create`, give command line parameters/options +as needed and parse the JSON output from borg. + +Important: JSON output is expected to be UTF-8, but currently borg depends on the locale being configured +for that (must be a UTF-8 locale and *not* "C" or "ascii"), so that Python will choose to encode to UTF-8. +The same applies to any inputs read by borg; they are expected to be UTF-8 encoded as well. + +We consider this a bug (see :issue:`2273`) and might fix it later, so that borg will use UTF-8 independent of +the locale. + +On POSIX systems, you can usually set environment vars to choose a UTF-8 locale: + +:: + + export LANG=en_US.UTF-8 + export LC_CTYPE=en_US.UTF-8 + + +Logging +------- + +Especially for graphical frontends it is important to be able to convey and reformat progress information +in meaningful ways. The ``--log-json`` option turns the stderr stream of Borg into a stream of JSON lines, +where each line is a JSON object. The *type* key of the object determines its other contents. + +Since JSON can only encode text, any string representing a file system path may miss non-text parts. + +The following types are in use. Progress information is governed by the usual rules for progress information; +it is not produced unless ``--progress`` is specified. + +archive_progress + Output during operations creating archives (:ref:`borg_create` and :ref:`borg_recreate`). + The following keys exist, each representing the current progress. + + original_size + Original size of data processed so far (before compression and deduplication) + compressed_size + Compressed size + deduplicated_size + Deduplicated size + nfiles + Number of (regular) files processed so far + path + Current path + time + Unix timestamp (float) + +progress_message + A message-based progress information with no concrete progress information, just a message + saying what is currently being worked on. + + operation + unique, opaque integer ID of the operation + :ref:`msgid ` + Message ID of the operation (may be *null*) + finished + boolean indicating whether the operation has finished, only the last object for an *operation* + can have this property set to *true*.
+ message + current progress message (may be empty/absent) + time + Unix timestamp (float) + +progress_percent + Absolute progress information with defined end/total and current value. + + operation + unique, opaque integer ID of the operation + :ref:`msgid ` + Message ID of the operation (may be *null*) + finished + boolean indicating whether the operation has finished, only the last object for an *operation* + can have this property set to *true*. + message + A formatted progress message, this will include the percentage and perhaps other information + current + Current value (always less-or-equal to *total*) + info + Array that describes the current item, may be *null*, contents depend on *msgid* + total + Total value + time + Unix timestamp (float) + +file_status + This is only output by :ref:`borg_create` and :ref:`borg_recreate` if ``--list`` is specified. The usual + rules for the file listing applies, including the ``--filter`` option. + + status + Single-character status as for regular list output + path + Path of the file system object + +log_message + Any regular log output invokes this type. Regular log options and filtering applies to these as well. + + time + Unix timestamp (float) + levelname + Upper-case log level name (also called severity). Defined levels are: DEBUG, INFO, WARNING, ERROR, CRITICAL + name + Name of the emitting entity + message + Formatted log message + :ref:`msgid ` + Message ID, may be *null* or absent + +See Prompts_ for the types used by prompts. + +.. rubric:: Examples (reformatted, each object would be on exactly one line) + +:ref:`borg_extract` progress:: + + {"message": "100.0% Extracting: src/borgbackup.egg-info/entry_points.txt", + "current": 13000228, "total": 13004993, "info": ["src/borgbackup.egg-info/entry_points.txt"], + "operation": 1, "msgid": "extract", "type": "progress_percent", "finished": false} + {"message": "100.0% Extracting: src/borgbackup.egg-info/SOURCES.txt", + "current": 13004993, "total": 13004993, "info": ["src/borgbackup.egg-info/SOURCES.txt"], + "operation": 1, "msgid": "extract", "type": "progress_percent", "finished": false} + {"operation": 1, "msgid": "extract", "type": "progress_percent", "finished": true} + +:ref:`borg_create` file listing with progress:: + + {"original_size": 0, "compressed_size": 0, "deduplicated_size": 0, "nfiles": 0, "type": "archive_progress", "path": "src"} + {"type": "file_status", "status": "U", "path": "src/borgbackup.egg-info/entry_points.txt"} + {"type": "file_status", "status": "U", "path": "src/borgbackup.egg-info/SOURCES.txt"} + {"type": "file_status", "status": "d", "path": "src/borgbackup.egg-info"} + {"type": "file_status", "status": "d", "path": "src"} + {"original_size": 13176040, "compressed_size": 11386863, "deduplicated_size": 503, "nfiles": 277, "type": "archive_progress", "path": ""} + +Internal transaction progress:: + + {"message": "Saving files cache", "operation": 2, "msgid": "cache.commit", "type": "progress_message", "finished": false} + {"message": "Saving cache config", "operation": 2, "msgid": "cache.commit", "type": "progress_message", "finished": false} + {"message": "Saving chunks cache", "operation": 2, "msgid": "cache.commit", "type": "progress_message", "finished": false} + {"operation": 2, "msgid": "cache.commit", "type": "progress_message", "finished": true} + +A debug log message:: + + {"message": "35 self tests completed in 0.08 seconds", + "type": "log_message", "created": 1488278449.5575905, "levelname": "DEBUG", "name": "borg.archiver"} + +Prompts +------- 
+ +Prompts assume a JSON form as well when the ``--log-json`` option is specified. Responses +are still read verbatim from *stdin*, while prompts are JSON messages printed to *stderr*, +just like log messages. + +Prompts use the *question_prompt* and *question_prompt_retry* types for the prompt itself, +and *question_invalid_answer*, *question_accepted_default*, *question_accepted_true*, +*question_accepted_false* and *question_env_answer* types for information about +prompt processing. + +The *message* property contains the same string displayed regularly in the same situation, +while the *msgid* property may contain a msgid_, typically the name of the +environment variable that can be used to override the prompt. It is the same for all JSON +messages pertaining to the same prompt. + +.. rubric:: Examples (reformatted, each object would be on exactly one line) + +Providing an invalid answer:: + + {"type": "question_prompt", "msgid": "BORG_CHECK_I_KNOW_WHAT_I_AM_DOING", + "message": "... Type 'YES' if you understand this and want to continue: "} + incorrect answer # input on stdin + {"type": "question_invalid_answer", "msgid": "BORG_CHECK_I_KNOW_WHAT_I_AM_DOING", "is_prompt": false, + "message": "Invalid answer, aborting."} + +Providing a false (negative) answer:: + + {"type": "question_prompt", "msgid": "BORG_CHECK_I_KNOW_WHAT_I_AM_DOING", + "message": "... Type 'YES' if you understand this and want to continue: "} + NO # input on stdin + {"type": "question_accepted_false", "msgid": "BORG_CHECK_I_KNOW_WHAT_I_AM_DOING", + "message": "Aborting.", "is_prompt": false} + +Providing a true (affirmative) answer:: + + {"type": "question_prompt", "msgid": "BORG_CHECK_I_KNOW_WHAT_I_AM_DOING", + "message": "... Type 'YES' if you understand this and want to continue: "} + YES # input on stdin + # no further output, just like the prompt without --log-json + +Passphrase prompts +------------------ + +Passphrase prompts should be handled differently. Use the environment variables *BORG_PASSPHRASE* +and *BORG_NEW_PASSPHRASE* (see :ref:`env_vars` for reference) to pass passphrases to Borg, don't +use the interactive passphrase prompts. + +When setting a new passphrase (:ref:`borg_init`, :ref:`borg_key_change-passphrase`) normally +Borg prompts whether it should display the passphrase. This can be suppressed by setting +the environment variable *BORG_DISPLAY_PASSPHRASE* to *no*. + +When "confronted" with an unknown repository, where the application does not know whether +the repository is encrypted, the following algorithm can be followed to detect encryption: + +1. Set *BORG_PASSPHRASE* to gibberish (for example a freshly generated UUID4, which cannot + possibly be the passphrase) +2. Invoke ``borg list repository ...`` +3. If this fails, due the repository being encrypted and the passphrase obviously being + wrong, you'll get an error with the *PassphraseWrong* msgid. + + The repository is encrypted, for further access the application will need the passphrase. + +4. If this does not fail, then the repository is not encrypted. + +Standard output +--------------- + +*stdout* is different and more command-dependent than logging. Commands like :ref:`borg_info`, :ref:`borg_create` +and :ref:`borg_list` implement a ``--json`` option which turns their regular output into a single JSON object. + +Dates are formatted according to ISO 8601 in local time. No explicit time zone is specified *at this time* +(subject to change). The equivalent strftime format string is '%Y-%m-%dT%H:%M:%S.%f', +e.g. 
``2017-08-07T12:27:20.123456``. + +The root object at least contains a *repository* key with an object containing: + +id + The ID of the repository, normally 64 hex characters +location + Canonicalized repository path, thus this may be different from what is specified on the command line +last_modified + Date when the repository was last modified by the Borg client + +The *encryption* key, if present, contains: + +mode + Textual encryption mode name (same as :ref:`borg_init` ``--encryption`` names) +keyfile + Path to the local key file used for access. Depending on *mode* this key may be absent. + +The *cache* key, if present, contains: + +path + Path to the local repository cache +stats + Object containing cache stats: + + total_chunks + Number of chunks + total_unique_chunks + Number of unique chunks + total_size + Total uncompressed size of all chunks multiplied with their reference counts + total_csize + Total compressed and encrypted size of all chunks multiplied with their reference counts + unique_size + Uncompressed size of all chunks + unique_csize + Compressed and encrypted size of all chunks + +Example *borg info* output:: + + { + "cache": { + "path": "/home/user/.cache/borg/0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23", + "stats": { + "total_chunks": 511533, + "total_csize": 17948017540, + "total_size": 22635749792, + "total_unique_chunks": 54892, + "unique_csize": 1920405405, + "unique_size": 2449675468 + } + }, + "encryption": { + "mode": "repokey" + }, + "repository": { + "id": "0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23", + "last_modified": "2017-08-07T12:27:20.789123", + "location": "/home/user/testrepo" + }, + "security_dir": "/home/user/.config/borg/security/0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23", + "archives": [] + } + +Archive formats ++++++++++++++++ + +:ref:`borg_info` uses an extended format for archives, which is more expensive to retrieve, while +:ref:`borg_list` uses a simpler format that is faster to retrieve. Either return archives in an +array under the *archives* key, while :ref:`borg_create` returns a single archive object under the +*archive* key. + +Both formats contain a *name* key with the archive name, the *id* key with the hexadecimal archive ID, +and the *start* key with the start timestamp. + +*borg info* and *borg create* further have: + +end + End timestamp +duration + Duration in seconds between start and end in seconds (float) +stats + Archive statistics (freshly calculated, this is what makes "info" more expensive) + + original_size + Size of files and metadata before compression + compressed_size + Size after compression + deduplicated_size + Deduplicated size (against the current repository, not when the archive was created) + nfiles + Number of regular files in the archive +limits + Object describing the utilization of Borg limits + + max_archive_size + Float between 0 and 1 describing how large this archive is relative to the maximum size allowed by Borg +command_line + Array of strings of the command line that created the archive + + The note about paths from above applies here as well. 
+ +:ref:`borg_info` further has: + +hostname + Hostname of the creating host +username + Name of the creating user +comment + Archive comment, if any + +Example of a simple archive listing (``borg list --last 1 --json``):: + + { + "archives": [ + { + "id": "80cd07219ad725b3c5f665c1dcf119435c4dee1647a560ecac30f8d40221a46a", + "name": "host-system-backup-2017-02-27", + "start": "2017-08-07T12:27:20.789123" + } + ], + "encryption": { + "mode": "repokey" + }, + "repository": { + "id": "0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23", + "last_modified": "2017-08-07T12:27:20.789123", + "location": "/home/user/repository" + } + } + +The same archive with more information (``borg info --last 1 --json``):: + + { + "archives": [ + { + "command_line": [ + "/home/user/.local/bin/borg", + "create", + "/home/user/repository", + "..." + ], + "comment": "", + "duration": 5.641542, + "end": "2017-02-27T12:27:20.789123", + "hostname": "host", + "id": "80cd07219ad725b3c5f665c1dcf119435c4dee1647a560ecac30f8d40221a46a", + "limits": { + "max_archive_size": 0.0001330855110409714 + }, + "name": "host-system-backup-2017-02-27", + "start": "2017-02-27T12:27:20.789123", + "stats": { + "compressed_size": 1880961894, + "deduplicated_size": 2791, + "nfiles": 53669, + "original_size": 2400471280 + }, + "username": "user" + } + ], + "cache": { + "path": "/home/user/.cache/borg/0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23", + "stats": { + "total_chunks": 511533, + "total_csize": 17948017540, + "total_size": 22635749792, + "total_unique_chunks": 54892, + "unique_csize": 1920405405, + "unique_size": 2449675468 + } + }, + "encryption": { + "mode": "repokey" + }, + "repository": { + "id": "0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23", + "last_modified": "2017-08-07T12:27:20.789123", + "location": "/home/user/repository" + } + } + +File listings ++++++++++++++ + +Listing the contents of an archive can produce *a lot* of JSON. Since many JSON implementations +don't support a streaming mode of operation, which is pretty much required to deal with this amount of +JSON, output is generated in the `JSON lines `_ format, which is simply +a number of JSON objects separated by new lines. + +Each item (file, directory, ...) is described by one object in the :ref:`borg_list` output. +Refer to the *borg list* documentation for the available keys and their meaning. + +Example (excerpt) of ``borg list --json-lines``:: + + {"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux", "healthy": true, "source": "", "linktarget": "", "flags": null, "mtime": "2017-02-27T12:27:20.023407", "size": 0} + {"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux/baz", "healthy": true, "source": "", "linktarget": "", "flags": null, "mtime": "2017-02-27T12:27:20.585407", "size": 0} + +.. _msgid: + +Message IDs +----------- + +Message IDs are strings that essentially give a log message or operation a name, without actually using the +full text, since texts change more frequently. Message IDs are unambiguous and reduce the need to parse +log messages. + +Assigned message IDs are: + +.. See scripts/errorlist.py; this is slightly edited. + +Errors + Archive.AlreadyExists + Archive {} already exists + Archive.DoesNotExist + Archive {} does not exist + Archive.IncompatibleFilesystemEncodingError + Failed to encode filename "{}" into file system encoding "{}". 
Consider configuring the LANG environment variable. + Cache.CacheInitAbortedError + Cache initialization aborted + Cache.EncryptionMethodMismatch + Repository encryption method changed since last access, refusing to continue + Cache.RepositoryAccessAborted + Repository access aborted + Cache.RepositoryIDNotUnique + Cache is newer than repository - do you have multiple, independently updated repos with same ID? + Cache.RepositoryReplay + Cache is newer than repository - this is either an attack or unsafe (multiple repos with same ID) + Buffer.MemoryLimitExceeded + Requested buffer size {} is above the limit of {}. + ExtensionModuleError + The Borg binary extension modules do not seem to be properly installed + IntegrityError + Data integrity error: {} + NoManifestError + Repository has no manifest. + PlaceholderError + Formatting Error: "{}".format({}): {}({}) + KeyfileInvalidError + Invalid key file for repository {} found in {}. + KeyfileMismatchError + Mismatch between repository {} and key file {}. + KeyfileNotFoundError + No key file for repository {} found in {}. + PassphraseWrong + passphrase supplied in BORG_PASSPHRASE is incorrect + PasswordRetriesExceeded + exceeded the maximum password retries + RepoKeyNotFoundError + No key entry found in the config of repository {}. + UnsupportedManifestError + Unsupported manifest envelope. A newer version is required to access this repository. + UnsupportedPayloadError + Unsupported payload type {}. A newer version is required to access this repository. + NotABorgKeyFile + This file is not a borg key backup, aborting. + RepoIdMismatch + This key backup seems to be for a different backup repository, aborting. + UnencryptedRepo + Keymanagement not available for unencrypted repositories. + UnknownKeyType + Keytype {0} is unknown. + LockError + Failed to acquire the lock {}. + LockErrorT + Failed to acquire the lock {}. + ConnectionClosed + Connection closed by remote host + InvalidRPCMethod + RPC method {} is not valid + PathNotAllowed + Repository path not allowed + RemoteRepository.RPCServerOutdated + Borg server is too old for {}. Required version {} + UnexpectedRPCDataFormatFromClient + Borg {}: Got unexpected RPC data format from client. + UnexpectedRPCDataFormatFromServer + Got unexpected RPC data format from server: + {} + Repository.AlreadyExists + Repository {} already exists. + Repository.CheckNeeded + Inconsistency detected. Please run "borg check {}". + Repository.DoesNotExist + Repository {} does not exist. + Repository.InsufficientFreeSpaceError + Insufficient free space to complete transaction (required: {}, available: {}). + Repository.InvalidRepository + {} is not a valid repository. Check repo config. + Repository.AtticRepository + Attic repository detected. Please run "borg upgrade {}". + Repository.ObjectNotFound + Object with key {} not found in repository {}. + +Operations + - cache.begin_transaction + - cache.download_chunks, appears with ``borg create --no-cache-sync`` + - cache.commit + - cache.sync + + *info* is one string element, the name of the archive currently synced. + - repository.compact_segments + - repository.replay_segments + - repository.check_segments + - check.verify_data + - extract + + *info* is one string element, the name of the path currently extracted. 
+ - extract.permissions + - archive.delete + +Prompts + BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK + For "Warning: Attempting to access a previously unknown unencrypted repository" + BORG_RELOCATED_REPO_ACCESS_IS_OK + For "Warning: The repository at location ... was previously located at ..." + BORG_CHECK_I_KNOW_WHAT_I_AM_DOING + For "Warning: 'check --repair' is an experimental feature that might result in data loss." + BORG_DELETE_I_KNOW_WHAT_I_AM_DOING + For "You requested to completely DELETE the repository *including* all archives it contains:" + BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING + For "recreate is an experimental feature." diff --git a/docs/internals/object-graph.png b/docs/internals/object-graph.png new file mode 100644 index 00000000..8a153428 Binary files /dev/null and b/docs/internals/object-graph.png differ diff --git a/docs/internals/object-graph.vsd b/docs/internals/object-graph.vsd new file mode 100644 index 00000000..70989e1d Binary files /dev/null and b/docs/internals/object-graph.vsd differ diff --git a/docs/internals/security.rst b/docs/internals/security.rst new file mode 100644 index 00000000..66a7ea5e --- /dev/null +++ b/docs/internals/security.rst @@ -0,0 +1,371 @@ + +.. somewhat surprisingly the "bash" highlighter gives nice results with + the pseudo-code notation used in the "Encryption" section. + +.. highlight:: bash + +======== +Security +======== + +.. _borgcrypto: + +Cryptography in Borg +==================== + +Attack model +------------ + +The attack model of Borg is that the environment of the client process +(e.g. ``borg create``) is trusted and the repository (server) is not. The +attacker has any and all access to the repository, including interactive +manipulation (man-in-the-middle) for remote repositories. + +Furthermore the client environment is assumed to be persistent across +attacks (practically this means that the security database cannot be +deleted between attacks). + +Under these circumstances Borg guarantees that the attacker cannot + +1. modify the data of any archive without the client detecting the change +2. rename, remove or add an archive without the client detecting the change +3. recover plain-text data +4. recover definite (heuristics based on access patterns are possible) + structural information such as the object graph (which archives + refer to what chunks) + +The attacker can always impose a denial of service per definition (he could +forbid connections to the repository, or delete it entirely). + +.. _security_structural_auth: + +Structural Authentication +------------------------- + +Borg is fundamentally based on an object graph structure (see :ref:`internals`), +where the root object is called the manifest. + +Borg follows the `Horton principle`_, which states that +not only the message must be authenticated, but also its meaning (often +expressed through context), because every object used is referenced by a +parent object through its object ID up to the manifest. The object ID in +Borg is a MAC of the object's plaintext, therefore this ensures that +an attacker cannot change the context of an object without forging the MAC. + +In other words, the object ID itself only authenticates the plaintext of the +object and not its context or meaning. The latter is established by a different +object referring to an object ID, thereby assigning a particular meaning to +an object. For example, an archive item contains a list of object IDs that +represent packed file metadata. 
On their own it's not clear that these objects +would represent what they do, but by the archive item referring to them +in a particular part of its own data structure assigns this meaning. + +This results in a directed acyclic graph of authentication from the manifest +to the data chunks of individual files. + +.. _tam_description: + +.. rubric:: Authenticating the manifest + +Since the manifest has a fixed ID (000...000) the aforementioned authentication +does not apply to it, indeed, cannot apply to it; it is impossible to authenticate +the root node of a DAG through its edges, since the root node has no incoming edges. + +With the scheme as described so far an attacker could easily replace the manifest, +therefore Borg includes a tertiary authentication mechanism (TAM) that is applied +to the manifest since version 1.0.9 (see :ref:`tam_vuln`). + +TAM works by deriving a separate key through HKDF_ from the other encryption and +authentication keys and calculating the HMAC of the metadata to authenticate [#]_:: + + # RANDOM(n) returns n random bytes + salt = RANDOM(64) + + ikm = id_key || enc_key || enc_hmac_key + # *context* depends on the operation, for manifest authentication it is + # the ASCII string "borg-metadata-authentication-manifest". + tam_key = HKDF-SHA-512(ikm, salt, context) + + # *data* is a dict-like structure + data[hmac] = zeroes + packed = pack(data) + data[hmac] = HMAC(tam_key, packed) + packed_authenticated = pack(data) + +Since an attacker cannot gain access to this key and also cannot make the +client authenticate arbitrary data using this mechanism, the attacker is unable +to forge the authentication. + +This effectively 'anchors' the manifest to the key, which is controlled by the +client, thereby anchoring the entire DAG, making it impossible for an attacker +to add, remove or modify any part of the DAG without Borg being able to detect +the tampering. + +Note that when using BORG_PASSPHRASE the attacker cannot swap the *entire* +repository against a new repository with e.g. repokey mode and no passphrase, +because Borg will abort access when BORG_PASSPRHASE is incorrect. + +However, interactively a user might not notice this kind of attack +immediately, if she assumes that the reason for the absent passphrase +prompt is a set BORG_PASSPHRASE. See issue :issue:`2169` for details. + +.. [#] The reason why the authentication tag is stored in the packed + data itself is that older Borg versions can still read the + manifest this way, while a changed layout would have broken + compatibility. + +Encryption +---------- + +Encryption is currently based on the Encrypt-then-MAC construction, +which is generally seen as the most robust way to create an authenticated +encryption scheme from encryption and message authentication primitives. + +Every operation (encryption, MAC / authentication, chunk ID derivation) +uses independent, random keys generated by `os.urandom`_ [#]_. + +Borg does not support unauthenticated encryption -- only authenticated encryption +schemes are supported. No unauthenticated encryption schemes will be added +in the future. + +Depending on the chosen mode (see :ref:`borg_init`) different primitives are used: + +- The actual encryption is currently always AES-256 in CTR mode. The + counter is added in plaintext, since it is needed for decryption, + and is also tracked locally on the client to avoid counter reuse. + +- The authentication primitive is either HMAC-SHA-256 or BLAKE2b-256 + in a keyed mode. 
HMAC-SHA-256 uses 256 bit keys, while BLAKE2b-256 + uses 512 bit keys. + + The latter is secure not only because BLAKE2b itself is not + susceptible to `length extension`_, but also since it truncates the + hash output from 512 bits to 256 bits, which would make the + construction safe even if BLAKE2b were broken regarding length + extension or similar attacks. + +- The primitive used for authentication is always the same primitive + that is used for deriving the chunk ID, but they are always + used with independent keys. + +Encryption:: + + id = AUTHENTICATOR(id_key, data) + compressed = compress(data) + + iv = reserve_iv() + encrypted = AES-256-CTR(enc_key, 8-null-bytes || iv, compressed) + authenticated = type-byte || AUTHENTICATOR(enc_hmac_key, encrypted) || iv || encrypted + + +Decryption:: + + # Given: input *authenticated* data, possibly a *chunk-id* to assert + type-byte, mac, iv, encrypted = SPLIT(authenticated) + + ASSERT(type-byte is correct) + ASSERT( CONSTANT-TIME-COMPARISON( mac, AUTHENTICATOR(enc_hmac_key, encrypted) ) ) + + decrypted = AES-256-CTR(enc_key, 8-null-bytes || iv, encrypted) + decompressed = decompress(decrypted) + + ASSERT( CONSTANT-TIME-COMPARISON( chunk-id, AUTHENTICATOR(id_key, decompressed) ) ) + +The client needs to track which counter values have been used, since +encrypting a chunk requires a starting counter value and no two chunks +may have overlapping counter ranges (otherwise the bitwise XOR of the +overlapping plaintexts is revealed). + +The client does not directly track the counter value, because it +changes often (with each encrypted chunk), instead it commits a +"reservation" to the security database and the repository by taking +the current counter value and adding 4 GiB / 16 bytes (the block size) +to the counter. Thus the client only needs to commit a new reservation +every few gigabytes of encrypted data. + +This mechanism also avoids reusing counter values in case the client +crashes or the connection to the repository is severed, since any +reservation would have been committed to both the security database +and the repository before any data is encrypted. Borg uses its +standard mechanism (SaveFile) to ensure that reservations are durable +(on most hardware / storage systems), therefore a crash of the +client's host would not impact tracking of reservations. + +However, this design is not infallible, and requires synchronization +between clients, which is handled through the repository. Therefore in +a multiple-client scenario a repository can trick a client into +reusing counter values by ignoring counter reservations and replaying +the manifest (which will fail if the client has seen a more recent +manifest or has a more recent nonce reservation). If the repository is +untrusted, but a trusted synchronization channel exists between +clients, the security database could be synchronized between them over +said trusted channel. This is not part of Borgs functionality. + +.. [#] Using the :ref:`borg key migrate-to-repokey ` + command a user can convert repositories created using Attic in "passphrase" + mode to "repokey" mode. In this case the keys were directly derived from + the user's passphrase at some point using PBKDF2. + + Borg does not support "passphrase" mode otherwise any more. + +.. _key_encryption: + +Offline key security +-------------------- + +Borg cannot secure the key material while it is running, because the keys +are needed in plain to decrypt/encrypt repository objects. 
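
To make the Encrypt-then-MAC layout from the Encryption pseudo-code above concrete, here is a minimal, illustrative sketch. It is *not* Borg's implementation (Borg uses its own OpenSSL bindings); it assumes the third-party ``cryptography`` package for AES-CTR, and the type byte is an arbitrary placeholder::

    import hmac
    import hashlib
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    TYPE_BYTE = b"\x02"  # placeholder; the real value depends on the mode

    def encrypt_then_mac(enc_key, enc_hmac_key, iv, compressed):
        # iv is the 8-byte reserved counter value; the full 16-byte CTR block
        # is 8 null bytes followed by the iv, as in the pseudo-code above.
        cipher = Cipher(algorithms.AES(enc_key), modes.CTR(bytes(8) + iv),
                        backend=default_backend())
        encryptor = cipher.encryptor()
        encrypted = encryptor.update(compressed) + encryptor.finalize()
        mac = hmac.new(enc_hmac_key, encrypted, hashlib.sha256).digest()
        return TYPE_BYTE + mac + iv + encrypted

    def mac_then_decrypt(enc_key, enc_hmac_key, authenticated):
        type_byte = authenticated[:1]
        mac, iv = authenticated[1:33], authenticated[33:41]
        encrypted = authenticated[41:]
        if type_byte != TYPE_BYTE:
            raise ValueError("unexpected type byte")
        expected = hmac.new(enc_hmac_key, encrypted, hashlib.sha256).digest()
        if not hmac.compare_digest(mac, expected):  # constant-time comparison
            raise ValueError("MAC verification failed")
        cipher = Cipher(algorithms.AES(enc_key), modes.CTR(bytes(8) + iv),
                        backend=default_backend())
        decryptor = cipher.decryptor()
        return decryptor.update(encrypted) + decryptor.finalize()

As in the pseudo-code, the MAC is computed over the ciphertext and verified before any decryption takes place.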
+ +For offline storage of the encryption keys they are encrypted with a +user-chosen passphrase. + +A 256 bit key encryption key (KEK) is derived from the passphrase +using PBKDF2-HMAC-SHA256 with a random 256 bit salt which is then used +to Encrypt-*and*-MAC (unlike the Encrypt-*then*-MAC approach used +otherwise) a packed representation of the keys with AES-256-CTR with a +constant initialization vector of 0. A HMAC-SHA256 of the plaintext is +generated using the same KEK and is stored alongside the ciphertext, +which is converted to base64 in its entirety. + +This base64 blob (commonly referred to as *keyblob*) is then stored in +the key file or in the repository config (keyfile and repokey modes +respectively). + +This scheme, and specifically the use of a constant IV with the CTR +mode, is secure because an identical passphrase will result in a +different derived KEK for every key encryption due to the salt. + +The use of Encrypt-and-MAC instead of Encrypt-then-MAC is seen as +uncritical (but not ideal) here, since it is combined with AES-CTR mode, +which is not vulnerable to padding attacks. + + +.. seealso:: + + Refer to the :ref:`key_files` section for details on the format. + + Refer to issue :issue:`747` for suggested improvements of the encryption + scheme and password-based key derivation. + +Implementations used +-------------------- + +We do not implement cryptographic primitives ourselves, but rely +on widely used libraries providing them: + +- AES-CTR and HMAC-SHA-256 from OpenSSL 1.0 / 1.1 are used, + which is also linked into the static binaries we provide. + We think this is not an additional risk, since we don't ever + use OpenSSL's networking, TLS or X.509 code, but only their + primitives implemented in libcrypto. +- SHA-256 and SHA-512 from Python's hashlib_ standard library module are used. + Borg requires a Python built with OpenSSL support (due to PBKDF2), therefore + these functions are delegated to OpenSSL by Python. +- HMAC, PBKDF2 and a constant-time comparison from Python's hmac_ standard + library module is used. While the HMAC implementation is written in Python, + the PBKDF2 implementation is provided by OpenSSL. The constant-time comparison + (``compare_digest``) is written in C and part of Python. +- BLAKE2b is either provided by the system's libb2, an official implementation, + or a bundled copy of the BLAKE2 reference implementation (written in C). + +Implemented cryptographic constructions are: + +- Encrypt-then-MAC based on AES-256-CTR and either HMAC-SHA-256 + or keyed BLAKE2b256 as described above under Encryption_. +- Encrypt-and-MAC based on AES-256-CTR and HMAC-SHA-256 + as described above under `Offline key security`_. +- HKDF_-SHA-512 + +.. _Horton principle: https://en.wikipedia.org/wiki/Horton_Principle +.. _HKDF: https://tools.ietf.org/html/rfc5869 +.. _length extension: https://en.wikipedia.org/wiki/Length_extension_attack +.. _hashlib: https://docs.python.org/3/library/hashlib.html +.. _hmac: https://docs.python.org/3/library/hmac.html +.. _os.urandom: https://docs.python.org/3/library/os.html#os.urandom + +Remote RPC protocol security +============================ + +.. note:: This section could be further expanded / detailed. + +The RPC protocol is fundamentally based on msgpack'd messages exchanged +over an encrypted SSH channel (the system's SSH client is used for this +by piping data from/to it). 
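
A toy illustration of this piping approach, assuming msgpack-python is available and a ``borg serve`` can be reached over SSH; the host name and the request shown are made-up placeholders, not Borg's actual RPC message format::

    import subprocess
    import msgpack

    # Spawn the system ssh client; all networking happens in that separate
    # process, the client only talks to its stdin/stdout pipes.
    proc = subprocess.Popen(
        ["ssh", "backup@example.com", "borg", "serve"],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
    )

    proc.stdin.write(msgpack.packb({"hypothetical": "request"}))
    proc.stdin.flush()

    # Bound the unpacker's buffer so the remote side cannot force the client
    # to allocate arbitrary amounts of memory (see the DoS note below).
    unpacker = msgpack.Unpacker(max_buffer_size=10 * 1024 * 1024)
    unpacker.feed(proc.stdout.read(4096))
    for message in unpacker:
        print(message)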
+ +This means that the authorization and transport security properties +are inherited from SSH and the configuration of the SSH client and the +SSH server -- Borg RPC does not contain *any* networking +code. Networking is done by the SSH client running in a separate +process, Borg only communicates over the standard pipes (stdout, +stderr and stdin) with this process. This also means that Borg doesn't +have to directly use a SSH client (or SSH at all). For example, +``sudo`` or ``qrexec`` could be used as an intermediary. + +By using the system's SSH client and not implementing a +(cryptographic) network protocol Borg sidesteps many security issues +that would normally impact distributing statically linked / standalone +binaries. + +The remainder of this section will focus on the security of the RPC +protocol within Borg. + +The assumed worst-case a server can inflict to a client is a +denial of repository service. + +The situation were a server can create a general DoS on the client +should be avoided, but might be possible by e.g. forcing the client to +allocate large amounts of memory to decode large messages (or messages +that merely indicate a large amount of data follows). The RPC protocol +code uses a limited msgpack Unpacker to prohibit this. + +We believe that other kinds of attacks, especially critical vulnerabilities +like remote code execution are inhibited by the design of the protocol: + +1. The server cannot send requests to the client on its own accord, + it only can send responses. This avoids "unexpected inversion of control" + issues. +2. msgpack serialization does not allow embedding or referencing code that + is automatically executed. Incoming messages are unpacked by the msgpack + unpacker into native Python data structures (like tuples and dictionaries), + which are then passed to the rest of the program. + + Additional verification of the correct form of the responses could be implemented. +3. Remote errors are presented in two forms: + + 1. A simple plain-text *stderr* channel. A prefix string indicates the kind of message + (e.g. WARNING, INFO, ERROR), which is used to suppress it according to the + log level selected in the client. + + A server can send arbitrary log messages, which may confuse a user. However, + log messages are only processed when server requests are in progress, therefore + the server cannot interfere / confuse with security critical dialogue like + the password prompt. + 2. Server-side exceptions passed over the main data channel. These follow the + general pattern of server-sent responses and are sent instead of response data + for a request. + +The msgpack implementation used (msgpack-python) has a good security track record, +a large test suite and no issues found by fuzzing. It is based on the msgpack-c implementation, +sharing the unpacking engine and some support code. msgpack-c has a good track record as well. +Some issues [#]_ in the past were located in code not included in msgpack-python. +Borg does not use msgpack-c. + +.. [#] - `MessagePack fuzzing `_ + - `Fixed integer overflow and EXT size problem `_ + - `Fixed array and map size overflow `_ + +Using OpenSSL +============= + +Borg uses the OpenSSL library for most cryptography (see `Implementations used`_ above). +OpenSSL is bundled with static releases, thus the bundled copy is not updated with system +updates. 
+ +OpenSSL is a large and complex piece of software and has had its share of vulnerabilities, +however, it is important to note that Borg links against ``libcrypto`` **not** ``libssl``. +libcrypto is the low-level cryptography part of OpenSSL, +while libssl implements TLS and related protocols. + +The latter is not used by Borg (cf. `Remote RPC protocol security`_, Borg itself does not implement +any network access) and historically contained most vulnerabilities, especially critical ones. +The static binaries released by the project contain neither libssl nor the Python ssl/_ssl modules. diff --git a/docs/internals/structure.png b/docs/internals/structure.png new file mode 100644 index 00000000..69566003 Binary files /dev/null and b/docs/internals/structure.png differ diff --git a/docs/internals/structure.vsd b/docs/internals/structure.vsd new file mode 100644 index 00000000..3c7ce0cf Binary files /dev/null and b/docs/internals/structure.vsd differ diff --git a/docs/introduction.rst b/docs/introduction.rst new file mode 100644 index 00000000..ab8bd32c --- /dev/null +++ b/docs/introduction.rst @@ -0,0 +1,8 @@ +Introduction +============ + +.. this shim is here to fix the structure in the PDF + rendering. without this stub, the elements in the toctree of + index.rst show up a level below the README file included + +.. include:: ../README.rst diff --git a/docs/man/borg-benchmark-crud.1 b/docs/man/borg-benchmark-crud.1 new file mode 100644 index 00000000..c763aae3 --- /dev/null +++ b/docs/man/borg-benchmark-crud.1 @@ -0,0 +1,101 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-BENCHMARK-CRUD 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-benchmark-crud \- Benchmark Create, Read, Update, Delete for archives. +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] benchmark crud REPO PATH +.SH DESCRIPTION +.sp +This command benchmarks borg CRUD (create, read, update, delete) operations. +.sp +It creates input data below the given PATH and backups this data into the given REPO. +The REPO must already exist (it could be a fresh empty repo or an existing repo, the +command will create / read / update / delete some archives named borg\-test\-data* there. +.sp +Make sure you have free space there, you\(aqll need about 1GB each (+ overhead). +.sp +If your repository is encrypted and borg needs a passphrase to unlock the key, use: +.sp +BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH +.sp +Measurements are done with different input file sizes and counts. +The file contents are very artificial (either all zero or all random), +thus the measurement results do not necessarily reflect performance with real data. +Also, due to the kind of content used, no compression is used in these benchmarks. +.INDENT 0.0 +.TP +.B C\- == borg create (1st archive creation, no compression, do not use files cache) +C\-Z\- == all\-zero files. 
full dedup, this is primarily measuring reader/chunker/hasher. +C\-R\- == random files. no dedup, measuring throughput through all processing stages. +.TP +.B R\- == borg extract (extract archive, dry\-run, do everything, but do not write files to disk) +R\-Z\- == all zero files. Measuring heavily duplicated files. +R\-R\- == random files. No duplication here, measuring throughput through all processing +.IP "System Message: ERROR/3 (docs/virtmanpage.rst:, line 56)" +Unexpected indentation. +.INDENT 7.0 +.INDENT 3.5 +stages, except writing to disk. +.UNINDENT +.UNINDENT +.TP +.B U\- == borg create (2nd archive creation of unchanged input files, measure files cache speed) +The throughput value is kind of virtual here, it does not actually read the file. +U\-Z\- == needs to check the 2 all\-zero chunks\(aq existence in the repo. +U\-R\- == needs to check existence of a lot of different chunks in the repo. +.TP +.B D\- == borg delete archive (delete last remaining archive, measure deletion + compaction) +D\-Z\- == few chunks to delete / few segments to compact/remove. +D\-R\- == many chunks to delete / many segments to compact/remove. +.UNINDENT +.sp +Please note that there might be quite some variance in these measurements. +Try multiple measurements and having a otherwise idle machine (and network, if you use it). +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPO +repo to use for benchmark (must exist) +.TP +.B PATH +path were to create benchmark input data +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-benchmark.1 b/docs/man/borg-benchmark.1 new file mode 100644 index 00000000..79e356ac --- /dev/null +++ b/docs/man/borg-benchmark.1 @@ -0,0 +1,47 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-BENCHMARK 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-benchmark \- benchmark command +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.nf +borg [common options] benchmark crud ... +.fi +.sp +.SH DESCRIPTION +.sp +These commands do various benchmarks. +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-benchmark\-crud(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-break-lock.1 b/docs/man/borg-break-lock.1 new file mode 100644 index 00000000..7b4291cd --- /dev/null +++ b/docs/man/borg-break-lock.1 @@ -0,0 +1,56 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-BREAK-LOCK 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-break-lock \- Break the repository lock (e.g. in case it was left by a dead borg. +. +.nr rst2man-indent-level 0 +. 
+.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] break\-lock REPOSITORY +.SH DESCRIPTION +.sp +This command breaks the repository and cache locks. +Please use carefully and only while no borg process (on any machine) is +trying to access the Cache or the Repository. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY +repository for which to break the locks +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-change-passphrase.1 b/docs/man/borg-change-passphrase.1 new file mode 100644 index 00000000..d5b3edbf --- /dev/null +++ b/docs/man/borg-change-passphrase.1 @@ -0,0 +1,52 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-CHANGE-PASSPHRASE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-change-passphrase \- Change repository key file passphrase +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] change\-passphrase REPOSITORY +.SH DESCRIPTION +.sp +The key files used for repository encryption are optionally passphrase +protected. This command can be used to change this passphrase. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.sp +REPOSITORY +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-check.1 b/docs/man/borg-check.1 new file mode 100644 index 00000000..cf2996a2 --- /dev/null +++ b/docs/man/borg-check.1 @@ -0,0 +1,148 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-CHECK 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-check \- Check repository consistency +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] check REPOSITORY_OR_ARCHIVE +.SH DESCRIPTION +.sp +The check command verifies the consistency of a repository and the corresponding archives. +.sp +First, the underlying repository data files are checked: +.INDENT 0.0 +.IP \(bu 2 +For all segments the segment magic (header) is checked +.IP \(bu 2 +For all objects stored in the segments, all metadata (e.g. crc and size) and +all data is read. The read data is checked by size and CRC. Bit rot and other +types of accidental damage can be detected this way. +.IP \(bu 2 +If we are in repair mode and a integrity error is detected for a segment, +we try to recover as many objects from the segment as possible. +.IP \(bu 2 +In repair mode, it makes sure that the index is consistent with the data +stored in the segments. +.IP \(bu 2 +If you use a remote repo server via ssh:, the repo check is executed on the +repo server without causing significant network traffic. +.IP \(bu 2 +The repository check can be skipped using the \fB\-\-archives\-only\fP option. +.UNINDENT +.sp +Second, the consistency and correctness of the archive metadata is verified: +.INDENT 0.0 +.IP \(bu 2 +Is the repo manifest present? If not, it is rebuilt from archive metadata +chunks (this requires reading and decrypting of all metadata and data). +.IP \(bu 2 +Check if archive metadata chunk is present. if not, remove archive from +manifest. +.IP \(bu 2 +For all files (items) in the archive, for all chunks referenced by these +files, check if chunk is present. +If a chunk is not present and we are in repair mode, replace it with a same\-size +replacement chunk of zeros. +If a previously lost chunk reappears (e.g. via a later backup) and we are in +repair mode, the all\-zero replacement chunk will be replaced by the correct chunk. +This requires reading of archive and file metadata, but not data. +.IP \(bu 2 +If we are in repair mode and we checked all the archives: delete orphaned +chunks from the repo. +.IP \(bu 2 +if you use a remote repo server via ssh:, the archive check is executed on +the client machine (because if encryption is enabled, the checks will require +decryption and this is always done client\-side, because key access will be +required). +.IP \(bu 2 +The archive checks can be time consuming, they can be skipped using the +\fB\-\-repository\-only\fP option. +.UNINDENT +.sp +The \fB\-\-verify\-data\fP option will perform a full integrity verification (as opposed to +checking the CRC32 of the segment) of data, which means reading the data from the +repository, decrypting and decompressing it. This is a cryptographic verification, +which will detect (accidental) corruption. For encrypted repositories it is +tamper\-resistant as well, unless the attacker has access to the keys. +.sp +It is also very slow. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. 
+.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY_OR_ARCHIVE +repository or archive to check consistency of +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-repository\-only +only perform repository checks +.TP +.B \-\-archives\-only +only perform archives checks +.TP +.B \-\-verify\-data +perform cryptographic archive data integrity verification (conflicts with \fB\-\-repository\-only\fP) +.TP +.B \-\-repair +attempt to repair any inconsistencies found +.TP +.B \-\-save\-space +work slower, but using less space +.UNINDENT +.SS filters +.INDENT 0.0 +.TP +.B \-P\fP,\fB \-\-prefix +only consider archive names starting with this prefix. +.TP +.B \-a\fP,\fB \-\-glob\-archives +only consider archive names matching the glob. sh: rules apply, see "borg help patterns". \fB\-\-prefix\fP and \fB\-\-glob\-archives\fP are mutually exclusive. +.TP +.B \-\-sort\-by +Comma\-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp +.TP +.BI \-\-first \ N +consider first N archives after other filters were applied +.TP +.BI \-\-last \ N +consider last N archives after other filters were applied +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-common.1 b/docs/man/borg-common.1 new file mode 100644 index 00000000..f48ccb6c --- /dev/null +++ b/docs/man/borg-common.1 @@ -0,0 +1,96 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-COMMON 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-common \- Common options of Borg commands +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.INDENT 0.0 +.TP +.B \-h\fP,\fB \-\-help +show this help message and exit +.TP +.B \-\-critical +work on log level CRITICAL +.TP +.B \-\-error +work on log level ERROR +.TP +.B \-\-warning +work on log level WARNING (default) +.TP +.B \-\-info\fP,\fB \-v\fP,\fB \-\-verbose +work on log level INFO +.TP +.B \-\-debug +enable debug output, work on log level DEBUG +.TP +.BI \-\-debug\-topic \ TOPIC +enable TOPIC debugging (can be specified multiple times). The logger path is borg.debug. if TOPIC is not fully qualified. +.TP +.B \-p\fP,\fB \-\-progress +show progress information +.TP +.B \-\-log\-json +Output one JSON object per log line instead of formatted text. +.TP +.BI \-\-lock\-wait \ N +wait for the lock, but max. N seconds (default: 1). 
+.TP +.B \-\-show\-version +show/log the borg version +.TP +.B \-\-show\-rc +show/log the return code (rc) +.TP +.B \-\-no\-files\-cache +do not load/update the file metadata cache used to detect unchanged files +.TP +.BI \-\-umask \ M +set umask to M (local and remote, default: 0077) +.TP +.BI \-\-remote\-path \ PATH +use PATH as borg executable on the remote (default: "borg") +.TP +.BI \-\-remote\-ratelimit \ rate +set remote network upload rate limit in kiByte/s (default: 0=unlimited) +.TP +.B \-\-consider\-part\-files +treat part files like normal files (e.g. to list/extract them) +.TP +.BI \-\-debug\-profile \ FILE +Write execution profile in Borg format into FILE. For local use a Python\-compatible file can be generated by suffixing FILE with ".pyprof". +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-compression.1 b/docs/man/borg-compression.1 new file mode 100644 index 00000000..3347a658 --- /dev/null +++ b/docs/man/borg-compression.1 @@ -0,0 +1,96 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-COMPRESSION 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-compression \- Details regarding compression +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH DESCRIPTION +.sp +It is no problem to mix different compression methods in one repo, +deduplication is done on the source data chunks (not on the compressed +or encrypted data). +.sp +If some specific chunk was once compressed and stored into the repo, creating +another backup that also uses this chunk will not change the stored chunk. +So if you use different compression specs for the backups, whichever stores a +chunk first determines its compression. See also borg recreate. +.sp +Compression is lz4 by default. If you want something else, you have to specify what you want. +.sp +Valid compression specifiers are: +.INDENT 0.0 +.TP +.B none +Do not compress. +.TP +.B lz4 +Use lz4 compression. High speed, low compression. (default) +.TP +.B zlib[,L] +Use zlib ("gz") compression. Medium speed, medium compression. +If you do not explicitely give the compression level L (ranging from 0 +to 9), it will use level 6. +Giving level 0 (means "no compression", but still has zlib protocol +overhead) is usually pointless, you better use "none" compression. +.TP +.B lzma[,L] +Use lzma ("xz") compression. Low speed, high compression. +If you do not explicitely give the compression level L (ranging from 0 +to 9), it will use level 6. +Giving levels above 6 is pointless and counterproductive because it does +not compress better due to the buffer size used by borg \- but it wastes +lots of CPU cycles and RAM. +.TP +.B auto,C[,L] +Use a built\-in heuristic to decide per chunk whether to compress or not. +The heuristic tries with lz4 whether the data is compressible. 
+For incompressible data, it will not use compression (uses "none"). +For compressible data, it uses the given C[,L] compression \- with C[,L] +being any valid compression specifier. +.UNINDENT +.sp +Examples: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +borg create \-\-compression lz4 REPO::ARCHIVE data +borg create \-\-compression zlib REPO::ARCHIVE data +borg create \-\-compression zlib,1 REPO::ARCHIVE data +borg create \-\-compression auto,lzma,6 REPO::ARCHIVE data +borg create \-\-compression auto,lzma ... +.ft P +.fi +.UNINDENT +.UNINDENT +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-create.1 b/docs/man/borg-create.1 new file mode 100644 index 00000000..ec8d7b52 --- /dev/null +++ b/docs/man/borg-create.1 @@ -0,0 +1,313 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-CREATE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-create \- Create new archive +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] create ARCHIVE PATH +.SH DESCRIPTION +.sp +This command creates a backup archive containing all files found while recursively +traversing all paths specified. Paths are added to the archive as they are given, +that means if relative paths are desired, the command has to be run from the correct +directory. +.sp +When giving \(aq\-\(aq as path, borg will read data from standard input and create a +file \(aqstdin\(aq in the created archive from that data. +.sp +The archive will consume almost no disk space for files or parts of files that +have already been stored in other archives. +.sp +The archive name needs to be unique. It must not end in \(aq.checkpoint\(aq or +\(aq.checkpoint.N\(aq (with N being a number), because these names are used for +checkpoints and treated in special ways. +.sp +In the archive name, you may use the following placeholders: +{now}, {utcnow}, {fqdn}, {hostname}, {user} and some others. +.sp +To speed up pulling backups over sshfs and similar network file systems which do +not provide correct inode information the \fB\-\-ignore\-inode\fP flag can be used. This +potentially decreases reliability of change detection, while avoiding always reading +all files on these file systems. +.sp +The mount points of filesystems or filesystem snapshots should be the same for every +creation of a new archive to ensure fast operation. This is because the file cache that +is used to determine changed files quickly uses absolute filenames. +If this is not possible, consider creating a bind mount to a stable location. +.sp +The \fB\-\-progress\fP option shows (from left to right) Original, Compressed and Deduplicated +(O, C and D, respectively), then the Number of files (N) processed so far, followed by +the currently processed path. +.sp +See the output of the "borg help patterns" command for more help on exclude patterns. 
+See the output of the "borg help placeholders" command for more help on placeholders. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B ARCHIVE +name of archive to create (must be also a valid directory name) +.TP +.B PATH +paths to archive +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-n\fP,\fB \-\-dry\-run +do not create a backup archive +.TP +.B \-s\fP,\fB \-\-stats +print statistics for the created archive +.TP +.B \-\-list +output verbose list of items (files, dirs, ...) +.TP +.BI \-\-filter \ STATUSCHARS +only display items with the given status characters +.TP +.B \-\-json +output stats as JSON (implies \-\-stats) +.TP +.B \-\-no\-cache\-sync +experimental: do not synchronize the cache. Implies \-\-no\-files\-cache. +.UNINDENT +.SS Exclusion options +.INDENT 0.0 +.TP +.BI \-e \ PATTERN\fP,\fB \ \-\-exclude \ PATTERN +exclude paths matching PATTERN +.TP +.BI \-\-exclude\-from \ EXCLUDEFILE +read exclude patterns from EXCLUDEFILE, one per line +.TP +.B \-\-exclude\-caches +exclude directories that contain a CACHEDIR.TAG file (\fI\%http://www.brynosaurus.com/cachedir/spec.html\fP) +.TP +.BI \-\-exclude\-if\-present \ NAME +exclude directories that are tagged by containing a filesystem object with the given NAME +.TP +.B \-\-keep\-exclude\-tags\fP,\fB \-\-keep\-tag\-files +if tag objects are specified with \-\-exclude\-if\-present, don\(aqt omit the tag objects themselves from the backup archive +.TP +.BI \-\-pattern \ PATTERN +experimental: include/exclude paths matching PATTERN +.TP +.BI \-\-patterns\-from \ PATTERNFILE +experimental: read include/exclude patterns from PATTERNFILE, one per line +.UNINDENT +.SS Filesystem options +.INDENT 0.0 +.TP +.B \-x\fP,\fB \-\-one\-file\-system +stay in the same file system and do not store mount points of other file systems +.TP +.B \-\-numeric\-owner +only store numeric user and group identifiers +.TP +.B \-\-noatime +do not store atime into archive +.TP +.B \-\-noctime +do not store ctime into archive +.TP +.B \-\-ignore\-inode +ignore inode data in the file metadata cache used to detect unchanged files. +.TP +.B \-\-read\-special +open and read block and char device files as well as FIFOs as if they were regular files. Also follows symlinks pointing to these kinds of files. +.UNINDENT +.SS Archive options +.INDENT 0.0 +.TP +.BI \-\-comment \ COMMENT +add a comment text to the archive +.TP +.BI \-\-timestamp \ TIMESTAMP +manually specify the archive creation date/time (UTC, yyyy\-mm\-ddThh:mm:ss format). alternatively, give a reference file/directory. +.TP +.BI \-c \ SECONDS\fP,\fB \ \-\-checkpoint\-interval \ SECONDS +write checkpoint every SECONDS seconds (Default: 1800) +.TP +.BI \-\-chunker\-params \ PARAMS +specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE). default: 19,23,21,4095 +.TP +.BI \-C \ COMPRESSION\fP,\fB \ \-\-compression \ COMPRESSION +select compression algorithm, see the output of the "borg help compression" command for details. 
+.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Backup ~/Documents into an archive named "my\-documents" +$ borg create /path/to/repo::my\-documents ~/Documents + +# same, but list all files as we process them +$ borg create \-\-list /path/to/repo::my\-documents ~/Documents + +# Backup ~/Documents and ~/src but exclude pyc files +$ borg create /path/to/repo::my\-files \e + ~/Documents \e + ~/src \e + \-\-exclude \(aq*.pyc\(aq + +# Backup home directories excluding image thumbnails (i.e. only +# /home//.thumbnails is excluded, not /home/*/*/.thumbnails etc.) +$ borg create /path/to/repo::my\-files /home \e + \-\-exclude \(aqsh:/home/*/.thumbnails\(aq + +# Backup the root filesystem into an archive named "root\-YYYY\-MM\-DD" +# use zlib compression (good, but slow) \- default is lz4 (fast, low compression ratio) +$ borg create \-C zlib,6 /path/to/repo::root\-{now:%Y\-%m\-%d} / \-\-one\-file\-system + +# Backup a remote host locally ("pull" style) using sshfs +$ mkdir sshfs\-mount +$ sshfs root@example.com:/ sshfs\-mount +$ cd sshfs\-mount +$ borg create /path/to/repo::example.com\-root\-{now:%Y\-%m\-%d} . +$ cd .. +$ fusermount \-u sshfs\-mount + +# Make a big effort in fine granular deduplication (big chunk management +# overhead, needs a lot of RAM and disk space, see formula in internals +# docs \- same parameters as borg < 1.0 or attic): +$ borg create \-\-chunker\-params 10,23,16,4095 /path/to/repo::small /smallstuff + +# Backup a raw device (must not be active/in use/mounted at that time) +$ dd if=/dev/sdx bs=10M | borg create /path/to/repo::my\-sdx \- + +# No compression (default) +$ borg create /path/to/repo::arch ~ + +# Super fast, low compression +$ borg create \-\-compression lz4 /path/to/repo::arch ~ + +# Less fast, higher compression (N = 0..9) +$ borg create \-\-compression zlib,N /path/to/repo::arch ~ + +# Even slower, even higher compression (N = 0..9) +$ borg create \-\-compression lzma,N /path/to/repo::arch ~ + +# Use short hostname, user name and current time in archive name +$ borg create /path/to/repo::{hostname}\-{user}\-{now} ~ +# Similar, use the same datetime format as borg 1.1 will have as default +$ borg create /path/to/repo::{hostname}\-{user}\-{now:%Y\-%m\-%dT%H:%M:%S} ~ +# As above, but add nanoseconds +$ borg create /path/to/repo::{hostname}\-{user}\-{now:%Y\-%m\-%dT%H:%M:%S.%f} ~ + +# Backing up relative paths by moving into the correct directory first +$ cd /home/user/Documents +# The root directory of the archive will be "projectA" +$ borg create /path/to/repo::daily\-projectA\-{now:%Y\-%m\-%d} projectA +.ft P +.fi +.UNINDENT +.UNINDENT +.SH NOTES +.sp +The \fB\-\-exclude\fP patterns are not like tar. In tar \fB\-\-exclude\fP .bundler/gems will +exclude foo/.bundler/gems. In borg it will not, you need to use \fB\-\-exclude\fP +\(aq*/.bundler/gems\(aq to get the same effect. See \fBborg help patterns\fP for +more information. +.sp +In addition to using \fB\-\-exclude\fP patterns, it is possible to use +\fB\-\-exclude\-if\-present\fP to specify the name of a filesystem object (e.g. a file +or folder name) which, when contained within another folder, will prevent the +containing folder from being backed up. By default, the containing folder and +all of its contents will be omitted from the backup. 
If, however, you wish to +only include the objects specified by \fB\-\-exclude\-if\-present\fP in your backup, +and not include any other contents of the containing folder, this can be enabled +through using the \fB\-\-keep\-exclude\-tags\fP option. +.SS Item flags +.sp +\fB\-\-list\fP outputs a list of all files, directories and other +file system items it considered (no matter whether they had content changes +or not). For each item, it prefixes a single\-letter flag that indicates type +and/or status of the item. +.sp +If you are interested only in a subset of that output, you can give e.g. +\fB\-\-filter=AME\fP and it will only show regular files with A, M or E status (see +below). +.sp +A uppercase character represents the status of a regular file relative to the +"files" cache (not relative to the repo \-\- this is an issue if the files cache +is not used). Metadata is stored in any case and for \(aqA\(aq and \(aqM\(aq also new data +chunks are stored. For \(aqU\(aq all data chunks refer to already existing chunks. +.INDENT 0.0 +.IP \(bu 2 +\(aqA\(aq = regular file, added (see also \fIa_status_oddity\fP in the FAQ) +.IP \(bu 2 +\(aqM\(aq = regular file, modified +.IP \(bu 2 +\(aqU\(aq = regular file, unchanged +.IP \(bu 2 +\(aqE\(aq = regular file, an error happened while accessing/reading \fIthis\fP file +.UNINDENT +.sp +A lowercase character means a file type other than a regular file, +borg usually just stores their metadata: +.INDENT 0.0 +.IP \(bu 2 +\(aqd\(aq = directory +.IP \(bu 2 +\(aqb\(aq = block device +.IP \(bu 2 +\(aqc\(aq = char device +.IP \(bu 2 +\(aqh\(aq = regular file, hardlink (to already seen inodes) +.IP \(bu 2 +\(aqs\(aq = symlink +.IP \(bu 2 +\(aqf\(aq = fifo +.UNINDENT +.sp +Other flags used include: +.INDENT 0.0 +.IP \(bu 2 +\(aqi\(aq = backup data was read from standard input (stdin) +.IP \(bu 2 +\(aq\-\(aq = dry run, item was \fInot\fP backed up +.IP \(bu 2 +\(aqx\(aq = excluded, item was \fInot\fP backed up +.IP \(bu 2 +\(aq?\(aq = missing status code (if you see this, please file a bug report!) +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-delete(1)\fP, \fIborg\-prune(1)\fP, \fIborg\-check(1)\fP, \fIborg\-patterns(1)\fP, \fIborg\-placeholders(1)\fP, \fIborg\-compression(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-delete.1 b/docs/man/borg-delete.1 new file mode 100644 index 00000000..2e889153 --- /dev/null +++ b/docs/man/borg-delete.1 @@ -0,0 +1,109 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-DELETE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-delete \- Delete an existing repository or archives +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] delete TARGET +.SH DESCRIPTION +.sp +This command deletes an archive from the repository or the complete repository. +Disk space is reclaimed accordingly. 
If you delete the complete repository, the +local cache for it (if any) is also deleted. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B TARGET +archive or repository to delete +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-s\fP,\fB \-\-stats +print statistics for the deleted archive +.TP +.B \-c\fP,\fB \-\-cache\-only +delete only the local cache for the given repository +.TP +.B \-\-force +force deletion of corrupted archives, use \-\-force \-\-force in case \-\-force does not work. +.TP +.B \-\-save\-space +work slower, but using less space +.UNINDENT +.SS filters +.INDENT 0.0 +.TP +.B \-P\fP,\fB \-\-prefix +only consider archive names starting with this prefix. +.TP +.B \-a\fP,\fB \-\-glob\-archives +only consider archive names matching the glob. sh: rules apply, see "borg help patterns". \fB\-\-prefix\fP and \fB\-\-glob\-archives\fP are mutually exclusive. +.TP +.B \-\-sort\-by +Comma\-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp +.TP +.BI \-\-first \ N +consider first N archives after other filters were applied +.TP +.BI \-\-last \ N +consider last N archives after other filters were applied +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# delete a single backup archive: +$ borg delete /path/to/repo::Monday + +# delete the whole repository and the related local cache: +$ borg delete /path/to/repo +You requested to completely DELETE the repository *including* all archives it contains: +repo Mon, 2016\-02\-15 19:26:54 +root\-2016\-02\-15 Mon, 2016\-02\-15 19:36:29 +newname Mon, 2016\-02\-15 19:50:19 +Type \(aqYES\(aq if you understand this and want to continue: YES +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-diff.1 b/docs/man/borg-diff.1 new file mode 100644 index 00000000..ad030a67 --- /dev/null +++ b/docs/man/borg-diff.1 @@ -0,0 +1,151 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-DIFF 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-diff \- Diff contents of two archives +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] diff REPO_ARCHIVE1 ARCHIVE2 PATH +.SH DESCRIPTION +.sp +This command finds differences (file contents, user/group/mode) between archives. +.sp +A repository location and an archive name must be specified for REPO_ARCHIVE1. +ARCHIVE2 is just another archive name in same repository (no repository location +allowed). +.sp +For archives created with Borg 1.1 or newer diff automatically detects whether +the archives are created with the same chunker params. If so, only chunk IDs +are compared, which is very fast. +.sp +For archives prior to Borg 1.1 chunk contents are compared by default. 
+If you did not create the archives with different chunker params, +pass \fB\-\-same\-chunker\-params\fP\&. +Note that the chunker params changed from Borg 0.xx to 1.0. +.sp +See the output of the "borg help patterns" command for more help on exclude patterns. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPO_ARCHIVE1 +repository location and ARCHIVE1 name +.TP +.B ARCHIVE2 +ARCHIVE2 name (no repository location allowed) +.TP +.B PATH +paths of items inside the archives to compare; patterns are supported +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-numeric\-owner +only consider numeric user and group identifiers +.TP +.B \-\-same\-chunker\-params +Override check of chunker parameters. +.TP +.B \-\-sort +Sort the output lines by file path. +.UNINDENT +.SS Exclusion options +.INDENT 0.0 +.TP +.BI \-e \ PATTERN\fP,\fB \ \-\-exclude \ PATTERN +exclude paths matching PATTERN +.TP +.BI \-\-exclude\-from \ EXCLUDEFILE +read exclude patterns from EXCLUDEFILE, one per line +.TP +.B \-\-exclude\-caches +exclude directories that contain a CACHEDIR.TAG file (\fI\%http://www.brynosaurus.com/cachedir/spec.html\fP) +.TP +.BI \-\-exclude\-if\-present \ NAME +exclude directories that are tagged by containing a filesystem object with the given NAME +.TP +.B \-\-keep\-exclude\-tags\fP,\fB \-\-keep\-tag\-files +if tag objects are specified with \-\-exclude\-if\-present, don\(aqt omit the tag objects themselves from the backup archive +.TP +.BI \-\-pattern \ PATTERN +experimental: include/exclude paths matching PATTERN +.TP +.BI \-\-patterns\-from \ PATTERNFILE +experimental: read include/exclude patterns from PATTERNFILE, one per line +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg init \-e=none testrepo +$ mkdir testdir +$ cd testdir +$ echo asdf > file1 +$ dd if=/dev/urandom bs=1M count=4 > file2 +$ touch file3 +$ borg create ../testrepo::archive1 . + +$ chmod a+x file1 +$ echo "something" >> file2 +$ borg create ../testrepo::archive2 . + +$ rm file3 +$ touch file4 +$ borg create ../testrepo::archive3 . + +$ cd .. +$ borg diff testrepo::archive1 archive2 +[\-rw\-r\-\-r\-\- \-> \-rwxr\-xr\-x] file1 + +135 B \-252 B file2 + +$ borg diff testrepo::archive2 archive3 +added 0 B file4 +removed 0 B file3 + +$ borg diff testrepo::archive1 archive3 +[\-rw\-r\-\-r\-\- \-> \-rwxr\-xr\-x] file1 + +135 B \-252 B file2 +added 0 B file4 +removed 0 B file3 +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-export-tar.1 b/docs/man/borg-export-tar.1 new file mode 100644 index 00000000..515b4d84 --- /dev/null +++ b/docs/man/borg-export-tar.1 @@ -0,0 +1,142 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-EXPORT-TAR 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-export-tar \- Export archive contents as a tarball +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] export\-tar ARCHIVE FILE PATH +.SH DESCRIPTION +.sp +This command creates a tarball from an archive. +.sp +When giving \(aq\-\(aq as the output FILE, Borg will write a tar stream to standard output. +.sp +By default (\fB\-\-tar\-filter=auto\fP) Borg will detect whether the FILE should be compressed +based on its file extension and pipe the tarball through an appropriate filter +before writing it to FILE: +.INDENT 0.0 +.IP \(bu 2 +\&.tar.gz: gzip +.IP \(bu 2 +\&.tar.bz2: bzip2 +.IP \(bu 2 +\&.tar.xz: xz +.UNINDENT +.sp +Alternatively a \fB\-\-tar\-filter\fP program may be explicitly specified. It should +read the uncompressed tar stream from stdin and write a compressed/filtered +tar stream to stdout. +.sp +The generated tarball uses the GNU tar format. +.sp +export\-tar is a lossy conversion: +BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. +Timestamp resolution is limited to whole seconds, not the nanosecond resolution +otherwise supported by Borg. +.sp +A \fB\-\-sparse\fP option (as found in borg extract) is not supported. +.sp +By default the entire archive is extracted but a subset of files and directories +can be selected by passing a list of \fBPATHs\fP as arguments. +The file selection can further be restricted by using the \fB\-\-exclude\fP option. +.sp +See the output of the "borg help patterns" command for more help on exclude patterns. +.sp +\fB\-\-progress\fP can be slower than no progress display, since it makes one additional +pass over the archive metadata. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B ARCHIVE +archive to export +.TP +.B FILE +output tar file. "\-" to write to stdout instead. +.TP +.B PATH +paths to extract; patterns are supported +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-tar\-filter +filter program to pipe data through +.TP +.B \-\-list +output verbose list of items (files, dirs, ...) +.TP +.BI \-e \ PATTERN\fP,\fB \ \-\-exclude \ PATTERN +exclude paths matching PATTERN +.TP +.BI \-\-exclude\-from \ EXCLUDEFILE +read exclude patterns from EXCLUDEFILE, one per line +.TP +.BI \-\-pattern \ PATTERN +experimental: include/exclude paths matching PATTERN +.TP +.BI \-\-patterns\-from \ PATTERNFILE +experimental: read include/exclude patterns from PATTERNFILE, one per line +.TP +.BI \-\-strip\-components \ NUMBER +Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped. +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# export as uncompressed tar +$ borg export\-tar /path/to/repo::Monday Monday.tar + +# exclude some types, compress using gzip +$ borg export\-tar /path/to/repo::Monday Monday.tar.gz \-\-exclude \(aq*.so\(aq + +# use higher compression level with gzip +$ borg export\-tar testrepo::linux \-\-tar\-filter="gzip \-9" Monday.tar.gz + +# export a gzipped tar, but instead of storing it on disk, +# upload it to a remote site using curl. +$ borg export\-tar ... \-\-tar\-filter="gzip" \- | curl \-\-data\-binary @\- https://somewhere/to/POST +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. 
diff --git a/docs/man/borg-extract.1 b/docs/man/borg-extract.1 new file mode 100644 index 00000000..13a71ab7 --- /dev/null +++ b/docs/man/borg-extract.1 @@ -0,0 +1,137 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-EXTRACT 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-extract \- Extract archive contents +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] extract ARCHIVE PATH +.SH DESCRIPTION +.sp +This command extracts the contents of an archive. By default the entire +archive is extracted but a subset of files and directories can be selected +by passing a list of \fBPATHs\fP as arguments. The file selection can further +be restricted by using the \fB\-\-exclude\fP option. +.sp +See the output of the "borg help patterns" command for more help on exclude patterns. +.sp +By using \fB\-\-dry\-run\fP, you can do all extraction steps except actually writing the +output data: reading metadata and data chunks from the repo, checking the hash/hmac, +decrypting, decompressing. +.sp +\fB\-\-progress\fP can be slower than no progress display, since it makes one additional +pass over the archive metadata. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B ARCHIVE +archive to extract +.TP +.B PATH +paths to extract; patterns are supported +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-list +output verbose list of items (files, dirs, ...) +.TP +.B \-n\fP,\fB \-\-dry\-run +do not actually change any files +.TP +.BI \-e \ PATTERN\fP,\fB \ \-\-exclude \ PATTERN +exclude paths matching PATTERN +.TP +.BI \-\-exclude\-from \ EXCLUDEFILE +read exclude patterns from EXCLUDEFILE, one per line +.TP +.BI \-\-pattern \ PATTERN +experimental: include/exclude paths matching PATTERN +.TP +.BI \-\-patterns\-from \ PATTERNFILE +experimental: read include/exclude patterns from PATTERNFILE, one per line +.TP +.B \-\-numeric\-owner +only obey numeric user and group identifiers +.TP +.BI \-\-strip\-components \ NUMBER +Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped. 
+.TP +.B \-\-stdout +write all extracted data to stdout +.TP +.B \-\-sparse +create holes in output sparse file from all\-zero chunks +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Extract entire archive +$ borg extract /path/to/repo::my\-files + +# Extract entire archive and list files while processing +$ borg extract \-\-list /path/to/repo::my\-files + +# Verify whether an archive could be successfully extracted, but do not write files to disk +$ borg extract \-\-dry\-run /path/to/repo::my\-files + +# Extract the "src" directory +$ borg extract /path/to/repo::my\-files home/USERNAME/src + +# Extract the "src" directory but exclude object files +$ borg extract /path/to/repo::my\-files home/USERNAME/src \-\-exclude \(aq*.o\(aq + +# Restore a raw device (must not be active/in use/mounted at that time) +$ borg extract \-\-stdout /path/to/repo::my\-sdx | dd of=/dev/sdx bs=10M +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +Currently, extract always writes into the current working directory ("."), +so make sure you \fBcd\fP to the right place before calling \fBborg extract\fP\&. +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-mount(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-info.1 b/docs/man/borg-info.1 new file mode 100644 index 00000000..338d3ca6 --- /dev/null +++ b/docs/man/borg-info.1 @@ -0,0 +1,116 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-INFO 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-info \- Show archive details such as disk space used +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] info REPOSITORY_OR_ARCHIVE +.SH DESCRIPTION +.sp +This command displays detailed information about the specified archive or repository. +.sp +Please note that the deduplicated sizes of the individual archives do not add +up to the deduplicated size of the repository ("all archives"), because the two +are meaning different things: +.INDENT 0.0 +.TP +.B This archive / deduplicated size = amount of data stored ONLY for this archive += unique chunks of this archive. +.TP +.B All archives / deduplicated size = amount of data stored in the repo += all chunks in the repository. +.UNINDENT +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY_OR_ARCHIVE +archive or repository to display information about +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-json +format output as JSON +.UNINDENT +.SS filters +.INDENT 0.0 +.TP +.B \-P\fP,\fB \-\-prefix +only consider archive names starting with this prefix. +.TP +.B \-a\fP,\fB \-\-glob\-archives +only consider archive names matching the glob. sh: rules apply, see "borg help patterns". \fB\-\-prefix\fP and \fB\-\-glob\-archives\fP are mutually exclusive. 
+.TP +.B \-\-sort\-by +Comma\-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp +.TP +.BI \-\-first \ N +consider first N archives after other filters were applied +.TP +.BI \-\-last \ N +consider last N archives after other filters were applied +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg info /path/to/repo::root\-2016\-02\-15 +Name: root\-2016\-02\-15 +Fingerprint: 57c827621f21b000a8d363c1e163cc55983822b3afff3a96df595077a660be50 +Hostname: myhostname +Username: root +Time (start): Mon, 2016\-02\-15 19:36:29 +Time (end): Mon, 2016\-02\-15 19:39:26 +Command line: /usr/local/bin/borg create \-\-list \-C zlib,6 /path/to/repo::root\-2016\-02\-15 / \-\-one\-file\-system +Number of files: 38100 + + Original size Compressed size Deduplicated size +This archive: 1.33 GB 613.25 MB 571.64 MB +All archives: 1.63 GB 853.66 MB 584.12 MB + + Unique chunks Total chunks +Chunk index: 36858 48844 +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-list(1)\fP, \fIborg\-diff(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-init.1 b/docs/man/borg-init.1 new file mode 100644 index 00000000..9576afa8 --- /dev/null +++ b/docs/man/borg-init.1 @@ -0,0 +1,217 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-INIT 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-init \- Initialize an empty repository +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] init REPOSITORY +.SH DESCRIPTION +.sp +This command initializes an empty repository. A repository is a filesystem +directory containing the deduplicated data from zero or more archives. +.sp +Encryption can be enabled at repository init time. It cannot be changed later. +.sp +It is not recommended to work without encryption. Repository encryption protects +you e.g. against the case that an attacker has access to your backup repository. +.sp +But be careful with the key / the passphrase: +.sp +If you want "passphrase\-only" security, use one of the repokey modes. The +key will be stored inside the repository (in its "config" file). In above +mentioned attack scenario, the attacker will have the key (but not the +passphrase). +.sp +If you want "passphrase and having\-the\-key" security, use one of the keyfile +modes. The key will be stored in your home directory (in .config/borg/keys). +In the attack scenario, the attacker who has just access to your repo won\(aqt +have the key (and also not the passphrase). +.sp +Make a backup copy of the key file (keyfile mode) or repo config file +(repokey mode) and keep it at a safe place, so you still have the key in +case it gets corrupted or lost. Also keep the passphrase at a safe place. +The backup that is encrypted with that key won\(aqt help you with that, of course. 
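+.sp
+For example, such a backup copy of the key could be created with
+\fBborg key export\fP (the paths below are placeholders), see \fIborg\-key\-export(1)\fP:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+# export the key of a keyfile or repokey repository to a backup location
+$ borg key export /path/to/repo /path/to/exported\-keyfile
+
+# export a version meant for printing and later manual re\-entry
+$ borg key export \-\-paper /path/to/repo /path/to/exported\-keyfile.txt
+.ft P
+.fi
+.UNINDENT
+.UNINDENT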
+.sp +Make sure you use a good passphrase. Not too short, not too simple. The real +encryption / decryption key is encrypted with / locked by your passphrase. +If an attacker gets your key, he can\(aqt unlock and use it without knowing the +passphrase. +.sp +Be careful with special or non\-ascii characters in your passphrase: +.INDENT 0.0 +.IP \(bu 2 +Borg processes the passphrase as unicode (and encodes it as utf\-8), +so it does not have problems dealing with even the strangest characters. +.IP \(bu 2 +BUT: that does not necessarily apply to your OS / VM / keyboard configuration. +.UNINDENT +.sp +So better use a long passphrase made from simple ascii chars than one that +includes non\-ascii stuff or characters that are hard/impossible to enter on +a different keyboard layout. +.sp +You can change your passphrase for existing repos at any time, it won\(aqt affect +the encryption/decryption key or other secrets. +.SS Encryption modes +.\" nanorst: inline-fill +. +.TS +center; +|l|l|l|l|. +_ +T{ +Hash/MAC +T} T{ +Not encrypted +no auth +T} T{ +Not encrypted, +but authenticated +T} T{ +Encrypted (AEAD w/ AES) +and authenticated +T} +_ +T{ +SHA\-256 +T} T{ +none +T} T{ +\fIauthenticated\fP +T} T{ +repokey +keyfile +T} +_ +T{ +BLAKE2b +T} T{ +n/a +T} T{ +\fIauthenticated\-blake2\fP +T} T{ +\fIrepokey\-blake2\fP +\fIkeyfile\-blake2\fP +T} +_ +.TE +.\" nanorst: inline-replace +. +.sp +\fIMarked modes\fP are new in Borg 1.1 and are not backwards\-compatible with Borg 1.0.x. +.sp +On modern Intel/AMD CPUs (except very cheap ones), AES is usually +hardware\-accelerated. +BLAKE2b is faster than SHA256 on Intel/AMD 64\-bit CPUs +(except AMD Ryzen and future CPUs with SHA extensions), +which makes \fIauthenticated\-blake2\fP faster than \fInone\fP and \fIauthenticated\fP\&. +.sp +On modern ARM CPUs, NEON provides hardware acceleration for SHA256 making it faster +than BLAKE2b\-256 there. NEON accelerates AES as well. +.sp +Hardware acceleration is always used automatically when available. +.sp +\fIrepokey\fP and \fIkeyfile\fP use AES\-CTR\-256 for encryption and HMAC\-SHA256 for +authentication in an encrypt\-then\-MAC (EtM) construction. The chunk ID hash +is HMAC\-SHA256 as well (with a separate key). +These modes are compatible with Borg 1.0.x. +.sp +\fIrepokey\-blake2\fP and \fIkeyfile\-blake2\fP are also authenticated encryption modes, +but use BLAKE2b\-256 instead of HMAC\-SHA256 for authentication. The chunk ID +hash is a keyed BLAKE2b\-256 hash. +These modes are new and \fInot\fP compatible with Borg 1.0.x. +.sp +\fIauthenticated\fP mode uses no encryption, but authenticates repository contents +through the same HMAC\-SHA256 hash as the \fIrepokey\fP and \fIkeyfile\fP modes (it uses it +as the chunk ID hash). The key is stored like \fIrepokey\fP\&. +This mode is new and \fInot\fP compatible with Borg 1.0.x. +.sp +\fIauthenticated\-blake2\fP is like \fIauthenticated\fP, but uses the keyed BLAKE2b\-256 hash +from the other blake2 modes. +This mode is new and \fInot\fP compatible with Borg 1.0.x. +.sp +\fInone\fP mode uses no encryption and no authentication. It uses SHA256 as chunk +ID hash. Not recommended, rather consider using an authenticated or +authenticated/encrypted mode. This mode has possible denial\-of\-service issues +when running \fBborg create\fP on contents controlled by an attacker. +Use it only for new repositories where no encryption is wanted \fBand\fP when compatibility +with 1.0.x is important. 
If compatibility with 1.0.x is not important, use +\fIauthenticated\-blake2\fP or \fIauthenticated\fP instead. +This mode is compatible with Borg 1.0.x. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY +repository to create +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-e\fP,\fB \-\-encryption +select encryption key mode \fB(required)\fP +.TP +.B \-\-append\-only +create an append\-only mode repository +.TP +.B \-\-storage\-quota +Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota. +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Local repository, repokey encryption, BLAKE2b (often faster, since Borg 1.1) +$ borg init \-\-encryption=repokey\-blake2 /path/to/repo + +# Local repository (no encryption) +$ borg init \-\-encryption=none /path/to/repo + +# Remote repository (accesses a remote borg via ssh) +$ borg init \-\-encryption=repokey\-blake2 user@hostname:backup + +# Remote repository (store the key your home dir) +$ borg init \-\-encryption=keyfile user@hostname:backup +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-create(1)\fP, \fIborg\-delete(1)\fP, \fIborg\-check(1)\fP, \fIborg\-list(1)\fP, \fIborg\-key\-import(1)\fP, \fIborg\-key\-export(1)\fP, \fIborg\-key\-change\-passphrase(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-key-change-passphrase.1 b/docs/man/borg-key-change-passphrase.1 new file mode 100644 index 00000000..88d1b2c1 --- /dev/null +++ b/docs/man/borg-key-change-passphrase.1 @@ -0,0 +1,96 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-KEY-CHANGE-PASSPHRASE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-key-change-passphrase \- Change repository key file passphrase +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] key change\-passphrase REPOSITORY +.SH DESCRIPTION +.sp +The key files used for repository encryption are optionally passphrase +protected. This command can be used to change this passphrase. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.sp +REPOSITORY +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Create a key file protected repository +$ borg init \-\-encryption=keyfile \-v /path/to/repo +Initializing repository at "/path/to/repo" +Enter new passphrase: +Enter same passphrase again: +Remember your passphrase. Your data will be inaccessible without it. +Key in "/root/.config/borg/keys/mnt_backup" created. +Keep this key safe. Your data will be inaccessible without it. +Synchronizing chunks cache... +Archives: 0, w/ cached Idx: 0, w/ outdated Idx: 0, w/o cached Idx: 0. +Done. 
+ +# Change key file passphrase +$ borg key change\-passphrase \-v /path/to/repo +Enter passphrase for key /root/.config/borg/keys/mnt_backup: +Enter new passphrase: +Enter same passphrase again: +Remember your passphrase. Your data will be inaccessible without it. +Key updated +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Fully automated using environment variables: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ BORG_NEW_PASSPHRASE=old borg init \-e=repokey repo +# now "old" is the current passphrase. +$ BORG_PASSPHRASE=old BORG_NEW_PASSPHRASE=new borg key change\-passphrase repo +# now "new" is the current passphrase. +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-key-export.1 b/docs/man/borg-key-export.1 new file mode 100644 index 00000000..23688683 --- /dev/null +++ b/docs/man/borg-key-export.1 @@ -0,0 +1,81 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-KEY-EXPORT 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-key-export \- Export the repository key for backup +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] key export REPOSITORY PATH +.SH DESCRIPTION +.sp +If repository encryption is used, the repository is inaccessible +without the key. This command allows to backup this essential key. +.sp +There are two backup formats. The normal backup format is suitable for +digital storage as a file. The \fB\-\-paper\fP backup format is optimized +for printing and typing in while importing, with per line checks to +reduce problems with manual input. +.sp +For repositories using keyfile encryption the key is saved locally +on the system that is capable of doing backups. To guard against loss +of this key, the key needs to be backed up independently of the main +data backup. +.sp +For repositories using the repokey encryption the key is saved in the +repository in the config file. A backup is thus not strictly needed, +but guards against the repository becoming inaccessible if the file +is damaged for some reason. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.sp +REPOSITORY +.INDENT 0.0 +.TP +.B PATH +where to store the backup +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-paper +Create an export suitable for printing and later type\-in +.TP +.B \-\-qr\-html +Create an html file suitable for printing and later type\-in or qr scan +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-key\-import(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-key-import.1 b/docs/man/borg-key-import.1 new file mode 100644 index 00000000..92a1754d --- /dev/null +++ b/docs/man/borg-key-import.1 @@ -0,0 +1,67 @@ +.\" Man page generated from reStructuredText. +. 
+.TH BORG-KEY-IMPORT 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-key-import \- Import the repository key from backup +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] key import REPOSITORY PATH +.SH DESCRIPTION +.sp +This command allows to restore a key previously backed up with the +export command. +.sp +If the \fB\-\-paper\fP option is given, the import will be an interactive +process in which each line is checked for plausibility before +proceeding to the next line. For this format PATH must not be given. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.sp +REPOSITORY +.INDENT 0.0 +.TP +.B PATH +path to the backup +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-paper +interactively import from a backup done with \fB\-\-paper\fP +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-key\-export(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-key-migrate-to-repokey.1 b/docs/man/borg-key-migrate-to-repokey.1 new file mode 100644 index 00000000..0d408612 --- /dev/null +++ b/docs/man/borg-key-migrate-to-repokey.1 @@ -0,0 +1,66 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-KEY-MIGRATE-TO-REPOKEY 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-key-migrate-to-repokey \- Migrate passphrase -> repokey +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] key migrate\-to\-repokey REPOSITORY +.SH DESCRIPTION +.sp +This command migrates a repository from passphrase mode (removed in Borg 1.0) +to repokey mode. +.sp +You will be first asked for the repository passphrase (to open it in passphrase +mode). This is the same passphrase as you used to use for this repo before 1.0. +.sp +It will then derive the different secrets from this passphrase. +.sp +Then you will be asked for a new passphrase (twice, for safety). This +passphrase will be used to protect the repokey (which contains these same +secrets in encrypted form). You may use the same passphrase as you used to +use, but you may also use a different one. +.sp +After migrating to repokey mode, you can change the passphrase at any time. 
+But please note: the secrets will always stay the same and they could always +be derived from your (old) passphrase\-mode passphrase. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.sp +REPOSITORY +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-key.1 b/docs/man/borg-key.1 new file mode 100644 index 00000000..0915aa5f --- /dev/null +++ b/docs/man/borg-key.1 @@ -0,0 +1,47 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-KEY 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-key \- Manage a keyfile or repokey of a repository +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.nf +borg [common options] key export ... +borg [common options] key import ... +borg [common options] key change\-passphrase ... +borg [common options] key migrate\-to\-repokey ... +.fi +.sp +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-key\-export(1)\fP, \fIborg\-key\-import(1)\fP, \fIborg\-key\-change\-passphrase(1)\fP, \fIborg\-key\-migrate\-to\-repokey(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-list.1 b/docs/man/borg-list.1 new file mode 100644 index 00000000..3bceff41 --- /dev/null +++ b/docs/man/borg-list.1 @@ -0,0 +1,249 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-LIST 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-list \- List archive or repository contents +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] list REPOSITORY_OR_ARCHIVE PATH +.SH DESCRIPTION +.sp +This command lists the contents of a repository or an archive. +.sp +See the "borg help patterns" command for more help on exclude patterns. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. 
+.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY_OR_ARCHIVE +repository/archive to list contents of +.TP +.B PATH +paths to list; patterns are supported +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-short +only print file/directory names, nothing else +.TP +.B \-\-format\fP,\fB \-\-list\-format +specify format for file listing +(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}") +.TP +.B \-\-json +Only valid for listing repository contents. Format output as JSON. The form of \fB\-\-format\fP is ignored, but keys used in it are added to the JSON output. Some keys are always present. Note: JSON can only represent text. A "barchive" key is therefore not available. +.TP +.B \-\-json\-lines +Only valid for listing archive contents. Format output as JSON Lines. The form of \fB\-\-format\fP is ignored, but keys used in it are added to the JSON output. Some keys are always present. Note: JSON can only represent text. A "bpath" key is therefore not available. +.UNINDENT +.SS filters +.INDENT 0.0 +.TP +.B \-P\fP,\fB \-\-prefix +only consider archive names starting with this prefix. +.TP +.B \-a\fP,\fB \-\-glob\-archives +only consider archive names matching the glob. sh: rules apply, see "borg help patterns". \fB\-\-prefix\fP and \fB\-\-glob\-archives\fP are mutually exclusive. +.TP +.B \-\-sort\-by +Comma\-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp +.TP +.BI \-\-first \ N +consider first N archives after other filters were applied +.TP +.BI \-\-last \ N +consider last N archives after other filters were applied +.UNINDENT +.SS Exclusion options +.INDENT 0.0 +.TP +.BI \-e \ PATTERN\fP,\fB \ \-\-exclude \ PATTERN +exclude paths matching PATTERN +.TP +.BI \-\-exclude\-from \ EXCLUDEFILE +read exclude patterns from EXCLUDEFILE, one per line +.TP +.B \-\-exclude\-caches +exclude directories that contain a CACHEDIR.TAG file (\fI\%http://www.brynosaurus.com/cachedir/spec.html\fP) +.TP +.BI \-\-exclude\-if\-present \ NAME +exclude directories that are tagged by containing a filesystem object with the given NAME +.TP +.B \-\-keep\-exclude\-tags\fP,\fB \-\-keep\-tag\-files +if tag objects are specified with \-\-exclude\-if\-present, don\(aqt omit the tag objects themselves from the backup archive +.TP +.BI \-\-pattern \ PATTERN +experimental: include/exclude paths matching PATTERN +.TP +.BI \-\-patterns\-from \ PATTERNFILE +experimental: read include/exclude patterns from PATTERNFILE, one per line +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg list /path/to/repo +Monday Mon, 2016\-02\-15 19:15:11 +repo Mon, 2016\-02\-15 19:26:54 +root\-2016\-02\-15 Mon, 2016\-02\-15 19:36:29 +newname Mon, 2016\-02\-15 19:50:19 +\&... + +$ borg list /path/to/repo::root\-2016\-02\-15 +drwxr\-xr\-x root root 0 Mon, 2016\-02\-15 17:44:27 . +drwxrwxr\-x root root 0 Mon, 2016\-02\-15 19:04:49 bin +\-rwxr\-xr\-x root root 1029624 Thu, 2014\-11\-13 00:08:51 bin/bash +lrwxrwxrwx root root 0 Fri, 2015\-03\-27 20:24:26 bin/bzcmp \-> bzdiff +\-rwxr\-xr\-x root root 2140 Fri, 2015\-03\-27 20:24:22 bin/bzdiff +\&... + +$ borg list /path/to/repo::archiveA \-\-list\-format="{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}" +drwxrwxr\-x user user 0 Sun, 2015\-02\-01 11:00:00 . +drwxrwxr\-x user user 0 Sun, 2015\-02\-01 11:00:00 code +drwxrwxr\-x user user 0 Sun, 2015\-02\-01 11:00:00 code/myproject +\-rw\-rw\-r\-\- user user 1416192 Sun, 2015\-02\-01 11:00:00 code/myproject/file.ext +\&... 
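+
+# For illustration: the \-\-json (repository listing) and \-\-json\-lines (archive
+# listing) options described above could be used like this; output omitted here.
+$ borg list \-\-json /path/to/repo
+$ borg list \-\-json\-lines /path/to/repo::root\-2016\-02\-15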
+.ft P +.fi +.UNINDENT +.UNINDENT +.SH NOTES +.sp +The following keys are available for \fB\-\-format\fP: +.INDENT 0.0 +.IP \(bu 2 +NEWLINE: OS dependent line separator +.IP \(bu 2 +NL: alias of NEWLINE +.IP \(bu 2 +NUL: NUL character for creating print0 / xargs \-0 like output, see barchive/bpath +.IP \(bu 2 +SPACE +.IP \(bu 2 +TAB +.IP \(bu 2 +CR +.IP \(bu 2 +LF +.UNINDENT +.sp +Keys for listing repository archives: +.INDENT 0.0 +.IP \(bu 2 +archive, name: archive name interpreted as text (might be missing non\-text characters, see barchive) +.IP \(bu 2 +barchive: verbatim archive name, can contain any character except NUL +.IP \(bu 2 +time: time of creation of the archive +.IP \(bu 2 +id: internal ID of the archive +.UNINDENT +.sp +Keys for listing archive files: +.INDENT 0.0 +.IP \(bu 2 +type +.IP \(bu 2 +mode +.IP \(bu 2 +uid +.IP \(bu 2 +gid +.IP \(bu 2 +user +.IP \(bu 2 +group +.IP \(bu 2 +path: path interpreted as text (might be missing non\-text characters, see bpath) +.IP \(bu 2 +bpath: verbatim POSIX path, can contain any character except NUL +.IP \(bu 2 +source: link target for links (identical to linktarget) +.IP \(bu 2 +linktarget +.IP \(bu 2 +flags +.IP \(bu 2 +size +.IP \(bu 2 +csize: compressed size +.IP \(bu 2 +dsize: deduplicated size +.IP \(bu 2 +dcsize: deduplicated compressed size +.IP \(bu 2 +num_chunks: number of chunks in this file +.IP \(bu 2 +unique_chunks: number of unique chunks in this file +.IP \(bu 2 +mtime +.IP \(bu 2 +ctime +.IP \(bu 2 +atime +.IP \(bu 2 +isomtime +.IP \(bu 2 +isoctime +.IP \(bu 2 +isoatime +.IP \(bu 2 +md5 +.IP \(bu 2 +sha1 +.IP \(bu 2 +sha224 +.IP \(bu 2 +sha256 +.IP \(bu 2 +sha384 +.IP \(bu 2 +sha512 +.IP \(bu 2 +archiveid +.IP \(bu 2 +archivename +.IP \(bu 2 +extra: prepends {source} with " \-> " for soft links and " link to " for hard links +.IP \(bu 2 +health: either "healthy" (file ok) or "broken" (if file has all\-zero replacement chunks) +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-info(1)\fP, \fIborg\-diff(1)\fP, \fIborg\-prune(1)\fP, \fIborg\-patterns(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-mount.1 b/docs/man/borg-mount.1 new file mode 100644 index 00000000..d1892ac1 --- /dev/null +++ b/docs/man/borg-mount.1 @@ -0,0 +1,118 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-MOUNT 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-mount \- Mount archive or an entire repository as a FUSE filesystem +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] mount REPOSITORY_OR_ARCHIVE MOUNTPOINT +.SH DESCRIPTION +.sp +This command mounts an archive as a FUSE filesystem. This can be useful for +browsing an archive or restoring individual files. Unless the \fB\-\-foreground\fP +option is given the command will run in the background until the filesystem +is \fBumounted\fP\&. 
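+.sp
+For example (the mount point and archive name below are placeholders), an archive
+can be mounted, browsed and unmounted again like this, see also \fIborg\-umount(1)\fP:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+# mount a single archive, look around, then unmount it again
+$ borg mount /path/to/repo::root\-2016\-02\-15 /tmp/mymountpoint
+$ ls /tmp/mymountpoint
+$ borg umount /tmp/mymountpoint
+.ft P
+.fi
+.UNINDENT
+.UNINDENT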
+.sp +The command \fBborgfs\fP provides a wrapper for \fBborg mount\fP\&. This can also be +used in fstab entries: +\fB/path/to/repo /mnt/point fuse.borgfs defaults,noauto 0 0\fP +.sp +To allow a regular user to use fstab entries, add the \fBuser\fP option: +\fB/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0\fP +.sp +For mount options, see the fuse(8) manual page. Additional mount options +supported by borg: +.INDENT 0.0 +.IP \(bu 2 +versions: when used with a repository mount, this gives a merged, versioned +view of the files in the archives. EXPERIMENTAL, layout may change in future. +.IP \(bu 2 +allow_damaged_files: by default damaged files (where missing chunks were +replaced with runs of zeros by borg check \fB\-\-repair\fP) are not readable and +return EIO (I/O error). Set this option to read such files. +.UNINDENT +.sp +The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users +to tweak the performance. It sets the number of cached data chunks; additional +memory usage can be up to ~8 MiB times this number. The default is the number +of CPU cores. +.sp +When the daemonized process receives a signal or crashes, it does not unmount. +Unmounting in these cases could cause an active rsync or similar process +to unintentionally delete data. +.sp +When running in the foreground ^C/SIGINT unmounts cleanly, but other +signals or crashes do not. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY_OR_ARCHIVE +repository/archive to mount +.TP +.B MOUNTPOINT +where to mount filesystem +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-f\fP,\fB \-\-foreground +stay in foreground, do not daemonize +.TP +.B \-o +Extra mount options +.UNINDENT +.SS filters +.INDENT 0.0 +.TP +.B \-P\fP,\fB \-\-prefix +only consider archive names starting with this prefix. +.TP +.B \-a\fP,\fB \-\-glob\-archives +only consider archive names matching the glob. sh: rules apply, see "borg help patterns". \fB\-\-prefix\fP and \fB\-\-glob\-archives\fP are mutually exclusive. +.TP +.B \-\-sort\-by +Comma\-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp +.TP +.BI \-\-first \ N +consider first N archives after other filters were applied +.TP +.BI \-\-last \ N +consider last N archives after other filters were applied +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-umount(1)\fP, \fIborg\-extract(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-patterns.1 b/docs/man/borg-patterns.1 new file mode 100644 index 00000000..239a441f --- /dev/null +++ b/docs/man/borg-patterns.1 @@ -0,0 +1,201 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-PATTERNS 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-patterns \- Details regarding patterns +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH DESCRIPTION +.sp +File patterns support these styles: fnmatch, shell, regular expressions, +path prefixes and path full\-matches. By default, fnmatch is used for +\fB\-\-exclude\fP patterns and shell\-style is used for the experimental \fB\-\-pattern\fP +option. +.sp +If followed by a colon (\(aq:\(aq) the first two characters of a pattern are used as a +style selector. Explicit style selection is necessary when a +non\-default style is desired or when the desired pattern starts with +two alphanumeric characters followed by a colon (i.e. \fIaa:something/*\fP). +.INDENT 0.0 +.TP +.B \fI\%Fnmatch\fP, selector \fIfm:\fP +This is the default style for \fB\-\-exclude\fP and \fB\-\-exclude\-from\fP\&. +These patterns use a variant of shell pattern syntax, with \(aq*\(aq matching +any number of characters, \(aq?\(aq matching any single character, \(aq[...]\(aq +matching any single character specified, including ranges, and \(aq[!...]\(aq +matching any character not specified. For the purpose of these patterns, +the path separator (\(aq\(aq for Windows and \(aq/\(aq on other systems) is not +treated specially. Wrap meta\-characters in brackets for a literal +match (i.e. \fI[?]\fP to match the literal character \fI?\fP). For a path +to match a pattern, it must completely match from start to end, or +must match from the start to just before a path separator. Except +for the root path, paths will never end in the path separator when +matching is attempted. Thus, if a given pattern ends in a path +separator, a \(aq*\(aq is appended before matching is attempted. +.TP +.B Shell\-style patterns, selector \fIsh:\fP +This is the default style for \-\-pattern and \-\-patterns\-from. +Like fnmatch patterns these are similar to shell patterns. The difference +is that the pattern may include \fI**/\fP for matching zero or more directory +levels, \fI*\fP for matching zero or more arbitrary characters with the +exception of any path separator. +.TP +.B Regular expressions, selector \fIre:\fP +Regular expressions similar to those found in Perl are supported. Unlike +shell patterns regular expressions are not required to match the complete +path and any substring match is sufficient. It is strongly recommended to +anchor patterns to the start (\(aq^\(aq), to the end (\(aq$\(aq) or both. Path +separators (\(aq\(aq for Windows and \(aq/\(aq on other systems) in paths are +always normalized to a forward slash (\(aq/\(aq) before applying a pattern. The +regular expression syntax is described in the \fI\%Python documentation for +the re module\fP\&. +.TP +.B Path prefix, selector \fIpp:\fP +This pattern style is useful to match whole sub\-directories. The pattern +\fIpp:/data/bar\fP matches \fI/data/bar\fP and everything therein. +.TP +.B Path full\-match, selector \fIpf:\fP +This pattern style is useful to match whole paths. +This is kind of a pseudo pattern as it can not have any variable or +unspecified parts \- the full, precise path must be given. +\fIpf:/data/foo.txt\fP matches \fI/data/foo.txt\fP only. +.sp +Implementation note: this is implemented via very time\-efficient O(1) +hashtable lookups (this means you can have huge amounts of such patterns +without impacting performance much). +Due to that, this kind of pattern does not respect any context or order. 
+If you use such a pattern to include a file, it will always be included +(if the directory recursion encounters it). +Other include/exclude patterns that would normally match will be ignored. +Same logic applies for exclude. +.UNINDENT +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +\fIre:\fP, \fIsh:\fP and \fIfm:\fP patterns are all implemented on top of the Python SRE +engine. It is very easy to formulate patterns for each of these types which +requires an inordinate amount of time to match paths. If untrusted users +are able to supply patterns, ensure they cannot supply \fIre:\fP patterns. +Further, ensure that \fIsh:\fP and \fIfm:\fP patterns only contain a handful of +wildcards at most. +.UNINDENT +.UNINDENT +.sp +Exclusions can be passed via the command line option \fB\-\-exclude\fP\&. When used +from within a shell the patterns should be quoted to protect them from +expansion. +.sp +The \fB\-\-exclude\-from\fP option permits loading exclusion patterns from a text +file with one pattern per line. Lines empty or starting with the number sign +(\(aq#\(aq) after removing whitespace on both ends are ignored. The optional style +selector prefix is also supported for patterns loaded from a file. Due to +whitespace removal paths with whitespace at the beginning or end can only be +excluded using regular expressions. +.sp +Examples: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Exclude \(aq/home/user/file.o\(aq but not \(aq/home/user/file.odt\(aq: +$ borg create \-e \(aq*.o\(aq backup / + +# Exclude \(aq/home/user/junk\(aq and \(aq/home/user/subdir/junk\(aq but +# not \(aq/home/user/importantjunk\(aq or \(aq/etc/junk\(aq: +$ borg create \-e \(aq/home/*/junk\(aq backup / + +# Exclude the contents of \(aq/home/user/cache\(aq but not the directory itself: +$ borg create \-e /home/user/cache/ backup / + +# The file \(aq/home/user/cache/important\(aq is *not* backed up: +$ borg create \-e /home/user/cache/ backup / /home/user/cache/important + +# The contents of directories in \(aq/home\(aq are not backed up when their name +# ends in \(aq.tmp\(aq +$ borg create \-\-exclude \(aqre:^/home/[^/]+\e.tmp/\(aq backup / + +# Load exclusions from file +$ cat >exclude.txt < REPOSITORY +.SH DESCRIPTION +.sp +The prune command prunes a repository by deleting all archives not matching +any of the specified retention options. This command is normally used by +automated backup scripts wanting to keep a certain number of historic backups. +.sp +Also, prune automatically removes checkpoint archives (incomplete archives left +behind by interrupted backup runs) except if the checkpoint is the latest +archive (and thus still needed). Checkpoint archives are not considered when +comparing archive counts against the retention limits (\fB\-\-keep\-X\fP). +.sp +If a prefix is set with \-P, then only archives that start with the prefix are +considered for deletion and only those archives count towards the totals +specified by the rules. +Otherwise, \fIall\fP archives in the repository are candidates for deletion! +There is no automatic distinction between archives representing different +contents. These need to be distinguished by specifying matching prefixes. +.sp +If you have multiple sequences of archives with different data sets (e.g. +from different machines) in one shared repository, use one prune call per +data set that matches only the respective archives using the \-P option. +.sp +The \fB\-\-keep\-within\fP option takes an argument of the form "", +where char is "H", "d", "w", "m", "y". 
For example, \fB\-\-keep\-within 2d\fP means +to keep all archives that were created within the past 48 hours. +"1m" is taken to mean "31d". The archives kept with this option do not +count towards the totals specified by any other options. +.sp +A good procedure is to thin out more and more the older your backups get. +As an example, \fB\-\-keep\-daily 7\fP means to keep the latest backup on each day, +up to 7 most recent days with backups (days without backups do not count). +The rules are applied from secondly to yearly, and backups selected by previous +rules do not count towards those of later rules. The time that each backup +starts is used for pruning purposes. Dates and times are interpreted in +the local timezone, and weeks go from Monday to Sunday. Specifying a +negative number of archives to keep means that there is no limit. +.sp +The \fB\-\-keep\-last N\fP option is doing the same as \fB\-\-keep\-secondly N\fP (and it will +keep the last N archives under the assumption that you do not create more than one +backup archive in the same second). +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY +repository to prune +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-n\fP,\fB \-\-dry\-run +do not change repository +.TP +.B \-\-force +force pruning of corrupted archives +.TP +.B \-s\fP,\fB \-\-stats +print statistics for the deleted archive +.TP +.B \-\-list +output verbose list of archives it keeps/prunes +.TP +.BI \-\-keep\-within \ WITHIN +keep all archives within this time interval +.TP +.B \-\-keep\-last\fP,\fB \-\-keep\-secondly +number of secondly archives to keep +.TP +.B \-\-keep\-minutely +number of minutely archives to keep +.TP +.B \-H\fP,\fB \-\-keep\-hourly +number of hourly archives to keep +.TP +.B \-d\fP,\fB \-\-keep\-daily +number of daily archives to keep +.TP +.B \-w\fP,\fB \-\-keep\-weekly +number of weekly archives to keep +.TP +.B \-m\fP,\fB \-\-keep\-monthly +number of monthly archives to keep +.TP +.B \-y\fP,\fB \-\-keep\-yearly +number of yearly archives to keep +.TP +.B \-\-save\-space +work slower, but using less space +.UNINDENT +.SS filters +.INDENT 0.0 +.TP +.B \-P\fP,\fB \-\-prefix +only consider archive names starting with this prefix. +.TP +.B \-a\fP,\fB \-\-glob\-archives +only consider archive names matching the glob. sh: rules apply, see "borg help patterns". \fB\-\-prefix\fP and \fB\-\-glob\-archives\fP are mutually exclusive. +.UNINDENT +.SH EXAMPLES +.sp +Be careful, prune is a potentially dangerous command, it will remove backup +archives. +.sp +The default of prune is to apply to \fBall archives in the repository\fP unless +you restrict its operation to a subset of the archives using \fB\-\-prefix\fP\&. +When using \fB\-\-prefix\fP, be careful to choose a good prefix \- e.g. do not use a +prefix "foo" if you do not also want to match "foobar". +.sp +It is strongly recommended to always run \fBprune \-v \-\-list \-\-dry\-run ...\fP +first so you will see what it would do without it actually doing anything. +.sp +There is also a visualized prune example in \fBdocs/misc/prune\-example.txt\fP\&. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Keep 7 end of day and 4 additional end of week archives. +# Do a dry\-run without actually deleting anything. 
+$ borg prune \-v \-\-list \-\-dry\-run \-\-keep\-daily=7 \-\-keep\-weekly=4 /path/to/repo + +# Same as above but only apply to archive names starting with the hostname +# of the machine followed by a "\-" character: +$ borg prune \-v \-\-list \-\-keep\-daily=7 \-\-keep\-weekly=4 \-\-prefix=\(aq{hostname}\-\(aq /path/to/repo + +# Keep 7 end of day, 4 additional end of week archives, +# and an end of month archive for every month: +$ borg prune \-v \-\-list \-\-keep\-daily=7 \-\-keep\-weekly=4 \-\-keep\-monthly=\-1 /path/to/repo + +# Keep all backups in the last 10 days, 4 additional end of week archives, +# and an end of month archive for every month: +$ borg prune \-v \-\-list \-\-keep\-within=10d \-\-keep\-weekly=4 \-\-keep\-monthly=\-1 /path/to/repo +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-recreate.1 b/docs/man/borg-recreate.1 new file mode 100644 index 00000000..9124d110 --- /dev/null +++ b/docs/man/borg-recreate.1 @@ -0,0 +1,192 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-RECREATE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-recreate \- Re-create archives +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] recreate REPOSITORY_OR_ARCHIVE PATH +.SH DESCRIPTION +.sp +Recreate the contents of existing archives. +.sp +This is an \fIexperimental\fP feature. Do \fInot\fP use this on your only backup. +.sp +\fB\-\-exclude\fP, \fB\-\-exclude\-from\fP, \fB\-\-exclude\-if\-present\fP, \fB\-\-keep\-exclude\-tags\fP, and PATH +have the exact same semantics as in "borg create". If PATHs are specified the +resulting archive will only contain files from these PATHs. +.sp +Note that all paths in an archive are relative, therefore absolute patterns/paths +will \fInot\fP match (\fB\-\-exclude\fP, \fB\-\-exclude\-from\fP, PATHs). +.sp +\fB\-\-recompress\fP allows to change the compression of existing data in archives. +Due to how Borg stores compressed size information this might display +incorrect information for archives that were not recreated at the same time. +There is no risk of data loss by this. +.sp +\fB\-\-chunker\-params\fP will re\-chunk all files in the archive, this can be +used to have upgraded Borg 0.xx or Attic archives deduplicate with +Borg 1.x archives. +.sp +\fBUSE WITH CAUTION.\fP +Depending on the PATHs and patterns given, recreate can be used to permanently +delete files from archives. +When in doubt, use \fB\-\-dry\-run \-\-verbose \-\-list\fP to see how patterns/PATHS are +interpreted. +.sp +The archive being recreated is only removed after the operation completes. The +archive that is built during the operation exists at the same time at +".recreate". The new archive will have a different archive ID. 
+.sp +With \fB\-\-target\fP the original archive is not replaced, instead a new archive is created. +.sp +When rechunking space usage can be substantial, expect at least the entire +deduplicated size of the archives using the previous chunker params. +When recompressing expect approx. (throughput / checkpoint\-interval) in space usage, +assuming all chunks are recompressed. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY_OR_ARCHIVE +repository/archive to recreate +.TP +.B PATH +paths to recreate; patterns are supported +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-\-list +output verbose list of items (files, dirs, ...) +.TP +.BI \-\-filter \ STATUSCHARS +only display items with the given status characters +.TP +.B \-n\fP,\fB \-\-dry\-run +do not change anything +.TP +.B \-s\fP,\fB \-\-stats +print statistics at end +.UNINDENT +.SS Exclusion options +.INDENT 0.0 +.TP +.BI \-e \ PATTERN\fP,\fB \ \-\-exclude \ PATTERN +exclude paths matching PATTERN +.TP +.BI \-\-exclude\-from \ EXCLUDEFILE +read exclude patterns from EXCLUDEFILE, one per line +.TP +.B \-\-exclude\-caches +exclude directories that contain a CACHEDIR.TAG file (\fI\%http://www.brynosaurus.com/cachedir/spec.html\fP) +.TP +.BI \-\-exclude\-if\-present \ NAME +exclude directories that are tagged by containing a filesystem object with the given NAME +.TP +.B \-\-keep\-exclude\-tags\fP,\fB \-\-keep\-tag\-files +if tag objects are specified with \fB\-\-exclude\-if\-present\fP, don\(aqt omit the tag objects themselves from the backup archive +.TP +.BI \-\-pattern \ PATTERN +experimental: include/exclude paths matching PATTERN +.TP +.BI \-\-patterns\-from \ PATTERNFILE +experimental: read include/exclude patterns from PATTERNFILE, one per line +.UNINDENT +.SS Archive options +.INDENT 0.0 +.TP +.BI \-\-target \ TARGET +create a new archive with the name ARCHIVE, do not replace existing archive (only applies for a single archive) +.TP +.BI \-c \ SECONDS\fP,\fB \ \-\-checkpoint\-interval \ SECONDS +write checkpoint every SECONDS seconds (Default: 1800) +.TP +.BI \-\-comment \ COMMENT +add a comment text to the archive +.TP +.BI \-\-timestamp \ TIMESTAMP +manually specify the archive creation date/time (UTC, yyyy\-mm\-ddThh:mm:ss format). alternatively, give a reference file/directory. +.TP +.BI \-C \ COMPRESSION\fP,\fB \ \-\-compression \ COMPRESSION +select compression algorithm, see the output of the "borg help compression" command for details. +.TP +.B \-\-recompress +recompress data chunks according to \fB\-\-compression\fP if \fIif\-different\fP\&. When \fIalways\fP, chunks that are already compressed that way are not skipped, but compressed again. Only the algorithm is considered for \fIif\-different\fP, not the compression level (if any). +.TP +.BI \-\-chunker\-params \ PARAMS +specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) or \fIdefault\fP to use the current defaults. 
default: 19,23,21,4095 +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Make old (Attic / Borg 0.xx) archives deduplicate with Borg 1.x archives +# Archives created with Borg 1.1+ and the default chunker params are skipped (archive ID stays the same) +$ borg recreate /mnt/backup \-\-chunker\-params default \-\-progress + +# Create a backup with little but fast compression +$ borg create /mnt/backup::archive /some/files \-\-compression lz4 +# Then compress it \- this might take longer, but the backup has already completed, so no inconsistencies +# from a long\-running backup job. +$ borg recreate /mnt/backup::archive \-\-recompress \-\-compression zlib,9 + +# Remove unwanted files from all archives in a repository +$ borg recreate /mnt/backup \-e /home/icke/Pictures/drunk_photos + + +# Change archive comment +$ borg create \-\-comment "This is a comment" /mnt/backup::archivename ~ +$ borg info /mnt/backup::archivename +Name: archivename +Fingerprint: ... +Comment: This is a comment +\&... +$ borg recreate \-\-comment "This is a better comment" /mnt/backup::archivename +$ borg info /mnt/backup::archivename +Name: archivename +Fingerprint: ... +Comment: This is a better comment +\&... +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-patterns(1)\fP, \fIborg\-placeholders(1)\fP, \fIborg\-compression(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-rename.1 b/docs/man/borg-rename.1 new file mode 100644 index 00000000..17e65f43 --- /dev/null +++ b/docs/man/borg-rename.1 @@ -0,0 +1,76 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-RENAME 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-rename \- Rename an existing archive +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] rename ARCHIVE NEWNAME +.SH DESCRIPTION +.sp +This command renames an archive in the repository. +.sp +This results in a different archive ID. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B ARCHIVE +archive to rename +.TP +.B NEWNAME +the new archive name to use +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg create /path/to/repo::archivename ~ +$ borg list /path/to/repo +archivename Mon, 2016\-02\-15 19:50:19 + +$ borg rename /path/to/repo::archivename newname +$ borg list /path/to/repo +newname Mon, 2016\-02\-15 19:50:19 +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-serve.1 b/docs/man/borg-serve.1 new file mode 100644 index 00000000..5052c724 --- /dev/null +++ b/docs/man/borg-serve.1 @@ -0,0 +1,96 @@ +.\" Man page generated from reStructuredText. +. 
+.TH BORG-SERVE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-serve \- Start in server mode. This command is usually not used manually. +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] serve +.SH DESCRIPTION +.sp +This command starts a repository server process. This command is usually not used manually. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS optional arguments +.INDENT 0.0 +.TP +.BI \-\-restrict\-to\-path \ PATH +restrict repository access to PATH. Can be specified multiple times to allow the client access to several directories. Access to all sub\-directories is granted implicitly; PATH doesn\(aqt need to directly point to a repository. +.TP +.BI \-\-restrict\-to\-repository \ PATH +restrict repository access. Only the repository located at PATH (no sub\-directories are considered) is accessible. Can be specified multiple times to allow the client access to several repositories. Unlike \-\-restrict\-to\-path sub\-directories are not accessible; PATH needs to directly point at a repository location. PATH may be an empty directory or the last element of PATH may not exist, in which case the client may initialize a repository there. +.TP +.B \-\-append\-only +only allow appending to repository segment files +.TP +.B \-\-storage\-quota +Override storage quota of the repository (e.g. 5G, 1.5T). When a new repository is initialized, sets the storage quota on the new repository as well. Default: no quota. +.UNINDENT +.SH EXAMPLES +.sp +borg serve has special support for ssh forced commands (see \fBauthorized_keys\fP +example below): it will detect that you use such a forced command and extract +the value of the \fB\-\-restrict\-to\-path\fP option(s). +.sp +It will then parse the original command that came from the client, makes sure +that it is also \fBborg serve\fP and enforce path restriction(s) as given by the +forced command. That way, other options given by the client (like \fB\-\-info\fP or +\fB\-\-umask\fP) are preserved (and are not fixed by the forced command). +.sp +Environment variables (such as BORG_HOSTNAME_IS_UNIQUE) contained in the original +command sent by the client are \fInot\fP interpreted, but ignored. If BORG_XXX environment +variables should be set on the \fBborg serve\fP side, then these must be set in system\-specific +locations like \fB/etc/environment\fP or in the forced command itself (example below). +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Allow an SSH keypair to only run borg, and only have access to /path/to/repo. +# Use key options to disable unneeded and potentially dangerous SSH functionality. +# This will help to secure an automated remote backup system. +$ cat ~/.ssh/authorized_keys +command="borg serve \-\-restrict\-to\-path /path/to/repo",no\-pty,no\-agent\-forwarding,no\-port\-forwarding,no\-X11\-forwarding,no\-user\-rc ssh\-rsa AAAAB3[...] 
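+
+# Variant (sketch): pin the same keypair to exactly one repository instead of
+# a path subtree, using \-\-restrict\-to\-repository (see OPTIONS above). The
+# repository location is just the example path from above.
+command="borg serve \-\-restrict\-to\-repository /path/to/repo",restrict ssh\-rsa AAAAB3[...]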
+ +# Set a BORG_XXX environment variable on the "borg serve" side +$ cat ~/.ssh/authorized_keys +command="export BORG_XXX=value; borg serve [...]",restrict ssh\-rsa [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-umount.1 b/docs/man/borg-umount.1 new file mode 100644 index 00000000..21b3c2ee --- /dev/null +++ b/docs/man/borg-umount.1 @@ -0,0 +1,115 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-UMOUNT 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-umount \- un-mount the FUSE filesystem +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] umount MOUNTPOINT +.SH DESCRIPTION +.sp +This command un\-mounts a FUSE filesystem that was mounted with \fBborg mount\fP\&. +.sp +This is a convenience wrapper that just calls the platform\-specific shell +command \- usually this is either umount or fusermount \-u. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B MOUNTPOINT +mountpoint of the filesystem to umount +.UNINDENT +.SH EXAMPLES +.SS borg mount +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg mount /path/to/repo::root\-2016\-02\-15 /tmp/mymountpoint +$ ls /tmp/mymountpoint +bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var +$ borg umount /tmp/mymountpoint +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg mount \-o versions /path/to/repo /tmp/mymountpoint +$ ls \-l /tmp/mymountpoint/home/user/doc.txt/ +total 24 +\-rw\-rw\-r\-\- 1 user group 12357 Aug 26 21:19 doc.txt.cda00bc9 +\-rw\-rw\-r\-\- 1 user group 12204 Aug 26 21:04 doc.txt.fa760f28 +$ fusermount \-u /tmp/mymountpoint +.ft P +.fi +.UNINDENT +.UNINDENT +.SS borgfs +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ echo \(aq/mnt/backup /tmp/myrepo fuse.borgfs defaults,noauto 0 0\(aq >> /etc/fstab +$ echo \(aq/mnt/backup::root\-2016\-02\-15 /tmp/myarchive fuse.borgfs defaults,noauto 0 0\(aq >> /etc/fstab +$ mount /tmp/myrepo +$ mount /tmp/myarchive +$ ls /tmp/myrepo +root\-2016\-02\-01 root\-2016\-02\-2015 +$ ls /tmp/myarchive +bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +\fBborgfs\fP will be automatically provided if you used a distribution +package, \fBpip\fP or \fBsetup.py\fP to install Borg. Users of the +standalone binary will have to manually create a symlink (see +\fIpyinstaller\-binary\fP). +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP, \fIborg\-mount(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. 
diff --git a/docs/man/borg-upgrade.1 b/docs/man/borg-upgrade.1 new file mode 100644 index 00000000..0d4cef89 --- /dev/null +++ b/docs/man/borg-upgrade.1 @@ -0,0 +1,187 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-UPGRADE 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-upgrade \- upgrade a repository from a previous version +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] upgrade REPOSITORY +.SH DESCRIPTION +.sp +Upgrade an existing, local Borg repository. +.SS When you do not need borg upgrade +.sp +Not every change requires that you run \fBborg upgrade\fP\&. +.sp +You do \fBnot\fP need to run it when: +.INDENT 0.0 +.IP \(bu 2 +moving your repository to a different place +.IP \(bu 2 +upgrading to another point release (like 1.0.x to 1.0.y), +except when noted otherwise in the changelog +.IP \(bu 2 +upgrading from 1.0.x to 1.1.x, +except when noted otherwise in the changelog +.UNINDENT +.SS Borg 1.x.y upgrades +.sp +Use \fBborg upgrade \-\-tam REPO\fP to require manifest authentication +introduced with Borg 1.0.9 to address security issues. This means +that modifying the repository after doing this with a version prior +to 1.0.9 will raise a validation error, so only perform this upgrade +after updating all clients using the repository to 1.0.9 or newer. +.sp +This upgrade should be done on each client for safety reasons. +.sp +If a repository is accidentally modified with a pre\-1.0.9 client after +this upgrade, use \fBborg upgrade \-\-tam \-\-force REPO\fP to remedy it. +.sp +If you routinely do this you might not want to enable this upgrade +(which will leave you exposed to the security issue). You can +reverse the upgrade by issuing \fBborg upgrade \-\-disable\-tam REPO\fP\&. +.sp +See +\fI\%https://borgbackup.readthedocs.io/en/stable/changes.html#pre\-1\-0\-9\-manifest\-spoofing\-vulnerability\fP +for details. +.SS Attic and Borg 0.xx to Borg 1.x +.sp +This currently supports converting an Attic repository to Borg and also +helps with converting Borg 0.xx to 1.0. +.sp +Currently, only LOCAL repositories can be upgraded (issue #465). +.sp +Please note that \fBborg create\fP (since 1.0.0) uses bigger chunks by +default than old borg or attic did, so the new chunks won\(aqt deduplicate +with the old chunks in the upgraded repository. +See \fB\-\-chunker\-params\fP option of \fBborg create\fP and \fBborg recreate\fP\&. +.sp +\fBborg upgrade\fP will change the magic strings in the repository\(aqs +segments to match the new Borg magic strings. The keyfiles found in +$ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and +copied to $BORG_KEYS_DIR or ~/.config/borg/keys. 
+.sp +The cache files are converted, from $ATTIC_CACHE_DIR or +~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the +cache layout between Borg and Attic changed, so it is possible +the first backup after the conversion takes longer than expected +due to the cache resync. +.sp +Upgrade should be able to resume if interrupted, although it +will still iterate over all segments. If you want to start +from scratch, use \fIborg delete\fP over the copied repository to +make sure the cache files are also removed: +.INDENT 0.0 +.INDENT 3.5 +borg delete borg +.UNINDENT +.UNINDENT +.sp +Unless \fB\-\-inplace\fP is specified, the upgrade process first +creates a backup copy of the repository, in +REPOSITORY.upgrade\-DATETIME, using hardlinks. This takes +longer than in place upgrades, but is much safer and gives +progress information (as opposed to \fBcp \-al\fP). Once you are +satisfied with the conversion, you can safely destroy the +backup copy. +.sp +WARNING: Running the upgrade in place will make the current +copy unusable with older version, with no way of going back +to previous versions. This can PERMANENTLY DAMAGE YOUR +REPOSITORY! Attic CAN NOT READ BORG REPOSITORIES, as the +magic strings have changed. You have been warned. +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY +path to the repository to be upgraded +.UNINDENT +.SS optional arguments +.INDENT 0.0 +.TP +.B \-n\fP,\fB \-\-dry\-run +do not change repository +.TP +.B \-\-inplace +rewrite repository in place, with no chance of going back to older +versions of the repository. +.TP +.B \-\-force +Force upgrade +.TP +.B \-\-tam +Enable manifest authentication (in key and cache) (Borg 1.0.9 and later) +.TP +.B \-\-disable\-tam +Disable manifest authentication (in key and cache) +.UNINDENT +.SH EXAMPLES +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# Upgrade the borg repository to the most recent version. +$ borg upgrade \-v /path/to/repo +making a hardlink copy in /path/to/repo.upgrade\-2016\-02\-15\-20:51:55 +opening attic repository with borg and converting +no key file found for repository +converting repo index /path/to/repo/index.0 +converting 1 segments... +converting borg 0.xx to borg current +no key file found for repository +.ft P +.fi +.UNINDENT +.UNINDENT +.SS Upgrading a passphrase encrypted attic repo +.sp +attic offered a "passphrase" encryption mode, but this was removed in borg 1.0 +and replaced by the "repokey" mode (which stores the passphrase\-protected +encryption key into the repository config). +.sp +Thus, to upgrade a "passphrase" attic repo to a "repokey" borg repo, 2 steps +are needed, in this order: +.INDENT 0.0 +.IP \(bu 2 +borg upgrade repo +.IP \(bu 2 +borg key migrate\-to\-repokey repo +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg-with-lock.1 b/docs/man/borg-with-lock.1 new file mode 100644 index 00000000..c71bd28a --- /dev/null +++ b/docs/man/borg-with-lock.1 @@ -0,0 +1,71 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG-WITH-LOCK 1 "2017-06-18" "" "borg backup tool" +.SH NAME +borg-with-lock \- run a user specified command with the repository lock held +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. 
+.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] with\-lock REPOSITORY COMMAND ARGS +.SH DESCRIPTION +.sp +This command runs a user\-specified command while the repository lock is held. +.sp +It will first try to acquire the lock (make sure that no other operation is +running in the repo), then execute the given command as a subprocess and wait +for its termination, release the lock and return the user command\(aqs return +code as borg\(aqs return code. +.INDENT 0.0 +.TP +.B Note: if you copy a repository with the lock held, the lock will be present in +the copy, obviously. Thus, before using borg on the copy, you need to +use "borg break\-lock" on it. +.UNINDENT +.SH OPTIONS +.sp +See \fIborg\-common(1)\fP for common options of Borg commands. +.SS arguments +.INDENT 0.0 +.TP +.B REPOSITORY +repository to lock +.TP +.B COMMAND +command to run +.TP +.B ARGS +command arguments +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man/borg.1 b/docs/man/borg.1 new file mode 100644 index 00000000..99493444 --- /dev/null +++ b/docs/man/borg.1 @@ -0,0 +1,603 @@ +.\" Man page generated from reStructuredText. +. +.TH BORG 1 "2017-02-05" "" "borg backup tool" +.SH NAME +borg \- deduplicating and encrypting backup tool +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +borg [common options] [options] [arguments] +.SH DESCRIPTION +.\" we don't include the README.rst here since we want to keep this terse. +. +.sp +BorgBackup (short: Borg) is a deduplicating backup program. +Optionally, it supports compression and authenticated encryption. +.sp +The main goal of Borg is to provide an efficient and secure way to backup data. +The data deduplication technique used makes Borg suitable for daily backups +since only changes are stored. +The authenticated encryption technique makes it suitable for backups to not +fully trusted targets. +.sp +Borg stores a set of files in an \fIarchive\fP\&. A \fIrepository\fP is a collection +of \fIarchives\fP\&. The format of repositories is Borg\-specific. Borg does not +distinguish archives from each other in a any way other than their name, +it does not matter when or where archives where created (eg. different hosts). +.SH EXAMPLES +.SS A step\-by\-step example +.INDENT 0.0 +.IP 1. 
3 +Before a backup can be made a repository has to be initialized: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg init \-\-encryption=repokey /path/to/repo +.ft P +.fi +.UNINDENT +.UNINDENT +.IP 2. 3 +Backup the \fB~/src\fP and \fB~/Documents\fP directories into an archive called +\fIMonday\fP: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg create /path/to/repo::Monday ~/src ~/Documents +.ft P +.fi +.UNINDENT +.UNINDENT +.IP 3. 3 +The next day create a new archive called \fITuesday\fP: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg create \-\-stats /path/to/repo::Tuesday ~/src ~/Documents +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This backup will be a lot quicker and a lot smaller since only new never +before seen data is stored. The \fB\-\-stats\fP option causes Borg to +output statistics about the newly created archive such as the amount of unique +data (not shared with other archives): +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +Archive name: Tuesday +Archive fingerprint: bd31004d58f51ea06ff735d2e5ac49376901b21d58035f8fb05dbf866566e3c2 +Time (start): Tue, 2016\-02\-16 18:15:11 +Time (end): Tue, 2016\-02\-16 18:15:11 + +Duration: 0.19 seconds +Number of files: 127 +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + Original size Compressed size Deduplicated size +This archive: 4.16 MB 4.17 MB 26.78 kB +All archives: 8.33 MB 8.34 MB 4.19 MB + + Unique chunks Total chunks +Chunk index: 132 261 +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +.ft P +.fi +.UNINDENT +.UNINDENT +.IP 4. 3 +List all archives in the repository: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg list /path/to/repo +Monday Mon, 2016\-02\-15 19:14:44 +Tuesday Tue, 2016\-02\-16 19:15:11 +.ft P +.fi +.UNINDENT +.UNINDENT +.IP 5. 3 +List the contents of the \fIMonday\fP archive: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg list /path/to/repo::Monday +drwxr\-xr\-x user group 0 Mon, 2016\-02\-15 18:22:30 home/user/Documents +\-rw\-r\-\-r\-\- user group 7961 Mon, 2016\-02\-15 18:22:30 home/user/Documents/Important.doc +\&... +.ft P +.fi +.UNINDENT +.UNINDENT +.IP 6. 3 +Restore the \fIMonday\fP archive by extracting the files relative to the current directory: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg extract /path/to/repo::Monday +.ft P +.fi +.UNINDENT +.UNINDENT +.IP 7. 3 +Recover disk space by manually deleting the \fIMonday\fP archive: +.INDENT 3.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ borg delete /path/to/repo::Monday +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +Borg is quiet by default (it works on WARNING log level). +You can use options like \fB\-\-progress\fP or \fB\-\-list\fP to get specific +reports during command execution. You can also add the \fB\-v\fP (or +\fB\-\-verbose\fP or \fB\-\-info\fP) option to adjust the log level to INFO to +get other informational messages. 
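+.sp
+For instance, combining these options with the step\-by\-step example above
+(a sketch; the archive name is just an example):
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+$ borg create \-v \-\-stats \-\-progress /path/to/repo::Wednesday ~/src ~/Documents
+.ft P
+.fi
+.UNINDENT
+.UNINDENT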
+.UNINDENT +.UNINDENT +.SH NOTES +.SS Repository URLs +.sp +\fBLocal filesystem\fP (or locally mounted network filesystem): +.sp +\fB/path/to/repo\fP \- filesystem path to repo directory, absolute path +.sp +\fBpath/to/repo\fP \- filesystem path to repo directory, relative path +.sp +Also, stuff like \fB~/path/to/repo\fP or \fB~other/path/to/repo\fP works (this is +expanded by your shell). +.sp +Note: you may also prepend a \fBfile://\fP to a filesystem path to get URL style. +.sp +\fBRemote repositories\fP accessed via ssh \fI\%user@host\fP: +.sp +\fBuser@host:/path/to/repo\fP \- remote repo, absolute path +.sp +\fBssh://user@host:port/path/to/repo\fP \- same, alternative syntax, port can be given +.sp +\fBRemote repositories with relative paths\fP can be given using this syntax: +.sp +\fBuser@host:path/to/repo\fP \- path relative to current directory +.sp +\fBuser@host:~/path/to/repo\fP \- path relative to user\(aqs home directory +.sp +\fBuser@host:~other/path/to/repo\fP \- path relative to other\(aqs home directory +.sp +Note: giving \fBuser@host:/./path/to/repo\fP or \fBuser@host:/~/path/to/repo\fP or +\fBuser@host:/~other/path/to/repo\fP is also supported, but not required here. +.sp +\fBRemote repositories with relative paths, alternative syntax with port\fP: +.sp +\fBssh://user@host:port/./path/to/repo\fP \- path relative to current directory +.sp +\fBssh://user@host:port/~/path/to/repo\fP \- path relative to user\(aqs home directory +.sp +\fBssh://user@host:port/~other/path/to/repo\fP \- path relative to other\(aqs home directory +.sp +If you frequently need the same repo URL, it is a good idea to set the +\fBBORG_REPO\fP environment variable to set a default for the repo URL: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +export BORG_REPO=\(aqssh://user@host:port/path/to/repo\(aq +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Then just leave away the repo URL if only a repo URL is needed and you want +to use the default \- it will be read from BORG_REPO then. +.sp +Use \fB::\fP syntax to give the repo URL when syntax requires giving a positional +argument for the repo (e.g. \fBborg mount :: /mnt\fP). +.SS Repository / Archive Locations +.sp +Many commands want either a repository (just give the repo URL, see above) or +an archive location, which is a repo URL followed by \fB::archive_name\fP\&. +.sp +Archive names must not contain the \fB/\fP (slash) character. For simplicity, +maybe also avoid blanks or other characters that have special meaning on the +shell or in a filesystem (borg mount will use the archive name as directory +name). +.sp +If you have set BORG_REPO (see above) and an archive location is needed, use +\fB::archive_name\fP \- the repo URL part is then read from BORG_REPO. +.SS Type of log output +.sp +The log level of the builtin logging configuration defaults to WARNING. +This is because we want Borg to be mostly silent and only output +warnings, errors and critical messages, unless output has been requested +by supplying an option that implies output (e.g. \fB\-\-list\fP or \fB\-\-progress\fP). +.sp +Log levels: DEBUG < INFO < WARNING < ERROR < CRITICAL +.sp +Use \fB\-\-debug\fP to set DEBUG log level \- +to get debug, info, warning, error and critical level output. +.sp +Use \fB\-\-info\fP (or \fB\-v\fP or \fB\-\-verbose\fP) to set INFO log level \- +to get info, warning, error and critical level output. +.sp +Use \fB\-\-warning\fP (default) to set WARNING log level \- +to get warning, error and critical level output. 
+.sp +Use \fB\-\-error\fP to set ERROR log level \- +to get error and critical level output. +.sp +Use \fB\-\-critical\fP to set CRITICAL log level \- +to get critical level output. +.sp +While you can set misc. log levels, do not expect that every command will +give different output on different log levels \- it\(aqs just a possibility. +.sp +\fBWARNING:\fP +.INDENT 0.0 +.INDENT 3.5 +Options \fB\-\-critical\fP and \fB\-\-error\fP are provided for completeness, +their usage is not recommended as you might miss important information. +.UNINDENT +.UNINDENT +.SS Return codes +.sp +Borg can exit with the following return codes (rc): +.TS +center; +|l|l|. +_ +T{ +Return code +T} T{ +Meaning +T} +_ +T{ +0 +T} T{ +success (logged as INFO) +T} +_ +T{ +1 +T} T{ +warning (operation reached its normal end, but there were warnings \-\- +you should check the log, logged as WARNING) +T} +_ +T{ +2 +T} T{ +error (like a fatal error, a local or remote exception, the operation +did not reach its normal end, logged as ERROR) +T} +_ +T{ +128+N +T} T{ +killed by signal N (e.g. 137 == kill \-9) +T} +_ +.TE +.sp +If you use \fB\-\-show\-rc\fP, the return code is also logged at the indicated +level as the last log entry. +.SS Environment Variables +.sp +Borg uses some environment variables for automation: +.INDENT 0.0 +.TP +.B General: +.INDENT 7.0 +.TP +.B BORG_REPO +When set, use the value to give the default repository location. If a command needs an archive +parameter, you can abbreviate as \fB::archive\fP\&. If a command needs a repository parameter, you +can either leave it away or abbreviate as \fB::\fP, if a positional parameter is required. +.TP +.B BORG_PASSPHRASE +When set, use the value to answer the passphrase question for encrypted repositories. +It is used when a passphrase is needed to access an encrypted repo as well as when a new +passphrase should be initially set when initializing an encrypted repo. +See also BORG_NEW_PASSPHRASE. +.TP +.B BORG_PASSCOMMAND +When set, use the standard output of the command (trailing newlines are stripped) to answer the +passphrase question for encrypted repositories. +It is used when a passphrase is needed to access an encrypted repo as well as when a new +passphrase should be initially set when initializing an encrypted repo. +If BORG_PASSPHRASE is also set, it takes precedence. +See also BORG_NEW_PASSPHRASE. +.TP +.B BORG_NEW_PASSPHRASE +When set, use the value to answer the passphrase question when a \fBnew\fP passphrase is asked for. +This variable is checked first. If it is not set, BORG_PASSPHRASE and BORG_PASSCOMMAND will also +be checked. +Main usecase for this is to fully automate \fBborg change\-passphrase\fP\&. +.TP +.B BORG_DISPLAY_PASSPHRASE +When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories. +.TP +.B BORG_HOSTNAME_IS_UNIQUE=no +Borg assumes that it can derive a unique hostname / identity (see \fBborg debug info\fP). +If this is not the case or you do not want Borg to automatically remove stale locks, +set this to \fIno\fP\&. +.TP +.B BORG_LOGGING_CONF +When set, use the given filename as \fI\%INI\fP\-style logging configuration. +.TP +.B BORG_RSH +When set, use this command instead of \fBssh\fP\&. This can be used to specify ssh options, such as +a custom identity file \fBssh \-i /path/to/private/key\fP\&. See \fBman ssh\fP for other options. 
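+For example: \fBexport BORG_RSH=\(aqssh \-i /path/to/private/key\(aq\fP (this reuses the
+example key path from the previous sentence; substitute your own).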
+.TP +.B BORG_REMOTE_PATH +When set, use the given path as borg executable on the remote (defaults to "borg" if unset). +Using \fB\-\-remote\-path PATH\fP commandline option overrides the environment variable. +.TP +.B BORG_FILES_CACHE_TTL +When set to a numeric value, this determines the maximum "time to live" for the files cache +entries (default: 20). The files cache is used to quickly determine whether a file is unchanged. +The FAQ explains this more detailed in: \fIalways_chunking\fP +.TP +.B TMPDIR +where temporary files are stored (might need a lot of temporary space for some operations) +.UNINDENT +.TP +.B Some automatic "answerers" (if set, they automatically answer confirmation questions): +.INDENT 7.0 +.TP +.B BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no (or =yes) +For "Warning: Attempting to access a previously unknown unencrypted repository" +.TP +.B BORG_RELOCATED_REPO_ACCESS_IS_OK=no (or =yes) +For "Warning: The repository at location ... was previously located at ..." +.TP +.B BORG_CHECK_I_KNOW_WHAT_I_AM_DOING=NO (or =YES) +For "Warning: \(aqcheck \-\-repair\(aq is an experimental feature that might result in data loss." +.TP +.B BORG_DELETE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES) +For "You requested to completely DELETE the repository \fIincluding\fP all archives it contains:" +.TP +.B BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES) +For "recreate is an experimental feature." +.UNINDENT +.sp +Note: answers are case sensitive. setting an invalid answer value might either give the default +answer or ask you interactively, depending on whether retries are allowed (they by default are +allowed). So please test your scripts interactively before making them a non\-interactive script. +.TP +.B Directories and files: +.INDENT 7.0 +.TP +.B BORG_KEYS_DIR +Default to \(aq~/.config/borg/keys\(aq. This directory contains keys for encrypted repositories. +.TP +.B BORG_KEY_FILE +When set, use the given filename as repository key file. +.TP +.B BORG_SECURITY_DIR +Default to \(aq~/.config/borg/security\(aq. This directory contains information borg uses to +track its usage of NONCES ("numbers used once" \- usually in encryption context) and other +security relevant data. +.TP +.B BORG_CACHE_DIR +Default to \(aq~/.cache/borg\(aq. This directory contains the local cache and might need a lot +of space for dealing with big repositories). +.UNINDENT +.TP +.B Building: +.INDENT 7.0 +.TP +.B BORG_OPENSSL_PREFIX +Adds given OpenSSL header file directory to the default locations (setup.py). +.TP +.B BORG_LZ4_PREFIX +Adds given LZ4 header file directory to the default locations (setup.py). +.TP +.B BORG_LIBB2_PREFIX +Adds given prefix directory to the default locations. If a \(aqinclude/blake2.h\(aq is found Borg +will be linked against the system libb2 instead of a bundled implementation. (setup.py) +.UNINDENT +.UNINDENT +.sp +Please note: +.INDENT 0.0 +.IP \(bu 2 +be very careful when using the "yes" sayers, the warnings with prompt exist for your / your data\(aqs security/safety +.IP \(bu 2 +also be very careful when putting your passphrase into a script, make sure it has appropriate file permissions +(e.g. mode 600, root:root). +.UNINDENT +.SS File systems +.sp +We strongly recommend against using Borg (or any other database\-like +software) on non\-journaling file systems like FAT, since it is not +possible to assume any consistency in case of power failures (or a +sudden disconnect of an external drive or similar failures). 
+.sp +While Borg uses a data store that is resilient against these failures +when used on journaling file systems, it is not possible to guarantee +this with some hardware \-\- independent of the software used. We don\(aqt +know a list of affected hardware. +.sp +If you are suspicious whether your Borg repository is still consistent +and readable after one of the failures mentioned above occurred, run +\fBborg check \-\-verify\-data\fP to make sure it is consistent. +.SS Units +.sp +To display quantities, Borg takes care of respecting the +usual conventions of scale. Disk sizes are displayed in \fI\%decimal\fP, using powers of ten (so +\fBkB\fP means 1000 bytes). For memory usage, \fI\%binary prefixes\fP are used, and are +indicated using the \fI\%IEC binary prefixes\fP, +using powers of two (so \fBKiB\fP means 1024 bytes). +.SS Date and Time +.sp +We format date and time conforming to ISO\-8601, that is: YYYY\-MM\-DD and +HH:MM:SS (24h clock). +.sp +For more information about that, see: \fI\%https://xkcd.com/1179/\fP +.sp +Unless otherwise noted, we display local date and time. +Internally, we store and process date and time as UTC. +.SS Resource Usage +.sp +Borg might use a lot of resources depending on the size of the data set it is dealing with. +.sp +If one uses Borg in a client/server way (with a ssh: repository), +the resource usage occurs in part on the client and in another part on the +server. +.sp +If one uses Borg as a single process (with a filesystem repo), +all the resource usage occurs in that one process, so just add up client + +server to get the approximate resource usage. +.INDENT 0.0 +.TP +.B CPU client: +borg create: does chunking, hashing, compression, crypto (high CPU usage) +chunks cache sync: quite heavy on CPU, doing lots of hashtable operations. +borg extract: crypto, decompression (medium to high CPU usage) +borg check: similar to extract, but depends on options given. +borg prune / borg delete archive: low to medium CPU usage +borg delete repo: done on the server +It won\(aqt go beyond 100% of 1 core as the code is currently single\-threaded. +Especially higher zlib and lzma compression levels use significant amounts +of CPU cycles. Crypto might be cheap on the CPU (if hardware accelerated) or +expensive (if not). +.TP +.B CPU server: +It usually doesn\(aqt need much CPU, it just deals with the key/value store +(repository) and uses the repository index for that. +.sp +borg check: the repository check computes the checksums of all chunks +(medium CPU usage) +borg delete repo: low CPU usage +.TP +.B CPU (only for client/server operation): +When using borg in a client/server way with a \fI\%ssh:\-type\fP repo, the ssh +processes used for the transport layer will need some CPU on the client and +on the server due to the crypto they are doing \- esp. if you are pumping +big amounts of data. +.TP +.B Memory (RAM) client: +The chunks index and the files index are read into memory for performance +reasons. Might need big amounts of memory (see below). +Compression, esp. lzma compression with high levels might need substantial +amounts of memory. +.TP +.B Memory (RAM) server: +The server process will load the repository index into memory. Might need +considerable amounts of memory, but less than on the client (see below). +.TP +.B Chunks index (client only): +Proportional to the amount of data chunks in your repo. Lots of chunks +in your repo imply a big chunks index. +It is possible to tweak the chunker params (see create options). 
+.TP +.B Files index (client only): +Proportional to the amount of files in your last backups. Can be switched +off (see create options), but next backup might be much slower if you do. +The speed benefit of using the files cache is proportional to file size. +.TP +.B Repository index (server only): +Proportional to the amount of data chunks in your repo. Lots of chunks +in your repo imply a big repository index. +It is possible to tweak the chunker params (see create options) to +influence the amount of chunks being created. +.TP +.B Temporary files (client): +Reading data and metadata from a FUSE mounted repository will consume up to +the size of all deduplicated, small chunks in the repository. Big chunks +won\(aqt be locally cached. +.TP +.B Temporary files (server): +None. +.TP +.B Cache files (client only): +Contains the chunks index and files index (plus a collection of single\- +archive chunk indexes which might need huge amounts of disk space, +depending on archive count and size \- see FAQ about how to reduce). +.TP +.B Network (only for client/server operation): +If your repository is remote, all deduplicated (and optionally compressed/ +encrypted) data of course has to go over the connection (\fBssh://\fP repo url). +If you use a locally mounted network filesystem, additionally some copy +operations used for transaction support also go over the connection. If +you backup multiple sources to one target repository, additional traffic +happens for cache resynchronization. +.UNINDENT +.SH SEE ALSO +.sp +\fIborg\-common(1)\fP for common command line options +.sp +\fIborg\-init(1)\fP, +\fIborg\-create(1)\fP, \fIborg\-mount(1)\fP, \fIborg\-extract(1)\fP, +\fIborg\-list(1)\fP, \fIborg\-info(1)\fP, +\fIborg\-delete(1)\fP, \fIborg\-prune(1)\fP, +\fIborg\-recreate(1)\fP +.sp +\fIborg\-compression(1)\fP, \fIborg\-patterns(1)\fP, \fIborg\-placeholders(1)\fP +.INDENT 0.0 +.IP \(bu 2 +Main web site \fI\%https://borgbackup.readthedocs.org/\fP +.IP \(bu 2 +Releases \fI\%https://github.com/borgbackup/borg/releases\fP +.IP \(bu 2 +Changelog \fI\%https://github.com/borgbackup/borg/blob/master/docs/changes.rst\fP +.IP \(bu 2 +GitHub \fI\%https://github.com/borgbackup/borg\fP +.IP \(bu 2 +Security contact \fI\%https://borgbackup.readthedocs.io/en/latest/support.html#security\-contact\fP +.UNINDENT +.SH AUTHOR +The Borg Collective +.\" Generated by docutils manpage writer. +. diff --git a/docs/man_intro.rst b/docs/man_intro.rst new file mode 100644 index 00000000..44dee959 --- /dev/null +++ b/docs/man_intro.rst @@ -0,0 +1,68 @@ +==== +borg +==== + +---------------------------------------- +deduplicating and encrypting backup tool +---------------------------------------- + +:Author: The Borg Collective +:Date: 2017-02-05 +:Manual section: 1 +:Manual group: borg backup tool + +SYNOPSIS +-------- + +borg [common options] [options] [arguments] + +DESCRIPTION +----------- + +.. we don't include the README.rst here since we want to keep this terse. + +BorgBackup (short: Borg) is a deduplicating backup program. +Optionally, it supports compression and authenticated encryption. + +The main goal of Borg is to provide an efficient and secure way to backup data. +The data deduplication technique used makes Borg suitable for daily backups +since only changes are stored. +The authenticated encryption technique makes it suitable for backups to not +fully trusted targets. + +Borg stores a set of files in an *archive*. A *repository* is a collection +of *archives*. The format of repositories is Borg-specific. 
Borg does not +distinguish archives from each other in any way other than their name, +it does not matter when or where archives were created (e.g. different hosts). + +EXAMPLES +-------- + +A step-by-step example +~~~~~~~~~~~~~~~~~~~~~~ + +.. include:: quickstart_example.rst.inc + +NOTES +----- + +.. include:: usage_general.rst.inc + +SEE ALSO +-------- + +`borg-common(1)` for common command line options + +`borg-init(1)`, +`borg-create(1)`, `borg-mount(1)`, `borg-extract(1)`, +`borg-list(1)`, `borg-info(1)`, +`borg-delete(1)`, `borg-prune(1)`, +`borg-recreate(1)` + +`borg-compression(1)`, `borg-patterns(1)`, `borg-placeholders(1)` + +* Main web site https://www.borgbackup.org/ +* Releases https://github.com/borgbackup/borg/releases +* Changelog https://github.com/borgbackup/borg/blob/master/docs/changes.rst +* GitHub https://github.com/borgbackup/borg +* Security contact https://borgbackup.readthedocs.io/en/latest/support.html#security-contact diff --git a/docs/misc/asciinema/advanced.json b/docs/misc/asciinema/advanced.json new file mode 100644 index 00000000..d9f951e6 --- /dev/null +++ b/docs/misc/asciinema/advanced.json @@ -0,0 +1,7733 @@ +{ + "version": 1, + "width": 78, + "height": 24, + "duration": 446.783754, + "command": null, + "title": null, + "env": { + "TERM": "xterm-256color", + "SHELL": "/bin/zsh" + }, + "stdout": [ + [ + 0.29658, + "\b\u001b[1m$ # \u001b[1mFor the pro users, here are some advanced features of borg, so you can imp\u001b[1mr\u001b[1mess your friends. ;)\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.025674, + "\u001b[?1l\u001b>" + ], + [ + 0.000375, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000796, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000953, + "\u001b]7;\u0007" + ], + [ + 0.000799, + "\u001b]7;\u0007" + ], + [ + 7.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 4.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000368, + "\u001b[?2004h" + ], + [ + 0.857202, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.269836, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.277016, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.185115, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.222294, + "\b\u001b[1mo\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.098908, + "\b\u001b[1mt\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.471037, + "\b\u001b[1me\u001b[1m:\u001b[0m\u001b[39m" + ], + [ + 0.276132, + "\b\u001b[1m:\u001b[1m This screencast was made with borg version 1.1.0 – older or newer bo\u001b[1mr\u001b[1mg versions may behave differently.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.063392, + "\u001b[?1l\u001b>" + ], + [ + 0.001402, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001228, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.002846, + "\u001b]7;\u0007" + ], + [ + 0.002554, + "\u001b]7;\u0007" + ], + [ + 6.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000229, + "\u001b[?1h\u001b=" + ], + [ + 0.000858, + "\u001b[?2004h" + ], + [ + 0.944947, + "\u001b[?1l\u001b>" + ], + [ + 0.000319, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000652, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001131, + "\u001b]7;\u0007" + ], + [ + 0.000871, + "\u001b]7;\u0007" + ], + [ + 9.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000117, + "\u001b[?1h\u001b=" + ], + [ + 0.00014, + "\u001b[?2004h" + ], + [ + 0.91046, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.350642, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 
0.226284, + "\b\b\u001b[1m#\u001b[1m \u001b[1mF\u001b[0m\u001b[39m" + ], + [ + 0.190635, + "\b\u001b[1mF\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.226298, + "\b\u001b[1mi\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.094075, + "\b\u001b[1mr\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.125931, + "\b\u001b[1ms\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.210409, + "\b\u001b[1mt\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.333349, + "\b\u001b[1m \u001b[1mof all, we can use several environment variables for borg.\u001b[0m\u001b[39m" + ], + [ + 1.115007, + "\u001b[?1l\u001b>" + ], + [ + 0.000418, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000665, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001185, + "\u001b]7;\u0007" + ], + [ + 0.00091, + "\u001b]7;\u0007" + ], + [ + 2.5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000298, + "\u001b[?2004h" + ], + [ + 1.193161, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.249128, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.253119, + "\b\b\u001b[1m#\u001b[1m \u001b[1mE\u001b[0m\u001b[39m" + ], + [ + 0.328187, + "\b\u001b[1mE\u001b[1m.\u001b[0m\u001b[39m" + ], + [ + 0.873845, + "\b\u001b[1m.\u001b[1mg\u001b[0m\u001b[39m" + ], + [ + 0.164238, + "\b\u001b[1mg\u001b[1m.\u001b[0m\u001b[39m" + ], + [ + 0.211331, + "\b\u001b[1m.\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.15971, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.133833, + "\b\u001b[1mw\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 2.95423, + "\b\u001b[1me\u001b[1m do not want to type in our repo path and password again and again…" + ], + [ + 1.769654, + "\u001b[K" + ], + [ + 2.7e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.000616, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000594, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00144, + "\u001b]7;\u0007" + ], + [ + 0.001172, + "\u001b]7;\u0007" + ], + [ + 3.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000419, + "\u001b[?2004h" + ], + [ + 0.975676, + "\u001b[1m\u001b[31me\u001b[0m\u001b[39m" + ], + [ + 0.156719, + "\b\u001b[0m\u001b[32me\u001b[32mx\u001b[39m" + ], + [ + 0.121911, + "\b\b\u001b[1m\u001b[31me\u001b[1m\u001b[31mx\u001b[1m\u001b[31mp\u001b[0m\u001b[39m" + ], + [ + 0.15502, + "\b\u001b[1m\u001b[31mp\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.26241, + "\b\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.126933, + "\b\b\b\b\b\u001b[0m\u001b[33me\u001b[0m\u001b[33mx\u001b[0m\u001b[33mp\u001b[0m\u001b[33mo\u001b[0m\u001b[33mr\u001b[33mt\u001b[39m" + ], + [ + 0.192182, + " " + ], + [ + 0.304561, + "B" + ], + [ + 0.192073, + "O" + ], + [ + 0.136183, + "R" + ], + [ + 0.114362, + "G" + ], + [ + 0.576349, + "_" + ], + [ + 0.103719, + "R" + ], + [ + 0.113626, + "E" + ], + [ + 0.159395, + "P" + ], + [ + 0.141942, + "O" + ], + [ + 0.554082, + "=" + ], + [ + 0.74644, + "'" + ], + [ + 0.69222, + "/" + ], + [ + 0.20093, + "m" + ], + [ + 0.108068, + "e" + ], + [ + 0.125576, + "d" + ], + [ + 0.161298, + "i" + ], + [ + 0.107949, + "a" + ], + [ + 0.423969, + "/" + ], + [ + 0.623591, + "b" + ], + [ + 0.102775, + "a" + ], + [ + 0.146442, + "c" + ], + [ + 0.116202, + "k" + ], + [ + 0.133034, + "u" + ], + [ + 0.282831, + "p" + ], + [ + 0.436512, + "/" + ], + [ + 0.551147, + "b" + ], + [ + 0.208373, + "o" + ], + [ + 0.108883, + "r" + ], + [ + 0.137272, + "g" + ], + [ + 0.218057, + "d" + ], + [ + 0.122586, + "e" + ], + [ + 0.133605, + "m" + ], + [ + 
0.170095, + "o" + ], + [ + 0.795644, + "'" + ], + [ + 0.928899, + "\u001b[?1l\u001b>" + ], + [ + 0.001469, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000802, + "\u001b]2;export BORG_REPO='/media/backup/borgdemo' \u0007\u001b]1;export\u0007" + ], + [ + 0.000109, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001347, + "\u001b]7;\u0007" + ], + [ + 0.001006, + "\u001b]7;\u0007" + ], + [ + 5.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000186, + "\u001b[?2004h" + ], + [ + 0.718289, + "\u001b[1m\u001b[31me\u001b[0m\u001b[39m" + ], + [ + 0.19628, + "\b\u001b[0m\u001b[32me\u001b[32mx\u001b[39m" + ], + [ + 0.269637, + "\b\b\u001b[1m\u001b[31me\u001b[1m\u001b[31mx\u001b[1m\u001b[31mp\u001b[0m\u001b[39m" + ], + [ + 0.164388, + "\b\u001b[1m\u001b[31mp\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.332999, + "\b\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.121063, + "\b\b\b\b\b\u001b[0m\u001b[33me\u001b[0m\u001b[33mx\u001b[0m\u001b[33mp\u001b[0m\u001b[33mo\u001b[0m\u001b[33mr\u001b[33mt\u001b[39m" + ], + [ + 0.265335, + " " + ], + [ + 0.311313, + "B" + ], + [ + 0.205307, + "O" + ], + [ + 0.159682, + "R" + ], + [ + 0.141683, + "G" + ], + [ + 0.553563, + "_" + ], + [ + 0.225583, + "P" + ], + [ + 0.10739, + "A" + ], + [ + 0.204722, + "S" + ], + [ + 0.145905, + "S" + ], + [ + 0.312666, + "P" + ], + [ + 0.311469, + "H" + ], + [ + 0.209393, + "R" + ], + [ + 0.069618, + "A" + ], + [ + 0.208505, + "S" + ], + [ + 0.202229, + "E" + ], + [ + 0.719142, + "=" + ], + [ + 0.61979, + "'" + ], + [ + 0.414834, + "1" + ], + [ + 0.208777, + "2" + ], + [ + 0.193519, + "3" + ], + [ + 0.171001, + "4" + ], + [ + 0.542373, + "'" + ], + [ + 0.876006, + "\u001b[?1l\u001b>" + ], + [ + 0.002877, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001161, + "\u001b]2;export BORG_PASSPHRASE='1234' \u0007\u001b]1;export\u0007" + ], + [ + 8.5e-05, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.003438, + "\u001b]7;\u0007" + ], + [ + 0.002065, + "\u001b]7;\u0007" + ], + [ + 0.000146, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000508, + "\u001b[?2004h" + ], + [ + 1.238676, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.273221, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.248131, + "\b\b\u001b[1m#\u001b[1m \u001b[1mP\u001b[0m\u001b[39m" + ], + [ + 0.142137, + "\b\u001b[1mP\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.089312, + "\b\u001b[1mr\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.19919, + "\b\u001b[1mo\u001b[1mb\u001b[0m\u001b[39m" + ], + [ + 0.207691, + "\b\u001b[1mb\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.105529, + "\b\u001b[1ml\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.075159, + "\b\u001b[1me\u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.625428, + "\b\u001b[1mm\u001b[1m solved, borg will use this automatically… :)\u001b[0m\u001b[39m" + ], + [ + 0.442303, + "\u001b[?1l\u001b>" + ], + [ + 0.0004, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00077, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001065, + "\u001b]7;\u0007" + ], + [ + 0.001105, + "\u001b]7;\u0007" + ], + [ + 2.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000266, + "\u001b[?2004h" + ], + [ + 1.570802, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.218966, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.191279, + "\b\b\u001b[1m#\u001b[1m \u001b[1mW\u001b[0m\u001b[39m" 
+ ], + [ + 0.144698, + "\b\u001b[1mW\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.313061, + "\b\u001b[1me\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.245196, + "\b\u001b[1m'\u001b[1mll use this right away…\u001b[0m\u001b[39m" + ], + [ + 0.532339, + "\u001b[?1l\u001b>" + ], + [ + 0.000412, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00062, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001169, + "\u001b]7;\u0007" + ], + [ + 0.00087, + "\u001b]7;\u0007" + ], + [ + 2.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000101, + "\u001b[?1h\u001b=" + ], + [ + 0.000279, + "\u001b[?2004h" + ], + [ + 0.63892, + "\u001b[?1l\u001b>" + ], + [ + 0.000369, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00044, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.002911, + "\u001b]7;\u0007" + ], + [ + 0.002442, + "\u001b]7;\u0007" + ], + [ + 0.000162, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00059, + "\u001b[?2004h" + ], + [ + 0.548725, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.113549, + "\b\u001b[1m#\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.290577, + "\b\b\u001b[1m#\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.262532, + "\b\u001b[1m \u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.41846, + "\b\u001b[1mA\u001b[1mDVANCED CREATION ##\u001b[0m\u001b[39m" + ], + [ + 0.535376, + "\u001b[?1l\u001b>" + ], + [ + 0.001234, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000938, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.002912, + "\u001b]7;\u0007" + ], + [ + 0.001987, + "\u001b]7;\u0007" + ], + [ + 7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000134, + "\u001b[?1h\u001b=" + ], + [ + 0.000671, + "\u001b[?2004h" + ], + [ + 0.759129, + "\u001b[?1l\u001b>" + ], + [ + 0.000397, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000757, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001297, + "\u001b]7;\u0007" + ], + [ + 0.00131, + "\u001b]7;\u0007" + ], + [ + 3.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.0001, + "\u001b[?1h\u001b=" + ], + [ + 0.000135, + "\u001b[?2004h" + ], + [ + 0.425509, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.233111, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.185443, + "\b\b\u001b[1m#\u001b[1m \u001b[1mW\u001b[0m\u001b[39m" + ], + [ + 0.151433, + "\b\u001b[1mW\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.157168, + "\b\u001b[1me\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.148414, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.200586, + "\b\u001b[1mc\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.145343, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.414343, + "\b\u001b[1mn\u001b[1m also use some placeholders in our archive name…\u001b[0m\u001b[39m" + ], + [ + 1.198174, + "\u001b[?1l\u001b>" + ], + [ + 0.000433, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000647, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001494, + "\u001b]7;\u0007" + ], + [ + 0.001069, + "\u001b]7;\u0007" + ], + [ + 8.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 4.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000395, + "\u001b[?2004h" + ], + [ + 0.832499, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.186742, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.076839, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.15706, + 
"\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.175773, + " " + ], + [ + 0.265231, + "c" + ], + [ + 0.198791, + "r" + ], + [ + 0.162497, + "e" + ], + [ + 0.08856, + "a" + ], + [ + 0.135865, + "t" + ], + [ + 0.112707, + "e" + ], + [ + 0.634063, + " " + ], + [ + 0.621186, + "-" + ], + [ + 0.112118, + "-" + ], + [ + 0.270276, + "s" + ], + [ + 0.135637, + "t" + ], + [ + 0.130994, + "a" + ], + [ + 0.086801, + "t" + ], + [ + 0.119778, + "s" + ], + [ + 0.24882, + " " + ], + [ + 0.47677, + "-" + ], + [ + 0.112232, + "-" + ], + [ + 0.26855, + "p" + ], + [ + 0.218974, + "r" + ], + [ + 0.14527, + "o" + ], + [ + 0.21975, + "g" + ], + [ + 0.104406, + "r" + ], + [ + 0.168975, + "e" + ], + [ + 0.224875, + "s" + ], + [ + 0.161557, + "s" + ], + [ + 0.556139, + " " + ], + [ + 0.90841, + "-" + ], + [ + 0.117065, + "-" + ], + [ + 0.268496, + "c" + ], + [ + 0.118758, + "o" + ], + [ + 0.13892, + "m" + ], + [ + 0.17322, + "p" + ], + [ + 0.146756, + "r" + ], + [ + 0.196139, + "e" + ], + [ + 0.249655, + "s" + ], + [ + 0.157202, + "s" + ], + [ + 0.236521, + "i" + ], + [ + 0.120624, + "o" + ], + [ + 0.175143, + "n" + ], + [ + 0.321073, + " " + ], + [ + 0.249849, + "l" + ], + [ + 0.281988, + "z" + ], + [ + 0.281179, + "4" + ], + [ + 1.223567, + " " + ], + [ + 0.604439, + ":" + ], + [ + 0.099497, + ":" + ], + [ + 0.760652, + "{" + ], + [ + 0.504646, + "u" + ], + [ + 0.249702, + "s" + ], + [ + 0.310204, + "e" + ], + [ + 0.156776, + "r" + ], + [ + 0.927624, + "}" + ], + [ + 0.972074, + "-" + ], + [ + 0.979824, + "{" + ], + [ + 0.397346, + "n" + ], + [ + 0.195251, + "o" + ], + [ + 0.203266, + "w" + ], + [ + 0.716944, + "}" + ], + [ + 0.992466, + " " + ], + [ + 0.404348, + "\u001b[4mW\u001b[24m" + ], + [ + 0.098053, + "\b\u001b[4mW\u001b[4ma\u001b[24m \b" + ], + [ + 0.440872, + "\b\u001b[4ma\u001b[4ml\u001b[24m" + ], + [ + 0.130433, + "\b\u001b[4ml\u001b[4ml\u001b[24m" + ], + [ + 0.079918, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.009903, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m create --stats --progress --compression lz4 ::{user}-{now} \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 1.432747, + "\u001b[?1l\u001b>" + ], + [ + 0.006238, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001309, + "\u001b]2;borg create --stats --progress --compression lz4 ::{user}-{now} Wallpaper\u0007\u001b]1;borg\u0007" + ], + [ + 0.703285, + "0 B O 0 B C 0 B D 0 N Wallpaper \r" + ], + [ + 0.059704, + "Initializing cache transaction: Reading config \r" + ], + [ + 0.000259, + "Initializing cache transaction: Reading chunks \r" + ], + [ + 0.000283, + "Initializing cache transaction: Reading files \r" + ], + [ + 0.00035, + " \r" + ], + [ + 0.302813, + "Compacting segments 0% \r" + ], + [ + 0.000422, + "Compacting segments 50% \r" + ], + [ + 2.6e-05, + " \r" + ], + [ + 0.053481, + "Saving files cache \r" + ], + [ + 0.010102, + "Saving chunks cache \r" + ], + [ + 0.000354, + "Saving cache config \r" + ], + [ + 0.08865, + " \r" + ], + [ + 2.6e-05, + " \r" + ], + [ + 0.000371, + "------------------------------------------------------------------------------\r\n" + ], + [ + 3.4e-05, + "Archive name: rugk-2017-07-16T18:51:34\r\n" + ], + [ + 8e-06, + "Archive fingerprint: d054cc411324d4bd848b39d1c9cad909073f9ff1a1a503a676d3e050be140396\r\n" + ], + [ + 0.000101, + "Time (start): Sun, 2017-07-16 18:51:34\r\nTime (end): Sun, 2017-07-16 18:51:35\r\n" + ], + [ + 7.5e-05, + "Duration: 0.18 seconds\r\nNumber of files: 1\r\n" + ], + [ + 8.8e-05, + "Utilization of 
maximum supported archive size: 0%\r\n" + ], + [ + 7e-05, + "------------------------------------------------------------------------------\r\n Original size Compressed size Deduplicated size\r\n" + ], + [ + 1.6e-05, + "This archive: 3.78 MB 3.80 MB 916 B\r\n" + ], + [ + 5.2e-05, + "All archives: 1.86 GB 1.86 GB 561.88 MB\r\n" + ], + [ + 1.3e-05, + "\r\n" + ], + [ + 2.4e-05, + " Unique chunks Total chunks\r\n" + ], + [ + 2.4e-05, + "Chunk index: 1008 3288\r\n" + ], + [ + 2.4e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 0.049018, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00124, + "\u001b]7;\u0007" + ], + [ + 0.000936, + "\u001b]7;\u0007" + ], + [ + 0.000124, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00019, + "\u001b[?2004h" + ], + [ + 0.814358, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.326066, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.279288, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.200695, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.2241, + "\b\u001b[1mo\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.221056, + "\b\u001b[1mt\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.341582, + "\b\u001b[1mi\u001b[1mce the backup name.\u001b[0m\u001b[39m" + ], + [ + 1.40396, + "\u001b[?1l\u001b>" + ], + [ + 0.000442, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000701, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00108, + "\u001b]7;\u0007" + ], + [ + 0.000942, + "\u001b]7;\u0007" + ], + [ + 5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 4.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00028, + "\u001b[?2004h" + ], + [ + 1.540998, + "\u001b[?1l\u001b>" + ], + [ + 0.000288, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000571, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.0013, + "\u001b]7;\u0007" + ], + [ + 0.000852, + "\u001b]7;\u0007" + ], + [ + 0.000106, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000164, + "\u001b[?2004h" + ], + [ + 0.402376, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.27499, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.220032, + "\b\b\u001b[1m#\u001b[1m \u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.127907, + "\b\u001b[1mA\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.092357, + "\b\u001b[1mn\u001b[1md\u001b[0m\u001b[39m" + ], + [ + 0.145572, + "\b\u001b[1md\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.222962, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.178534, + "\b\u001b[1mw\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.196668, + "\b\u001b[1me\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.225933, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.175493, + "\b\u001b[1mc\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.119503, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.425112, + "\b\u001b[1mn\u001b[1m put completely different data, with different backup settings, i\u001b[1mn\u001b[1m our backup. 
It will be deduplicated, anyway:\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.421849, + "\u001b[?1l\u001b>" + ], + [ + 0.000749, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00066, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00197, + "\u001b]7;\u0007" + ], + [ + 0.001476, + "\u001b]7;\u0007" + ], + [ + 5.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000124, + "\u001b[?1h\u001b=" + ], + [ + 0.000525, + "\u001b[?2004h" + ], + [ + 1.444268, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.209812, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.118788, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.145792, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.20446, + " " + ], + [ + 0.309592, + "c" + ], + [ + 0.201447, + "r" + ], + [ + 0.151315, + "e" + ], + [ + 0.084953, + "a" + ], + [ + 0.156918, + "t" + ], + [ + 0.091724, + "e" + ], + [ + 0.324287, + " " + ], + [ + 0.861486, + "-" + ], + [ + 0.134231, + "-" + ], + [ + 0.491182, + "s" + ], + [ + 0.195253, + "t" + ], + [ + 0.097572, + "a" + ], + [ + 0.09545, + "t" + ], + [ + 0.111782, + "s" + ], + [ + 0.301387, + " " + ], + [ + 0.524478, + "-" + ], + [ + 0.112538, + "-" + ], + [ + 0.397406, + "p" + ], + [ + 0.175509, + "r" + ], + [ + 0.203203, + "o" + ], + [ + 0.257392, + "g" + ], + [ + 0.1453, + "r" + ], + [ + 0.174285, + "e" + ], + [ + 0.353531, + "s" + ], + [ + 0.176989, + "s" + ], + [ + 0.386157, + " " + ], + [ + 0.510691, + "-" + ], + [ + 0.115919, + "-" + ], + [ + 0.225102, + "c" + ], + [ + 0.145577, + "o" + ], + [ + 0.133821, + "m" + ], + [ + 0.171364, + "p" + ], + [ + 0.157255, + "r" + ], + [ + 0.162989, + "e" + ], + [ + 0.256274, + "s" + ], + [ + 0.167254, + "s" + ], + [ + 0.253369, + "i" + ], + [ + 0.1197, + "o" + ], + [ + 0.178105, + "n" + ], + [ + 0.824434, + " " + ], + [ + 0.734608, + "z" + ], + [ + 0.237239, + "l" + ], + [ + 0.158877, + "i" + ], + [ + 0.148988, + "b" + ], + [ + 0.289236, + "," + ], + [ + 0.349273, + "6" + ], + [ + 0.618231, + " " + ], + [ + 0.449031, + "-" + ], + [ + 0.119307, + "-" + ], + [ + 0.451923, + "e" + ], + [ + 0.330743, + "x" + ], + [ + 0.232655, + "c" + ], + [ + 0.197384, + "l" + ], + [ + 0.176276, + "u" + ], + [ + 0.104427, + "d" + ], + [ + 0.141163, + "e" + ], + [ + 0.359309, + " " + ], + [ + 1.198529, + "\u001b[4m~\u001b[24m" + ], + [ + 0.338729, + "\b\u001b[4m~\u001b[4m/\u001b[24m" + ], + [ + 0.352573, + "\b\u001b[4m/\u001b[4mD\u001b[24m" + ], + [ + 0.190254, + "\b\u001b[4mD\u001b[4mo\u001b[24m" + ], + [ + 0.113631, + "\b\u001b[4mo\u001b[4mw\u001b[24m" + ], + [ + 0.743216, + "\b\u001b[4mw\u001b[4mn\u001b[24m" + ], + [ + 0.613852, + "\b\u001b[4mn\u001b[4ml\u001b[24m" + ], + [ + 0.121501, + "\b\u001b[4ml\u001b[4mo\u001b[24m" + ], + [ + 0.068625, + "\b\u001b[4mo\u001b[4ma\u001b[24m" + ], + [ + 0.183855, + "\b\u001b[4ma\u001b[4md\u001b[24m" + ], + [ + 0.152099, + "\b\u001b[4md\u001b[4ms\u001b[24m" + ], + [ + 0.793349, + "\b\u001b[4ms\u001b[4m/\u001b[24m" + ], + [ + 0.477575, + "\b\u001b[4m/\u001b[4mb\u001b[24m" + ], + [ + 0.198072, + "\b\u001b[4mb\u001b[4mi\u001b[24m \r\u001b[K" + ], + [ + 0.175276, + "\u001b[A\u001b[77C\u001b[4mi\u001b[4mg\u001b[24m" + ], + [ + 0.647369, + "\r\u001b[4mg\u001b[24m " + ], + [ + 0.439418, + ":" + ], + [ + 0.108932, + ":" + ], + [ + 0.556615, + "{" + ], + [ + 0.244626, + "u" + ], + [ + 0.097534, + "s" + ], + [ + 0.187502, + "e" + ], + [ + 0.16023, + "r" + ], + [ + 0.675542, + "}" + ], + 
[ + 0.988946, + "-" + ], + [ + 0.545789, + "{" + ], + [ + 0.33121, + "n" + ], + [ + 0.204667, + "o" + ], + [ + 0.141818, + "w" + ], + [ + 0.397217, + "}" + ], + [ + 0.979478, + " " + ], + [ + 0.768118, + "\u001b[4m~\u001b[24m" + ], + [ + 0.589532, + "\b\u001b[4m~\u001b[4m/\u001b[24m" + ], + [ + 0.515186, + "\b\u001b[4m/\u001b[4mD\u001b[24m" + ], + [ + 0.17703, + "\b\u001b[4mD\u001b[4mo\u001b[24m" + ], + [ + 0.121294, + "\b\u001b[4mo\u001b[4mw\u001b[24m" + ], + [ + 0.153543, + "\b\u001b[4mw\u001b[4mn\u001b[24m" + ], + [ + 0.282343, + "\b\u001b[4mn\u001b[4ml\u001b[24m" + ], + [ + 0.129573, + "\b\u001b[4ml\u001b[4mo\u001b[24m" + ], + [ + 0.095125, + "\b\u001b[4mo\u001b[4ma\u001b[24m" + ], + [ + 0.19963, + "\b\u001b[4ma\u001b[4md\u001b[24m" + ], + [ + 0.142667, + "\b\u001b[4md\u001b[4ms\u001b[24m" + ], + [ + 1.499285, + "\u001b[?1l\u001b>" + ], + [ + 0.003081, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000637, + "\u001b]2;borg create --stats --progress --compression zlib,6 --exclude ~/Downloads/big\u0007\u001b]1;borg\u0007" + ], + [ + 0.687457, + "0 B O 0 B C 0 B D 0 N home/rugk/Downloads \r" + ], + [ + 0.025551, + "Initializing cache transaction: Reading config \r" + ], + [ + 0.000326, + "Initializing cache transaction: Reading chunks \r" + ], + [ + 0.000273, + "Initializing cache transaction: Reading files \r" + ], + [ + 0.000394, + " \r" + ], + [ + 0.220691, + "1.31 MB O 1.29 MB C 1.29 MB D 1 N home/rugk/Downloads...chiveWithStuffHere.zip\r" + ], + [ + 0.26224, + "7.70 MB O 6.91 MB C 6.91 MB D 2 N home/rugk/Downloads...droid.gms-11059462.apk\r" + ], + [ + 0.32599, + "Compacting segments 0% \r" + ], + [ + 0.026073, + "Compacting segments 50% \r" + ], + [ + 0.001982, + " \r" + ], + [ + 0.058565, + "Saving files cache \r" + ], + [ + 0.011363, + "Saving chunks cache \r" + ], + [ + 0.000378, + "Saving cache config \r" + ], + [ + 0.12955, + " \r" + ], + [ + 3.4e-05, + " \r" + ], + [ + 0.00039, + "------------------------------------------------------------------------------\r\n" + ], + [ + 1.6e-05, + "Archive name: rugk-2017-07-16T18:52:19\r\n" + ], + [ + 3.1e-05, + "Archive fingerprint: 0de98f590b004ad7545f2013c4c9f2d4e3eed1415d177c89d6c2b7ff05918d2e\r\n" + ], + [ + 2.2e-05, + "Time (start): Sun, 2017-07-16 18:52:19\r\n" + ], + [ + 7.2e-05, + "Time (end): Sun, 2017-07-16 18:52:20\r\nDuration: 0.63 seconds\r\n" + ], + [ + 3e-05, + "Number of files: 6\r\n" + ], + [ + 2.5e-05, + "Utilization of maximum supported archive size: 0%\r\n" + ], + [ + 2.4e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 1.8e-05, + " Original size Compressed size Deduplicated size\r\n" + ], + [ + 2.5e-05, + "This archive: 9.55 MB 8.04 MB 8.04 MB\r\n" + ], + [ + 2.4e-05, + "All archives: 1.87 GB 1.86 GB 569.92 MB\r\n" + ], + [ + 2.5e-05, + "\r\n" + ], + [ + 2.4e-05, + " Unique chunks Total chunks\r\n" + ], + [ + 2.4e-05, + "Chunk index: 1023 3303\r\n" + ], + [ + 2.4e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 0.063104, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001326, + "\u001b]7;\u0007" + ], + [ + 0.001145, + "\u001b]7;\u0007" + ], + [ + 8.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.0002, + "\u001b[?2004h" + ], + [ + 3.131399, + "\u001b[?1l\u001b>" + ], + [ + 0.000281, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00048, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001354, + "\u001b]7;\u0007" + ], + [ 
+ 0.000923, + "\u001b]7;\u0007" + ], + [ + 6.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000161, + "\u001b[?2004h" + ], + [ + 0.285262, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.419379, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.277555, + "\b\b\u001b[1m#\u001b[1m \u001b[1mO\u001b[0m\u001b[39m" + ], + [ + 0.015676, + "\b\u001b[1mO\u001b[0m\u001b[39m" + ], + [ + 0.119839, + "\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.315418, + "\b\u001b[1mr\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.224426, + "\b\u001b[1m \u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.10624, + "\b\u001b[1ml\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.170324, + "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.995665, + "\b\u001b[1mt\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.139331, + "\b\u001b[1m'\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.174188, + "\b\u001b[1ms\u001b[1m backup a device via STDIN.\u001b[0m\u001b[39m" + ], + [ + 1.117059, + "\u001b[?1l\u001b>" + ], + [ + 0.000376, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000566, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001272, + "\u001b]7;\u0007" + ], + [ + 0.000893, + "\u001b]7;\u0007" + ], + [ + 8.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 3.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000291, + "\u001b[?2004h" + ], + [ + 2.390246, + "\u001b[1m\u001b[31ms\u001b[0m\u001b[39m" + ], + [ + 0.179283, + "\b\u001b[0m\u001b[32ms\u001b[32mu\u001b[39m" + ], + [ + 0.08919, + "\b\b\u001b[1m\u001b[31ms\u001b[1m\u001b[31mu\u001b[1m\u001b[31md\u001b[0m\u001b[39m" + ], + [ + 0.156134, + "\b\b\b\u001b[0m\u001b[4m\u001b[32ms\u001b[0m\u001b[4m\u001b[32mu\u001b[0m\u001b[4m\u001b[32md\u001b[4m\u001b[32mo\u001b[24m\u001b[39m" + ], + [ + 0.939511, + " " + ], + [ + 0.219491, + "\u001b[32md\u001b[39m" + ], + [ + 0.128817, + "\b\u001b[32md\u001b[32md\u001b[39m" + ], + [ + 0.317081, + " " + ], + [ + 0.206442, + "i" + ], + [ + 0.127682, + "f" + ], + [ + 0.497718, + "=" + ], + [ + 0.79125, + "/" + ], + [ + 0.162326, + "d" + ], + [ + 0.141147, + "e" + ], + [ + 0.17081, + "v" + ], + [ + 0.229501, + "/" + ], + [ + 0.309668, + "s" + ], + [ + 0.201626, + "d" + ], + [ + 0.121565, + "x" + ], + [ + 1.112764, + " " + ], + [ + 0.458342, + "b" + ], + [ + 0.13412, + "s" + ], + [ + 0.426796, + "=" + ], + [ + 0.325514, + "1" + ], + [ + 0.182735, + "0" + ], + [ + 0.635284, + "M" + ], + [ + 0.571527, + " " + ], + [ + 0.644682, + "|" + ], + [ + 0.668689, + " " + ], + [ + 0.368219, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.197192, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.069454, + "\b\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.15983, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.193693, + " " + ], + [ + 0.342177, + "c" + ], + [ + 0.213502, + "r" + ], + [ + 0.165989, + "e" + ], + [ + 0.101269, + "a" + ], + [ + 0.20561, + "t" + ], + [ + 0.172574, + "e" + ], + [ + 0.302751, + " " + ], + [ + 0.524261, + "-" + ], + [ + 0.112867, + "-" + ], + [ + 0.358854, + "p" + ], + [ + 0.158933, + "r" + ], + [ + 0.146881, + "o" + ], + [ + 0.235592, + "g" + ], + [ + 0.153909, + "r" + ], + [ + 0.187519, + "e" + ], + [ + 0.278997, + "s" + ], + [ + 0.161351, + "s" + ], + [ + 0.536239, + " " + ], + [ + 0.472536, + "-" + ], + [ + 0.103445, + "-" + ], + [ + 0.315142, + "s" + ], + [ + 0.188015, + "t" + ], + [ + 0.092463, + "a" + ], + [ + 0.121697, + "t" + ], 
+ [ + 0.108331, + "s" + ], + [ + 0.863705, + " " + ], + [ + 0.547363, + ":" + ], + [ + 0.101957, + ":" + ], + [ + 0.713103, + "s" + ], + [ + 0.172527, + "p" + ], + [ + 0.143374, + "e" + ], + [ + 0.495475, + "c" + ], + [ + 0.184747, + "i" + ], + [ + 0.118626, + "a" + ], + [ + 0.21782, + "l" + ], + [ + 0.61779, + "b" + ], + [ + 0.056813, + "a" + ], + [ + 0.18761, + "c" + ], + [ + 0.116227, + "k" + ], + [ + 0.143399, + "u \r\u001b[K" + ], + [ + 0.31621, + "p" + ], + [ + 0.174943, + "\rp " + ], + [ + 0.964699, + "-" + ], + [ + 1.23368, + "\u001b[?1l\u001b>" + ], + [ + 0.003628, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000824, + "\u001b]2;sudo dd if=/dev/sdx bs=10M | borg create --progress --stats ::specialbackup\u0007\u001b]1;dd\u0007" + ], + [ + 0.023411, + "[sudo] password for rugk: " + ], + [ + 3.286582, + "\r\n" + ], + [ + 0.077852, + "Initializing cache transaction: Reading config \r" + ], + [ + 0.000267, + "Initializing cache transaction: Reading chunks \r" + ], + [ + 0.000293, + "Initializing cache transaction: Reading files \r" + ], + [ + 0.00045, + " \r" + ], + [ + 0.083816, + "8.39 MB O 34.25 kB C 34.25 kB D 0 N stdin \r" + ], + [ + 0.228267, + "41.94 MB O 166.40 kB C 100.50 kB D 0 N stdin \r" + ], + [ + 0.216716, + "75.50 MB O 298.20 kB C 100.50 kB D 0 N stdin \r" + ], + [ + 0.218476, + "109.05 MB O 430.00 kB C 100.50 kB D 0 N stdin \r" + ], + [ + 0.219164, + "142.61 MB O 562.12 kB C 133.77 kB D 0 N stdin \r" + ], + [ + 0.216368, + "176.16 MB O 693.92 kB C 133.77 kB D 0 N stdin \r" + ], + [ + 0.222311, + "209.72 MB O 825.72 kB C 133.77 kB D 0 N stdin \r" + ], + [ + 0.217156, + "243.27 MB O 957.52 kB C 133.77 kB D 0 N stdin \r" + ], + [ + 0.22399, + "276.82 MB O 1.09 MB C 166.77 kB D 0 N stdin \r" + ], + [ + 0.223827, + "310.38 MB O 1.22 MB C 166.77 kB D 0 N stdin \r" + ], + [ + 0.220959, + "343.93 MB O 1.35 MB C 166.77 kB D 0 N stdin \r" + ], + [ + 0.223439, + "377.49 MB O 1.48 MB C 166.77 kB D 0 N stdin \r" + ], + [ + 0.226226, + "411.04 MB O 1.62 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.239743, + "444.60 MB O 1.75 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.229508, + "478.15 MB O 1.88 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.220491, + "511.71 MB O 2.01 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.2504, + "545.26 MB O 2.14 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.241044, + "578.81 MB O 2.28 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.215372, + "612.37 MB O 2.41 MB C 200.04 kB D 0 N stdin \r" + ], + [ + 0.113508, + "60+0 records in\r\n60+0 records out\r\n" + ], + [ + 3.9e-05, + "629145600 bytes (629 MB, 600 MiB) copied, 4.31277 s, 146 MB/s\r\n" + ], + [ + 0.231874, + "Compacting segments 0% \r" + ], + [ + 0.001188, + "Compacting segments 50% \r" + ], + [ + 3.7e-05, + " \r" + ], + [ + 0.078344, + "Saving chunks cache \r" + ], + [ + 0.000348, + "Saving cache config \r" + ], + [ + 0.087821, + " \r" + ], + [ + 2.8e-05, + " \r" + ], + [ + 0.000346, + "------------------------------------------------------------------------------\r\n" + ], + [ + 2.2e-05, + "Archive name: specialbackup\r\n" + ], + [ + 9.7e-05, + "Archive fingerprint: 68e942cc4a48402e48ba87f4887c24e5b9fe06e881b0ca241c791810a108bec0\r\nTime (start): Sun, 2017-07-16 18:52:58\r\n" + ], + [ + 0.000133, + "Time (end): Sun, 2017-07-16 18:53:05\r\nDuration: 6.99 seconds\r\n" + ], + [ + 1.3e-05, + "Number of files: 1\r\n" + ], + [ + 2.2e-05, + "Utilization of maximum supported archive size: 0%\r\n" + ], + [ + 7.3e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 1.1e-05, + " 
Original size Compressed size Deduplicated size\r\n" + ], + [ + 2.7e-05, + "This archive: 629.15 MB 2.47 MB 234.02 kB\r\n" + ], + [ + 3.3e-05, + "All archives: 2.50 GB 1.87 GB 570.15 MB\r\n" + ], + [ + 3.3e-05, + "\r\n" + ], + [ + 3.3e-05, + " Unique chunks Total chunks\r\n" + ], + [ + 2.4e-05, + "Chunk index: 1032 3380\r\n" + ], + [ + 2.4e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 0.047256, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001383, + "\u001b]7;\u0007" + ], + [ + 0.001024, + "\u001b]7;\u0007" + ], + [ + 8.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ " + ], + [ + 7e-06, + "\u001b[K" + ], + [ + 7.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00021, + "\u001b[?2004h" + ], + [ + 3.669021, + "\u001b[?1l\u001b>" + ], + [ + 0.000291, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000719, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001178, + "\u001b]7;\u0007" + ], + [ + 0.0009, + "\u001b]7;\u0007" + ], + [ + 9.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00022, + "\u001b[?2004h" + ], + [ + 0.311851, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.290767, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.23476, + "\b\b\u001b[1m#\u001b[1m \u001b[1mL\u001b[0m\u001b[39m" + ], + [ + 0.188456, + "\b\u001b[1mL\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.139916, + "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.522516, + "\b\u001b[1mt\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.157443, + "\b\u001b[1m'\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.460729, + "\b\u001b[1ms\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.1201, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.324466, + "\b\u001b[1mc\u001b[1montinue with some simple things:\u001b[0m\u001b[39m" + ], + [ + 0.634167, + "\u001b[?1l\u001b>" + ], + [ + 0.000434, + "\u001b[?2004l\r\r\n" + ], + [ + 0.0006, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00124, + "\u001b]7;\u0007" + ], + [ + 0.001113, + "\u001b]7;\u0007" + ], + [ + 0.00012, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000136, + "\u001b[?1h\u001b=" + ], + [ + 0.000274, + "\u001b[?2004h" + ], + [ + 1.724466, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.116327, + "\b\u001b[1m#\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.26172, + "\b\b\u001b[1m#\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.250198, + "\b\u001b[1m \u001b[1mU\u001b[0m\u001b[39m" + ], + [ + 0.746624, + "\b\u001b[1mU\u001b[1mSEFUL COMMANDS ##\u001b[0m\u001b[39m" + ], + [ + 0.5602, + "\u001b[?1l\u001b>" + ], + [ + 0.001411, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001009, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.003137, + "\u001b]7;\u0007" + ], + [ + 0.002454, + "\u001b]7;\u0007" + ], + [ + 0.000167, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000157, + "\u001b[?1h\u001b=" + ], + [ + 0.000746, + "\u001b[?2004h" + ], + [ + 1.207899, + "\u001b[?1l\u001b>" + ], + [ + 0.000322, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000472, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001289, + "\u001b]7;\u0007" + ], + [ + 0.000891, + "\u001b]7;\u0007" + ], + + [ + 9.5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ " + ], + [ + 1.8e-05, + "\u001b[K" + ], + [ + 0.000115, + "\u001b[?1h\u001b=" + ], + [ + 0.000246, + "\u001b[?2004h" + ], + [ + 0.734707, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.247085, + 
"\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.182467, + "\b\b\u001b[1m#\u001b[1m \u001b[1mY\u001b[0m\u001b[39m" + ], + [ + 0.123582, + "\b\u001b[1mY\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.16343, + "\b\u001b[1mo\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.183388, + "\b\u001b[1mu\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.083055, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.187526, + "\b\u001b[1mc\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.130988, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.142246, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.145489, + "\b\u001b[1m \u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.132155, + "\b\u001b[1ms\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.192915, + "\b\u001b[1mh\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.142644, + "\b\u001b[1mo\u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.149707, + "\b\u001b[1mw\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.134515, + "\b\u001b[1m \u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.085942, + "\b\u001b[1ms\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.160772, + "\b\u001b[1mo\u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.132016, + "\b\u001b[1mm\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.219601, + "\b\u001b[1me\u001b[1m information about an archive. You can even do it without \u001b[1mn\u001b[1meeding to specify the archive name:\u001b[0m\u001b[39m\u001b[K" + ], + [ + 0.644657, + "\u001b[?1l\u001b>" + ], + [ + 0.000392, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000705, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001347, + "\u001b]7;\u0007" + ], + [ + 0.001099, + "\u001b]7;\u0007" + ], + [ + 4.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.0001, + "\u001b[?1h\u001b=" + ], + [ + 0.000372, + "\u001b[?2004h" + ], + [ + 2.264862, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.182056, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.083939, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.152072, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.142791, + " " + ], + [ + 0.224315, + "i" + ], + [ + 0.130651, + "n" + ], + [ + 0.100647, + "f" + ], + [ + 0.155636, + "o" + ], + [ + 0.716063, + " " + ], + [ + 0.736635, + ":" + ], + [ + 0.107352, + ":" + ], + [ + 0.289804, + " " + ], + [ + 0.436564, + "-" + ], + [ + 0.131871, + "-" + ], + [ + 0.824072, + "l" + ], + [ + 0.061945, + "a" + ], + [ + 0.136723, + "s" + ], + [ + 0.143197, + "t" + ], + [ + 0.186833, + " " + ], + [ + 0.125784, + "1" + ], + [ + 0.924568, + "\u001b[?1l\u001b>" + ], + [ + 0.002555, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00096, + "\u001b]2;borg info :: --last 1\u0007\u001b]1;borg\u0007" + ], + [ + 0.693043, + "Archive name: specialbackup\r\nArchive fingerprint: 68e942cc4a48402e48ba87f4887c24e5b9fe06e881b0ca241c791810a108bec0\r\nComment: \r\nHostname: tux\r\nUsername: rugk\r\nTime (start): Sun, 2017-07-16 18:52:58\r\nTime (end): Sun, 2017-07-16 18:53:05\r\nDuration: 6.99 seconds\r\nNumber of files: 1\r\nCommand line: borg create --progress --stats ::specialbackup -\r\nUtilization of maximum supported archive size: 0%\r\n------------------------------------------------------------------------------\r\n Original size Compressed size Deduplicated size\r\nThis archive: 629.15 MB 2.47 MB 234.02 kB\r\nAll archives: 2.50 GB 1.87 GB 570.15 MB\r\n\r\n Unique chunks Total chunks\r\nChunk index: 1032 3380\r\n" + ], + [ + 0.045207, + 
"\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001204, + "\u001b]7;\u0007" + ], + [ + 0.000923, + "\u001b]7;\u0007" + ], + [ + 3.5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000129, + "\u001b[?1h\u001b=" + ], + [ + 0.000196, + "\u001b[?2004h" + ], + [ + 1.70302, + "\u001b[?1l\u001b>" + ], + [ + 0.000314, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000475, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001262, + "\u001b]7;\u0007" + ], + [ + 0.00098, + "\u001b]7;\u0007" + ], + [ + 4.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000164, + "\u001b[?2004h" + ], + [ + 0.281651, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.234109, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.181326, + "\b\b\u001b[1m#\u001b[1m \u001b[1mS\u001b[0m\u001b[39m" + ], + [ + 0.12398, + "\b\u001b[1mS\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.166912, + "\b\u001b[1mo\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.490114, + "\b\u001b[1m \u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.160581, + "\b\u001b[1ml\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.148283, + "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.453708, + "\b\u001b[1mt\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.118956, + "\b\u001b[1m'\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.125062, + "\b\u001b[1ms\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.130519, + "\b\u001b[1m \u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.130132, + "\b\u001b[1mr\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.265033, + "\b\u001b[1me\u001b[1mname our last archive:\u001b[0m\u001b[39m" + ], + [ + 1.001935, + "\u001b[?1l\u001b>" + ], + [ + 0.000416, + "\u001b[?2004l\r\r\n" + ], + [ + 0.0006, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00114, + "\u001b]7;\u0007" + ], + [ + 0.000898, + "\u001b]7;\u0007" + ], + [ + 2.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000286, + "\u001b[?2004h" + ], + [ + 1.253113, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.202007, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.105752, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.134948, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.14764, + " " + ], + [ + 0.157682, + "r" + ], + [ + 0.124491, + "e" + ], + [ + 0.118993, + "n" + ], + [ + 0.140445, + "a" + ], + [ + 0.101365, + "m" + ], + [ + 0.115953, + "e" + ], + [ + 1.107064, + " " + ], + [ + 0.561405, + ":" + ], + [ + 0.103305, + ":" + ], + [ + 0.263633, + "s" + ], + [ + 0.142089, + "p" + ], + [ + 0.134253, + "e" + ], + [ + 0.240688, + "c" + ], + [ + 0.136782, + "i" + ], + [ + 0.128372, + "a" + ], + [ + 0.170065, + "l" + ], + [ + 0.592209, + "b" + ], + [ + 0.348417, + "a" + ], + [ + 0.210896, + "c" + ], + [ + 0.259528, + "k" + ], + [ + 0.171523, + "u" + ], + [ + 0.245786, + "p" + ], + [ + 0.582735, + " " + ], + [ + 0.568884, + "b" + ], + [ + 0.101982, + "a" + ], + [ + 0.162673, + "c" + ], + [ + 0.104218, + "k" + ], + [ + 0.132828, + "u" + ], + [ + 0.245157, + "p" + ], + [ + 0.266242, + "-" + ], + [ + 0.316388, + "b" + ], + [ + 0.43535, + "l" + ], + [ + 0.133908, + "o" + ], + [ + 0.047013, + "c" + ], + [ + 0.622041, + "k" + ], + [ + 0.82215, + "-" + ], + [ + 0.183882, + "d" + ], + [ + 0.189034, + "e" + ], + [ + 0.181902, + "v" + ], + [ + 0.18728, + "i" + ], + [ + 0.052242, + 
"c" + ], + [ + 0.160462, + "e" + ], + [ + 0.645053, + "\u001b[?1l\u001b>" + ], + [ + 0.001146, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000741, + "\u001b]2;borg rename ::specialbackup backup-block-device\u0007\u001b]1;borg\u0007" + ], + [ + 1.136038, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001149, + "\u001b]7;\u0007" + ], + [ + 0.000968, + "\u001b]7;\u0007" + ], + [ + 7.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000107, + "\u001b[?1h\u001b=" + ], + [ + 0.000193, + "\u001b[?2004h" + ], + [ + 1.203902, + "\u001b[32mborg\u001b[39m rename ::specialbackup backup-block-device" + ], + [ + 0.192203, + "\u001b[47D\u001b[1m#\u001b[1m \u001b[1mS\u001b[1mo\u001b[1m \u001b[1ml\u001b[1me\u001b[1mt\u001b[1m'\u001b[1ms\u001b[1m \u001b[1mr\u001b[1me\u001b[1mn\u001b[1ma\u001b[1mm\u001b[1me\u001b[1m \u001b[1mo\u001b[1mu\u001b[1mr\u001b[1m \u001b[1ml\u001b[1ma\u001b[1ms\u001b[1mt\u001b[1m \u001b[1ma\u001b[1mr\u001b[1mc\u001b[1mh\u001b[1mi\u001b[1mv\u001b[1me\u001b[1m:\u001b[0m\u001b[39m \u001b[12D" + ], + [ + 0.528657, + "\u001b[35D\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[0m\u001b[32mg\u001b[39m\u001b[0m\u001b[39m \u001b[0m\u001b[39mi\u001b[0m\u001b[39mn\u001b[0m\u001b[39mf\u001b[0m\u001b[39mo\u001b[0m\u001b[39m \u001b[0m\u001b[39m:\u001b[0m\u001b[39m:\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39m-\u001b[0m\u001b[39ml\u001b[0m\u001b[39ma\u001b[0m\u001b[39ms\u001b[0m\u001b[39mt\u001b[0m\u001b[39m \u001b[0m\u001b[39m1\u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[14D" + ], + [ + 0.548884, + "\u001b[?1l\u001b>" + ], + [ + 0.003595, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000857, + "\u001b]2;borg info :: --last 1\u0007\u001b]1;borg\u0007" + ], + [ + 0.689879, + "Archive name: backup-block-device\r\nArchive fingerprint: 5fd9732b4809252742a7cb3fadf2a971dd6371afd11a07944c0b5803d57c240f\r\nComment: \r\nHostname: tux\r\nUsername: rugk\r\nTime (start): Sun, 2017-07-16 18:52:58\r\nTime (end): Sun, 2017-07-16 18:53:05\r\nDuration: 6.99 seconds\r\nNumber of files: 1\r\nCommand line: borg create --progress --stats ::specialbackup -\r\nUtilization of maximum supported archive size: 0%\r\n------------------------------------------------------------------------------\r\n Original size Compressed size Deduplicated size\r\nThis archive: 629.15 MB 2.47 MB 234.04 kB\r\nAll archives: 2.50 GB 1.87 GB 570.15 MB\r\n\r\n Unique chunks Total chunks\r\nChunk index: 1032 3380\r\n" + ], + [ + 0.044772, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001259, + "\u001b]7;\u0007" + ], + [ + 0.001013, + "\u001b]7;\u0007" + ], + [ + 8.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000109, + "\u001b[?1h\u001b=" + ], + [ + 0.000191, + "\u001b[?2004h" + ], + [ + 2.415375, + "\u001b[?1l\u001b>" + ], + [ + 0.000379, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000632, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001347, + "\u001b]7;\u0007" + ], + [ + 0.001044, + "\u001b]7;\u0007" + ], + [ + 8.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000101, + "\u001b[?1h\u001b=" + ], + [ + 0.000183, + "\u001b[?2004h" + ], + [ + 0.412865, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.250988, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + 
], + [ + 0.245192, + "\b\b\u001b[1m#\u001b[1m \u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.706056, + "\b\u001b[1mA\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.273409, + "\b\u001b[1m \u001b[1mv\u001b[0m\u001b[39m" + ], + [ + 0.194462, + "\b\u001b[1mv\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.114445, + "\b\u001b[1me\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.097756, + "\b\u001b[1mr\u001b[1my\u001b[0m\u001b[39m" + ], + [ + 0.149155, + "\b\u001b[1my\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.258303, + "\b\u001b[1m \u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.133528, + "\b\u001b[1mi\u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.225062, + "\b\u001b[1mm\u001b[1mp\u001b[0m\u001b[39m" + ], + [ + 0.352638, + "\b\u001b[1mp\u001b[1mortant step if you choose keyfile mode (where the keyfile is onl\u001b[1my\u001b[1m saved locally) is to export your keyfile and possibly print it, etc.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.170303, + "\u001b[?1l\u001b>" + ], + [ + 0.000524, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000714, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001487, + "\u001b]7;\u0007" + ], + [ + 0.001303, + "\u001b]7;\u0007" + ], + [ + 3.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000291, + "\u001b[?2004h" + ], + [ + 2.080689, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.197142, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.172626, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.145083, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.943024, + " " + ], + [ + 0.511742, + "k" + ], + [ + 0.274338, + "e" + ], + [ + 0.308416, + "y" + ], + [ + 0.568141, + " " + ], + [ + 0.62626, + "e" + ], + [ + 0.224255, + "x" + ], + [ + 2.028973, + "p" + ], + [ + 0.220629, + "o" + ], + [ + 0.395617, + "r" + ], + [ + 0.127004, + "t" + ], + [ + 0.635262, + " " + ], + [ + 0.728631, + ":" + ], + [ + 0.116567, + ":" + ], + [ + 0.347323, + " " + ], + [ + 1.713208, + "-" + ], + [ + 0.134471, + "-" + ], + [ + 0.298094, + "q" + ], + [ + 0.316108, + "r" + ], + [ + 0.373821, + "-" + ], + [ + 0.416623, + "c" + ], + [ + 0.400783, + "o" + ], + [ + 0.107762, + "d" + ], + [ + 0.134276, + "e" + ], + [ + 0.384438, + " " + ], + [ + 0.447909, + "f" + ], + [ + 0.162017, + "i" + ], + [ + 0.113187, + "l" + ], + [ + 0.069321, + "e" + ], + [ + 0.627894, + "." 
+ ], + [ + 0.32877, + "h" + ], + [ + 0.137354, + "t" + ], + [ + 0.181468, + "m" + ], + [ + 0.156847, + "l" + ], + [ + 0.434616, + " " + ], + [ + 0.906636, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.546016, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 1.755972, + "\b\u001b[1m \u001b[1mthis creates a nice HTML, but when \u001b[1my\u001b[1mou want something simpler…\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.940038, + "\b\b\u001b[1mr\u001b[0m\u001b[39m\u001b[K" + ], + [ + 0.691374, + "\b\b\u001b[1me\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.501031, + "\b\b\u001b[1ml\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.0295, + "\b\b\u001b[1mp\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029695, + "\b\b\u001b[1mm\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029437, + "\b\b\u001b[1mi\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03032, + "\b\b\u001b[1ms\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029433, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030373, + "\b\b\u001b[1mg\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029337, + "\b\b\u001b[1mn\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.031058, + "\b\b\u001b[1mi\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029329, + "\b\b\u001b[1mh\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.031142, + "\b\b\u001b[1mt\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029181, + "\b\b\u001b[1me\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029786, + "\b\b\u001b[1mm\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030603, + "\b\b\u001b[1mo\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029332, + "\b\b\u001b[1ms\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030813, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029428, + "\b\b\u001b[1mt\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029368, + "\b\b\u001b[1mn\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030166, + "\b\b\u001b[1ma\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030524, + "\b\b\u001b[1mw\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029333, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030607, + "\b\b\u001b[1mu\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029346, + "\r\u001b[1my\u001b[1mo\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.031102, + "\r\u001b[1my\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029544, + "\u001b[A\u001b[76C\u001b[1m \u001b[0m\u001b[39m \u001b[K\r" + ], + [ + 0.029675, + "\u001b[A\u001b[76C\u001b[1mn\u001b[0m\u001b[39m\u001b[K\u001b[1B\r\u001b[K\u001b[A\u001b[77C" + ], + [ + 0.030809, + "\b\b\u001b[1me\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.02987, + "\b\b\u001b[1mh\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029707, + "\b\b\u001b[1mw\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029901, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03057, + "\b\b\u001b[1mt\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029469, + "\b\b\u001b[1mu\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030219, + "\b\b\u001b[1mb\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029227, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030465, + "\b\b\u001b[1m,\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029423, + "\b\b\u001b[1mL\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030292, + 
"\b\b\u001b[1mM\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030715, + "\b\b\u001b[1mT\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029641, + "\b\b\u001b[1mH\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029367, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.031235, + "\b\b\u001b[1me\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030119, + "\b\b\u001b[1mc\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030061, + "\b\b\u001b[1mi\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030102, + "\b\b\u001b[1mn\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029384, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029499, + "\b\b\u001b[1ma\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03047, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03019, + "\b\b\u001b[1ms\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029337, + "\b\b\u001b[1me\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030138, + "\b\b\u001b[1mt\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030049, + "\b\b\u001b[1ma\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030132, + "\b\b\u001b[1me\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029948, + "\b\b\u001b[1mr\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029428, + "\b\b\u001b[1mc\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030197, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.030196, + "\b\b\u001b[1ms\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03118, + "\b\b\u001b[1mi\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.028165, + "\b\b\u001b[1mh\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03128, + "\b\b\u001b[1mt\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.029716, + "\b\b\u001b[1m \u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.03012, + "\b\b\u001b[1m#\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.346808, + "\b\u001b[0m\u001b[39m \b" + ], + [ + 0.19843, + "\b" + ], + [ + 0.307235, + "\b \b" + ], + [ + 0.499683, + "\b \b" + ], + [ + 0.028468, + "\b \b" + ], + [ + 0.029472, + "\b \b" + ], + [ + 0.030565, + "\b \b" + ], + [ + 0.029224, + "\b \b" + ], + [ + 0.030493, + "\b \b" + ], + [ + 0.030666, + "\b \b" + ], + [ + 0.029185, + "\b \b" + ], + [ + 0.02989, + "\b" + ], + [ + 0.029921, + "\b \b" + ], + [ + 0.029657, + "\b \b" + ], + [ + 0.154399, + "\b \b" + ], + [ + 0.165915, + "\b \b" + ], + [ + 0.154316, + "\b \b" + ], + [ + 0.154588, + "\b \b" + ], + [ + 0.147868, + "\b \b" + ], + [ + 1.555865, + "p" + ], + [ + 0.446126, + "a" + ], + [ + 0.188714, + "p" + ], + [ + 0.252833, + "e" + ], + [ + 0.142044, + "r" + ], + [ + 0.395895, + " " + ], + [ + 0.423453, + "\u001b[1m# this is a \"manual input\"-only backup (but it is\u001b[1m \u001b[1malso included in the --qr-code option)\u001b[0m\u001b[39m\u001b[K" + ], + [ + 3.71528, + "\u001b[?1l\u001b>" + ], + [ + 0.001413, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000757, + "\u001b]2;borg key export :: --paper\u0007\u001b]1;borg\u0007" + ], + [ + 0.550352, + "To restore key use borg key import --paper /path/to/repo\r\n\r\nBORG PAPER KEY v1\r\nid: 20 / 54f957 2d6d72 de8280 / 158a57 45bdc3 - f6\r\n 1: 86a961 6c676f 726974 686da6 736861 323536 - 14\r\n 2: a46461 7461da 00def1 7c9f3c 81ebc6 730a05 - 35\r\n 3: 12453e d02760 ffdeef 4d0daa 231d81 ae10d8 - e5\r\n 4: 7bb0a1 97c30f 312b61 7170ba d1ea91 da2c88 - 30\r\n 5: ca997e 177b74 38f906 709a66 fbf013 40ab3d - c4\r\n 6: 6af94b 8a36a9 
e07b9d b0e08d 3935cd f1bbb9 - 5c\r\n 7: 2b10b6 ebb586 4c0967 f682b9 c64358 fbb63c - a4\r\n 8: b9fc94 240d08 072524 98b619 7bd1c5 21094e - ec\r\n 9: ac4f05 d65a6a 7f8a0d 8cc14e 405b36 c248e1 - 79\r\n10: d23b89 c61074 3e68c9 79c683 2384e8 cd9f82 - 50\r\n11: fc76a9 3f2a9e 05d5f1 313f95 ec4313 53e0c1 - 4a\r\n12: 654f1d ab2b51 2ccbe8 80be07 b6132f 86aeb5 - 11\r\n13: 7e6e48 5ff0d4 41e659 a421f0 5123df f88dff - c9\r\n14: 03db58 bbb410 87d7fc 075b14 5108a4 686173 - 9a\r\n15: 68da00 20524b 8769e9 e5bd18 a9b431 c05b49 - ba\r\n16: 505280 9b104a b081c0 f4efd1 1d3771 34c701 - 40\r\n17: aa6974 657261 74696f 6e73ce 000186 a0a473 - 15\r\n18: 616c7" + ], + [ + 7.2e-05, + "4 da0020 0be74e e1e9af 7b1364 3ee362 - 32\r\n19: 643069 b57a75 d30eb6 104c28 367e17 7dd4d9 - 79\r\n20: f556a7 766572 73696f 6e01 - 32\r\n\r\n" + ], + [ + 0.048873, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001193, + "\u001b]7;\u0007" + ], + [ + 0.000921, + "\u001b]7;\u0007" + ], + [ + 9.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000185, + "\u001b[?2004h" + ], + [ + 3.146565, + "\u001b[?1l\u001b>" + ], + [ + 0.000424, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000795, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001307, + "\u001b]7;\u0007" + ], + [ + 0.001444, + "\u001b]7;\u0007" + ], + [ + 8.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.00011, + "\u001b[?1h\u001b=" + ], + [ + 0.000263, + "\u001b[?2004h" + ], + [ + 0.441809, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.136081, + "\b\u001b[1m#\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.375389, + "\b\b\u001b[1m#\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.284554, + "\b\u001b[1m \u001b[1mM\u001b[0m\u001b[39m" + ], + [ + 0.395833, + "\b\u001b[1mM\u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.434316, + "\b\u001b[1mA\u001b[1mINTENANCE ##\u001b[0m\u001b[39m" + ], + [ + 1.471226, + "\u001b[?1l\u001b>" + ], + [ + 0.00055, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000605, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001464, + "\u001b]7;\u0007" + ], + [ + 0.00092, + "\u001b]7;\u0007" + ], + [ + 9.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000104, + "\u001b[?1h\u001b=" + ], + [ + 0.000309, + "\u001b[?2004h" + ], + [ + 0.977805, + "\u001b[?1l\u001b>" + ], + [ + 0.000452, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000828, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001436, + "\u001b]7;\u0007" + ], + [ + 0.001464, + "\u001b]7;\u0007" + ], + [ + 3.8e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000171, + "\u001b[?1h\u001b=" + ], + [ + 0.000247, + "\u001b[?2004h" + ], + [ + 0.221358, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.374414, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.189751, + "\b\b\u001b[1m#\u001b[1m \u001b[1mS\u001b[0m\u001b[39m" + ], + [ + 0.087275, + "\b\u001b[1mS\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.140008, + "\b\u001b[1mo\u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.150891, + "\b\u001b[1mm\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.387855, + "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.204067, + "\b\u001b[1mt\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.127209, + "\b\u001b[1mi\u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.073999, + "\b\u001b[1mm\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.130356, + "\b\u001b[1me\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.224406, + "\b\u001b[1ms\u001b[1m backups get broken or we 
want a regular \"checkup\" that everythin\u001b[1mg\u001b[1m is okay…\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.361948, + "\u001b[?1l\u001b>" + ], + [ + 0.000402, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000743, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001212, + "\u001b]7;\u0007" + ], + [ + 0.000923, + "\u001b]7;\u0007" + ], + [ + 1.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000321, + "\u001b[?2004h" + ], + [ + 2.246766, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.18622, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.121068, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.146401, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.255479, + " " + ], + [ + 0.268833, + "c" + ], + [ + 0.154418, + "h" + ], + [ + 0.106649, + "e" + ], + [ + 0.142762, + "c" + ], + [ + 0.306359, + "k" + ], + [ + 0.697455, + " " + ], + [ + 1.113236, + "-" + ], + [ + 0.768765, + "v" + ], + [ + 0.477353, + " " + ], + [ + 0.387303, + ":" + ], + [ + 0.102251, + ":" + ], + [ + 0.749971, + "\u001b[?1l\u001b>" + ], + [ + 0.001961, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000798, + "\u001b]2;borg check -v ::\u0007\u001b]1;borg\u0007" + ], + [ + 0.54272, + "Starting repository check\r\n" + ], + [ + 1.152819, + "Starting repository index check\r\n" + ], + [ + 0.00038, + "Completed repository check, no problems found.\r\n" + ], + [ + 0.000129, + "Starting archive consistency check...\r\n" + ], + [ + 0.095799, + "Analyzing archive backup1 (1/6)\r\n" + ], + [ + 0.109358, + "Analyzing archive backup2 (2/6)\r\n" + ], + [ + 0.036555, + "Analyzing archive backup3 (3/6)\r\n" + ], + [ + 0.03649, + "Analyzing archive rugk-2017-07-16T18:51:34 (4/6)\r\n" + ], + [ + 0.000491, + "Analyzing archive rugk-2017-07-16T18:52:19 (5/6)\r\n" + ], + [ + 0.000729, + "Analyzing archive backup-block-device (6/6)\r\n" + ], + [ + 0.00119, + "Archive consistency check complete, no problems found.\r\n" + ], + [ + 0.081895, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001153, + "\u001b]7;\u0007" + ], + [ + 0.000924, + "\u001b]7;\u0007" + ], + [ + 0.000108, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00022, + "\u001b[?2004h" + ], + [ + 2.243609, + "\u001b[?1l\u001b>" + ], + [ + 0.000511, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000535, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001789, + "\u001b]7;\u0007" + ], + [ + 0.00157, + "\u001b]7;\u0007" + ], + [ + 0.000139, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.7e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00033, + "\u001b[?2004h" + ], + [ + 0.326751, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.24289, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.285802, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.191158, + "\b\u001b[1mN\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.184029, + "\b\u001b[1me\u001b[1mx\u001b[0m\u001b[39m" + ], + [ + 0.16373, + "\b\u001b[1mx\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.239936, + "\b\u001b[1mt\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.27885, + "\b\u001b[1m \u001b[1mp\u001b[0m\u001b[39m" + ], + [ + 0.12665, + "\b\u001b[1mp\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.154792, + "\b\u001b[1mr\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.372203, + "\b\u001b[1mo\u001b[1mblem: 
Usually you do not have infinite disk space. So you may need\u001b[1m \u001b[1mto prune your archive…\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.956234, + "\u001b[?1l\u001b>" + ], + [ + 0.000446, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000607, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001281, + "\u001b]7;\u0007" + ], + [ + 0.000983, + "\u001b]7;\u0007" + ], + [ + 2.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000142, + "\u001b[?1h\u001b=" + ], + [ + 0.00032, + "\u001b[?2004h" + ], + [ + 1.137641, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.26675, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.151609, + "\b\b\u001b[1m#\u001b[1m \u001b[1mY\u001b[0m\u001b[39m" + ], + [ + 0.11765, + "\b\u001b[1mY\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.158458, + "\b\u001b[1mo\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.149615, + "\b\u001b[1mu\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.080657, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.144379, + "\b\u001b[1mc\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.104266, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.132218, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.202965, + "\b\u001b[1m \u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.17807, + "\b\u001b[1mt\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.123814, + "\b\u001b[1mu\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.325016, + "\b\u001b[1mn\u001b[1me this in every detail. See the docs for details. Here only a s\u001b[1mi\u001b[1mmple example:\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.91505, + "\u001b[?1l\u001b>" + ], + [ + 0.000406, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000684, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001347, + "\u001b]7;\u0007" + ], + [ + 0.001084, + "\u001b]7;\u0007" + ], + [ + 0.000116, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000118, + "\u001b[?1h\u001b=" + ], + [ + 0.000246, + "\u001b[?2004h" + ], + [ + 2.556304, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.198214, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.125589, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.147156, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.202848, + " " + ], + [ + 0.369539, + "p" + ], + [ + 0.228714, + "r" + ], + [ + 0.184236, + "u" + ], + [ + 0.154014, + "n" + ], + [ + 0.136362, + "e" + ], + [ + 0.94169, + " " + ], + [ + 0.44829, + "-" + ], + [ + 0.112062, + "-" + ], + [ + 0.37454, + "l" + ], + [ + 0.157195, + "i" + ], + [ + 0.116633, + "s" + ], + [ + 0.193515, + "t" + ], + [ + 0.486369, + " " + ], + [ + 0.442107, + "-" + ], + [ + 0.12257, + "-" + ], + [ + 0.403774, + "k" + ], + [ + 0.214488, + "e" + ], + [ + 0.771743, + "e" + ], + [ + 0.349591, + "p" + ], + [ + 0.352253, + "-" + ], + [ + 0.201267, + "l" + ], + [ + 0.109728, + "a" + ], + [ + 0.146296, + "s" + ], + [ + 0.130476, + "t" + ], + [ + 0.234998, + " " + ], + [ + 0.264266, + "1" + ], + [ + 0.429572, + " " + ], + [ + 0.505667, + "-" + ], + [ + 0.105697, + "-" + ], + [ + 0.294354, + "d" + ], + [ + 0.178175, + "r" + ], + [ + 0.239011, + "y" + ], + [ + 0.561933, + "-" + ], + [ + 0.220564, + "r" + ], + [ + 0.172983, + "u" + ], + [ + 0.138969, + "n" + ], + [ + 0.891028, + "\u001b[?1l\u001b>" + ], + [ + 0.004152, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000975, + "\u001b]2;borg prune --list --keep-last 1 --dry-run\u0007\u001b]1;borg\u0007" + ], + [ + 0.658906, 
+ "Keeping archive: backup-block-device Sun, 2017-07-16 18:52:58 [5fd9732b4809252742a7cb3fadf2a971dd6371afd11a07944c0b5803d57c240f]\r\n" + ], + [ + 0.000155, + "Would prune: rugk-2017-07-16T18:52:19 Sun, 2017-07-16 18:52:19 [0de98f590b004ad7545f2013c4c9f2d4e3eed1415d177c89d6c2b7ff05918d2e]\r\n" + ], + [ + 0.000118, + "Would prune: rugk-2017-07-16T18:51:34 Sun, 2017-07-16 18:51:34 [d054cc411324d4bd848b39d1c9cad909073f9ff1a1a503a676d3e050be140396]\r\n" + ], + [ + 6.5e-05, + "Would prune: backup3 Fri, 2017-07-14 21:55:37 [36cd8fdf9b8b2e3bbb3fc2bb600acd48609efaf3a0880f900e0701a47ff69d4d]\r\n" + ], + [ + 7.1e-05, + "Would prune: backup2 Fri, 2017-07-14 21:54:56 [5aaf03d1c710cf774f9c9ff1c6317b621c14e519c6bac459f6d64b31e3bbd200]\r\n" + ], + [ + 7.1e-05, + "Would prune: backup1 Fri, 2017-07-14 21:54:06 [9758c7db339a066360bffad17b2ffac4fb368c6722c0be3a47a7a9b631f06407]\r\n" + ], + [ + 0.047362, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001109, + "\u001b]7;\u0007" + ], + [ + 0.00093, + "\u001b]7;\u0007" + ], + [ + 7.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000163, + "\u001b[?2004h" + ], + [ + 2.173126, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.420696, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.658252, + "\b\b\u001b[1m#\u001b[1m \u001b[1mW\u001b[0m\u001b[39m" + ], + [ + 0.186236, + "\b\u001b[1mW\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.09843, + "\b\u001b[1mh\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.143515, + "\b\u001b[1me\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.153626, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.136407, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.170555, + "\b\u001b[1ma\u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.157309, + "\b\u001b[1mc\u001b[1mtually executing it in a script, you have to use it without the --dry\u001b[1m-\u001b[1mrun option, of course.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.08243, + "\u001b[?1l\u001b>" + ], + [ + 0.000512, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000552, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001375, + "\u001b]7;\u0007" + ], + [ + 0.000922, + "\u001b]7;\u0007" + ], + [ + 3.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00026, + "\u001b[?2004h" + ], + [ + 1.169356, + "\u001b[?1l\u001b>" + ], + [ + 0.000602, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000917, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001594, + "\u001b]7;\u0007" + ], + [ + 0.001826, + "\u001b]7;\u0007" + ], + [ + 7.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000204, + "\u001b[?1h\u001b=" + ], + [ + 0.000349, + "\u001b[?2004h" + ], + [ + 0.464206, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.135956, + "\b\u001b[1m#\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.484249, + "\b\b\u001b[1m#\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.299809, + "\b\u001b[1m \u001b[1mR\u001b[0m\u001b[39m" + ], + [ + 0.199072, + "\b\u001b[1mR\u001b[1mE\u001b[0m\u001b[39m" + ], + [ + 0.620669, + "\b\u001b[1mE\u001b[1mSTORE ##\u001b[0m\u001b[39m" + ], + [ + 0.924028, + "\u001b[?1l\u001b>" + ], + [ + 0.000399, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000744, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001142, + "\u001b]7;\u0007" + ], + [ + 0.000834, + "\u001b]7;\u0007" + ], + [ + 9.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000124, + 
"\u001b[?1h\u001b=" + ], + [ + 0.000294, + "\u001b[?2004h" + ], + [ + 0.797042, + "\u001b[?1l\u001b>" + ], + [ + 0.000325, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001543, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.002662, + "\u001b]7;\u0007" + ], + [ + 0.001568, + "\u001b]7;\u0007" + ], + [ + 4.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000185, + "\u001b[?2004h" + ], + [ + 0.705049, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.50212, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 1.210452, + "\b\b\u001b[1m#\u001b[1m \u001b[1mW\u001b[0m\u001b[39m" + ], + [ + 0.1987, + "\b\u001b[1mW\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.12116, + "\b\u001b[1mh\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.152173, + "\b\u001b[1me\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.16582, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.378037, + "\b\u001b[1m \u001b[1my\u001b[0m\u001b[39m" + ], + [ + 0.330829, + "\b\u001b[1my\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.180945, + "\b\u001b[1mo\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.152701, + "\b\u001b[1mu\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.121298, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.148067, + "\b\u001b[1mw\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.233865, + "\b\u001b[1ma\u001b[1mnt to see the diff between two archives use this command.\u001b[0m\u001b[39m" + ], + [ + 1.947763, + "\u001b[?1l\u001b>" + ], + [ + 0.000408, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000607, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001033, + "\u001b]7;\u0007" + ], + [ + 0.000979, + "\u001b]7;\u0007" + ], + [ + 0.000127, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000278, + "\u001b[?2004h" + ], + [ + 0.693036, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.275798, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.281158, + "\b\b\u001b[1m#\u001b[1m \u001b[1mE\u001b[0m\u001b[39m" + ], + [ + 0.386709, + "\b\u001b[1mE\u001b[1m.\u001b[0m\u001b[39m" + ], + [ + 0.136187, + "\b\u001b[1m.\u001b[1mg\u001b[0m\u001b[39m" + ], + [ + 0.262011, + "\b\u001b[1mg\u001b[1m.\u001b[0m\u001b[39m" + ], + [ + 0.234889, + "\b\u001b[1m.\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.361971, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.162798, + "\b\u001b[1mw\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.077265, + "\b\u001b[1mh\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.148774, + "\b\u001b[1ma\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.34541, + "\b\u001b[1mt\u001b[1m happened between the first two backups?\u001b[0m\u001b[39m" + ], + [ + 1.295996, + "\u001b[?1l\u001b>" + ], + [ + 0.000733, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001102, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001634, + "\u001b]7;\u0007" + ], + [ + 0.000634, + "\u001b]7;\u0007" + ], + [ + 5.6e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000303, + "\u001b[?2004h" + ], + [ + 0.441685, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.182795, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.072867, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.161104, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.179655, + " " + ], + [ + 0.154676, + "d" + ], + [ + 0.132421, + 
"i" + ], + [ + 0.124239, + "f" + ], + [ + 0.13999, + "f" + ], + [ + 0.624444, + " " + ], + [ + 0.862302, + ":" + ], + [ + 0.1169, + ":" + ], + [ + 0.274626, + "b" + ], + [ + 0.100778, + "a" + ], + [ + 0.188526, + "c" + ], + [ + 0.097402, + "k" + ], + [ + 0.144999, + "u" + ], + [ + 0.22317, + "p" + ], + [ + 0.167969, + "1" + ], + [ + 0.44642, + " " + ], + [ + 0.240129, + "b" + ], + [ + 0.164579, + "a" + ], + [ + 0.190471, + "c" + ], + [ + 0.136211, + "k" + ], + [ + 0.12257, + "u" + ], + [ + 0.258587, + "p" + ], + [ + 0.215453, + "2" + ], + [ + 1.160869, + "\u001b[?1l\u001b>" + ], + [ + 0.001983, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000801, + "\u001b]2;borg diff ::backup1 backup2\u0007\u001b]1;borg\u0007" + ], + [ + 0.717522, + "added 20 B Wallpaper/newfile.txt\r\n" + ], + [ + 0.044186, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001157, + "\u001b]7;\u0007" + ], + [ + 0.000949, + "\u001b]7;\u0007" + ], + [ + 0.000108, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000147, + "\u001b[?2004h" + ], + [ + 1.545435, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.26435, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.178864, + "\b\b\u001b[1m#\u001b[1m \u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.161899, + "\b\u001b[1mA\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.240289, + "\b\u001b[1mh\u001b[1m,\u001b[0m\u001b[39m" + ], + [ + 0.132971, + "\b\u001b[1m,\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.115812, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.111227, + "\b\u001b[1mw\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.159647, + "\b\u001b[1me\u001b[1m added a file, right…\u001b[0m\u001b[39m" + ], + [ + 0.97686, + "\u001b[?1l\u001b>" + ], + [ + 0.000441, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00091, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001031, + "\u001b]7;\u0007" + ], + [ + 0.000995, + "\u001b]7;\u0007" + ], + [ + 2.5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000141, + "\u001b[?1h\u001b=" + ], + [ + 0.000303, + "\u001b[?2004h" + ], + [ + 6.370198, + "\u001b[?1l\u001b>" + ], + [ + 0.000854, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000815, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.003101, + "\u001b]7;\u0007" + ], + [ + 0.002831, + "\u001b]7;\u0007" + ], + [ + 0.000107, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000314, + "\u001b[?1h\u001b=" + ], + [ + 0.000499, + "\u001b[?2004h" + ], + [ + 0.580198, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.240323, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.29592, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.135389, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.115437, + "\b\u001b[1mh\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.157526, + "\b\u001b[1me\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.624235, + "\b\u001b[1mr\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.282742, + "\b\u001b[1me\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.133006, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.206434, + "\b\u001b[1ma\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.13301, + "\b\u001b[1mr\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.255991, + "\b\u001b[1me\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.196416, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.275594, + "\b\u001b[1ma\u001b[1mlso other ways to extract the data.\u001b[0m\u001b[39m" + ], + [ + 0.932018, + "\u001b[?1l\u001b>" + ], + [ + 
0.001354, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001071, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00297, + "\u001b]7;\u0007" + ], + [ + 0.002675, + "\u001b]7;\u0007" + ], + [ + 0.000154, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000231, + "\u001b[?1h\u001b=" + ], + [ + 0.000895, + "\u001b[?2004h" + ], + [ + 1.021752, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.238058, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.245484, + "\b\b\u001b[1m#\u001b[1m \u001b[1mE\u001b[0m\u001b[39m" + ], + [ + 0.719467, + "\b\u001b[1mE\u001b[1m.\u001b[0m\u001b[39m" + ], + [ + 0.151468, + "\b\u001b[1m.\u001b[1mg\u001b[0m\u001b[39m" + ], + [ + 0.183213, + "\b\u001b[1mg\u001b[1m.\u001b[0m\u001b[39m" + ], + [ + 0.599958, + "\b\u001b[1m.\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.316279, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.166858, + "\b\u001b[1ma\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.551272, + "\b\u001b[1ms\u001b[1m a tar archive.\u001b[0m\u001b[39m" + ], + [ + 0.938861, + "\u001b[?1l\u001b>" + ], + [ + 0.000638, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000793, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001159, + "\u001b]7;\u0007" + ], + [ + 0.000867, + "\u001b]7;\u0007" + ], + [ + 9.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000282, + "\u001b[?2004h" + ], + [ + 0.860998, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.189263, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.11245, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.133531, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.62438, + " " + ], + [ + 0.295845, + "e" + ], + [ + 0.165874, + "x" + ], + [ + 0.180501, + "p" + ], + [ + 0.166254, + "o" + ], + [ + 0.27793, + "r" + ], + [ + 0.113477, + "t" + ], + [ + 0.46559, + "-" + ], + [ + 0.34577, + "t" + ], + [ + 0.148398, + "a" + ], + [ + 0.17144, + "r" + ], + [ + 0.920527, + " " + ], + [ + 0.40208, + "-" + ], + [ + 0.108683, + "-" + ], + [ + 0.326944, + "p" + ], + [ + 0.195982, + "r" + ], + [ + 0.175632, + "o" + ], + [ + 0.229442, + "g" + ], + [ + 0.133505, + "r" + ], + [ + 0.171995, + "e" + ], + [ + 0.244119, + "s" + ], + [ + 0.154514, + "s" + ], + [ + 0.579295, + " " + ], + [ + 0.575201, + ":" + ], + [ + 0.112098, + ":" + ], + [ + 0.355392, + "b" + ], + [ + 0.110008, + "a" + ], + [ + 0.172393, + "c" + ], + [ + 0.080739, + "k" + ], + [ + 0.134163, + "u" + ], + [ + 0.221434, + "p" + ], + [ + 0.276712, + "2" + ], + [ + 0.6747, + " " + ], + [ + 0.372614, + "b" + ], + [ + 0.09319, + "a" + ], + [ + 0.152876, + "c" + ], + [ + 0.089531, + "k" + ], + [ + 0.150747, + "u" + ], + [ + 0.233879, + "p" + ], + [ + 0.273301, + "." + ], + [ + 0.354416, + "t" + ], + [ + 0.107034, + "a" + ], + [ + 0.144993, + "r" + ], + [ + 0.463039, + "." 
+ ], + [ + 0.352906, + "g" + ], + [ + 0.133262, + "z" + ], + [ + 1.083854, + "\u001b[?1l\u001b>" + ], + [ + 0.004197, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001109, + "\u001b]2;borg export-tar --progress ::backup2 backup.tar.gz\u0007" + ], + [ + 4.7e-05, + "\u001b]1;borg\u0007" + ], + [ + 0.679042, + "Calculating size \r" + ], + [ + 0.036244, + " 0.0% Processing: Wallpaper/bigcollection/Macaws_USBingImage.jpg \r" + ], + [ + 0.020857, + " 0.1% Processing: Wallpaper/bigcollection/_A...r_the_town_of_Herrischried.jpg\r" + ], + [ + 0.030544, + " 0.2% Processing: Wallpaper/bigcollection/_A...her_Atlas__Marokko____Doug.jpg\r" + ], + [ + 0.030864, + " 0.3% Processing: Wallpaper/bigcollection/_A...ssar__S_d-Sulawesi__Indone.jpg\r" + ], + [ + 0.030144, + " 0.4% Processing: Wallpaper/bigcollection/_A..._N_he_von_Timimoun__Algeri.jpg\r" + ], + [ + 0.027643, + " 0.5% Processing: Wallpaper/bigcollection/_A...t_kleinen__aus_Servietten_.jpg\r" + ], + [ + 0.03121, + " 0.6% Processing: Wallpaper/bigcollection/_A...g_Norderoog__small_island_.jpg\r" + ], + [ + 0.031343, + " 0.7% Processing: Wallpaper/bigcollection/_A...im_Snowdonia-Nationalpark_.jpg\r" + ], + [ + 0.031862, + " 0.8% Processing: Wallpaper/bigcollection/_A...l_an_einem_Wasserloch_im_Q.jpg\r" + ], + [ + 0.034847, + " 0.9% Processing: Wallpaper/bigcollection/_A...nten____James_HagerOffset_.jpg\r" + ], + [ + 0.033989, + " 1.0% Processing: Wallpaper/bigcollection/_A...nen__Masai_Mara_National_R.jpg\r" + ], + [ + 0.027388, + " 1.1% Processing: Wallpaper/bigcollection/_A...ard_im_Londolozi-Wildreser.jpg\r" + ], + [ + 0.026632, + " 1.2% Processing: Wallpaper/bigcollection/_A...orning_fog__Aidling__Pfaff.jpg\r" + ], + [ + 0.030864, + " 1.3% Processing: Wallpaper/bigcollection/_A...hutzgebiet_Lewa_Wildlife_C.jpg\r" + ], + [ + 0.029943, + " 1.4% Processing: Wallpaper/bigcollection/_A...in_Delhi__Indien____AirPan.jpg\r" + ], + [ + 0.035404, + " 1.5% Processing: Wallpaper/bigcollection/_A...morial_Gardens_in_den_Dand.jpg\r" + ], + [ + 0.030931, + " 1.6% Processing: Wallpaper/bigcollection/_A...rthumberland__England____A.jpg\r" + ], + [ + 0.035605, + " 1.7% Processing: Wallpaper/bigcollection/_A...berg__Bayern__Deutschland_.jpg\r" + ], + [ + 0.026827, + " 1.8% Processing: Wallpaper/bigcollection/_A...ns_am_Little_Missouri_Rive.jpg\r" + ], + [ + 0.030196, + " 1.9% Processing: Wallpaper/bigcollection/_A...toberfest_in_Munich__Ger 1.jpg\r" + ], + [ + 0.025763, + " 2.0% Processing: Wallpaper/bigcollection/_A...toberfest_in_Munich__Ger 2.jpg\r" + ], + [ + 0.025306, + " 2.1% Processing: Wallpaper/bigcollection/_A...toberfest_in_Munich__Ger 3.jpg\r" + ], + [ + 0.027286, + " 2.2% Processing: Wallpaper/bigcollection/_A...Oktoberfest_in_Munich__Ger.jpg\r" + ], + [ + 0.02806, + " 2.3% Processing: Wallpaper/bigcollection/_A..._Florida-Scheibenanemone__.jpg\r" + ], + [ + 0.032994, + " 2.4% Processing: Wallpaper/bigcollection/_A...n__Nationalpark_Ankarafant.jpg\r" + ], + [ + 0.033538, + " 2.5% Processing: Wallpaper/bigcollection/_A..._der_N_he_von_Page__Arizon.jpg\r" + ], + [ + 0.030034, + " 2.6% Processing: Wallpaper/bigcollection/_A...r_Inselgruppe_L_archipel_d.jpg\r" + ], + [ + 0.030477, + " 2.7% Processing: Wallpaper/bigcollection/_A...land____Hercules_MilasAlam.jpg\r" + ], + [ + 0.033376, + " 2.8% Processing: Wallpaper/bigcollection/_A...maguchiFlickr_OpenGetty_Im.jpg\r" + ], + [ + 0.032919, + " 2.9% Processing: Wallpaper/bigcollection/_A...imetersubmillimeter_Array_.jpg\r" + ], + [ + 0.027034, + " 4.7% Processing: 
Wallpaper/bigcollection/_B...rairie_Creek_Redwoods_Stat.jpg\r" + ], + [ + 0.034892, + " 4.8% Processing: Wallpaper/bigcollection/_B...__Montana__USA____Jeff_Kro.jpg\r" + ], + [ + 0.031042, + " 4.9% Processing: Wallpaper/bigcollection/_B...gatta__Golf_von_Triest__It.jpg\r" + ], + [ + 0.030521, + " 5.0% Processing: Wallpaper/bigcollection/_B...and__Schleswig-Holstein__D.jpg\r" + ], + [ + 0.028755, + " 5.1% Processing: Wallpaper/bigcollection/_B..._Islands__Irland____Bart_B.jpg\r" + ], + [ + 0.031129, + " 5.2% Processing: Wallpaper/bigcollection/_B...e_im_Glacier-Nationalpark_.jpg\r" + ], + [ + 0.032588, + " 5.3% Processing: Wallpaper/bigcollection/_B...Nationalpark_Bayerischer_W.jpg\r" + ], + [ + 0.025077, + " 5.4% Processing: Wallpaper/bigcollection/_B...Arena_bei_Nacht__Stockholm.jpg\r" + ], + [ + 0.027803, + " 5.5% Processing: Wallpaper/bigcollection/_B...ner_Fernsehturm_w_hrend_de.jpg\r" + ], + [ + 0.031262, + " 5.6% Processing: Wallpaper/bigcollection/_B...nd__Bayern__Deutschland_mi.jpg\r" + ], + [ + 0.031721, + " 5.7% Processing: Wallpaper/bigcollection/_B...er__Schwarzwald__Baden-W_r.jpg\r" + ], + [ + 0.032768, + " 5.8% Processing: Wallpaper/bigcollection/_B...ebirge_oberhalb_der_Dad_s-.jpg\r" + ], + [ + 0.030763, + " 5.9% Processing: Wallpaper/bigcollection/_B...ngerburgbahn__Innsbruck___.jpg\r" + ], + [ + 0.028673, + " 7.6% Processing: Wallpaper/bigcollection/_B...rn_des__Wilson_Stump___ein.jpg\r" + ], + [ + 0.029182, + " 7.7% Processing: Wallpaper/bigcollection/_B...t_Jefferson-Wildschutzgebi.jpg\r" + ], + [ + 0.029225, + " 11.2% Processing: Wallpaper/bigcollection/_B...Saloum-Delta__Senegal____B.jpg\r" + ], + [ + 0.030837, + " 11.3% Processing: Wallpaper/bigcollection/_B..._Venedig__Italien____Digit.jpg\r" + ], + [ + 0.034033, + " 11.4% Processing: Wallpaper/bigcollection/_B..._Koblenz_und_Trier__Rheinl.jpg\r" + ], + [ + 0.028958, + " 11.5% Processing: Wallpaper/bigcollection/_B..._Baden-W_rttemberg__Deutsc.jpg\r" + ], + [ + 0.025933, + " 11.6% Processing: Wallpaper/bigcollection/_B..._Bisingen__Baden-W_rttembe.jpg\r" + ], + [ + 0.030318, + " 11.7% Processing: Wallpaper/bigcollection/_B...in_Koknese__Lettland____An.jpg\r" + ], + [ + 0.029535, + " 11.8% Processing: Wallpaper/bigcollection/_C...__Deutschland____R_diger_H.jpg\r" + ], + [ + 0.032432, + " 11.9% Processing: Wallpaper/bigcollection/_C...Toulouse__D_partement_Haut.jpg\r" + ], + [ + 0.032966, + " 12.0% Processing: Wallpaper/bigcollection/_C...pring__Germany____Boris_St.jpg\r" + ], + [ + 0.024881, + " 12.1% Processing: Wallpaper/bigcollection/_C...pring__Germany____Boris_St.jpg\r" + ], + [ + 0.02818, + " 12.2% Processing: Wallpaper/bigcollection/_C...Mallorca__Balearische_Inse.jpg\r" + ], + [ + 0.029353, + " 12.3% Processing: Wallpaper/bigcollection/_C...A__ESA__N._Smith__Universi.jpg\r" + ], + [ + 0.03626, + " 12.4% Processing: Wallpaper/bigcollection/_C...gebr_cke_bei_Ballintoy__Co.jpg\r" + ], + [ + 0.025838, + " 12.5% Processing: Wallpaper/bigcollection/_C...gebr_cke_bei_Ballintoy__Co.jpg\r" + ], + [ + 0.027176, + " 12.6% Processing: Wallpaper/bigcollection/_C...lona__Spanien____Nora_De_A.jpg\r" + ], + [ + 0.0298, + " 12.7% Processing: Wallpaper/bigcollection/_C...rcia__Nationalpark_Monti_S.jpg\r" + ], + [ + 0.027672, + " 12.8% Processing: Wallpaper/bigcollection/_C...vinz_Potenza__Italien____F.jpg\r" + ], + [ + 0.032259, + " 12.9% Processing: Wallpaper/bigcollection/_C...semite-Nationalpark__Kalif.jpg\r" + ], + [ + 0.031451, + " 13.0% Processing: Wallpaper/bigcollection/_C...um_Ludwig__Cologne__German.jpg\r" + ], + [ + 
0.030096, + " 13.1% Processing: Wallpaper/bigcollection/_C..._Ludwig__Cologne__North_ 1.jpg\r" + ], + [ + 0.028235, + " 15.1% Processing: Wallpaper/bigcollection/_D...n_Hannover_bei_Nacht____Ma.jpg\r" + ], + [ + 0.028761, + " 15.2% Processing: Wallpaper/bigcollection/_D...rieb_befindliche_Opernhaus.jpg\r" + ], + [ + 0.027439, + " 15.3% Processing: Wallpaper/bigcollection/_D...esert_View_Watchtower__Gra.jpg\r" + ], + [ + 0.028598, + " 15.4% Processing: Wallpaper/bigcollection/_D..._Provinz_Shaanxi__Volksrep.jpg\r" + ], + [ + 0.031617, + " 15.5% Processing: Wallpaper/bigcollection/_D...gr__t_den_Hund_Sudo_in_Mel.jpg\r" + ], + [ + 0.032865, + " 17.5% Processing: Wallpaper/bigcollection/_D...s_du_Tarn__Nationalpark_Ce.jpg\r" + ], + [ + 0.031736, + " 17.6% Processing: Wallpaper/bigcollection/_D..._he_von_Sens__D_partement_.jpg\r" + ], + [ + 0.030474, + " 17.7% Processing: Wallpaper/bigcollection/_D...__Wales__Vereinigtes_K_nig.jpg\r" + ], + [ + 0.026112, + " 20.5% Processing: Wallpaper/bigcollection/_E...Junges_schnuppert_an_einer.jpg\r" + ], + [ + 0.027898, + " 20.6% Processing: Wallpaper/bigcollection/_E...chen_versteckt_sich_in_ein.jpg\r" + ], + [ + 0.027202, + " 20.7% Processing: Wallpaper/bigcollection/_E...r_Frosch_in_einem_Wassertr.jpg\r" + ], + [ + 0.027615, + " 20.8% Processing: Wallpaper/bigcollection/_E...ekorierter_Saguaro-Kaktus_.jpg\r" + ], + [ + 0.028446, + " 20.9% Processing: Wallpaper/bigcollection/_E...e__berquert_den_Luangwa-Fl.jpg\r" + ], + [ + 0.031808, + " 21.0% Processing: Wallpaper/bigcollection/_E...ngstunnel_zur_Felsenkirche.jpg\r" + ], + [ + 0.031065, + " 22.7% Processing: Wallpaper/bigcollection/_E...n_Koblenz_and_Trier__Germa.jpg\r" + ], + [ + 0.033059, + " 22.8% Processing: Wallpaper/bigcollection/_E...n_Angola_und_Namibia____Fr.jpg\r" + ], + [ + 0.035115, + " 22.9% Processing: Wallpaper/bigcollection/_E...r_Olympischen_Spiele_1896_.jpg\r" + ], + [ + 0.032507, + " 23.0% Processing: Wallpaper/bigcollection/_E..._Fr_hlingskrokus__Almwiese.jpg\r" + ], + [ + 0.028219, + " 23.1% Processing: Wallpaper/bigcollection/_E...in_der_Meeresbucht_Cathedr.jpg\r" + ], + [ + 0.029551, + " 23.2% Processing: Wallpaper/bigcollection/_E..._Nationalpark_Bayerischer_.jpg\r" + ], + [ + 0.02746, + " 23.3% Processing: Wallpaper/bigcollection/_E...im_Nationalpark_Bayerische.jpg\r" + ], + [ + 0.028081, + " 23.4% Processing: Wallpaper/bigcollection/_E...im__umava-Nationalpark__Ts.jpg\r" + ], + [ + 0.027796, + " 23.5% Processing: Wallpaper/bigcollection/_E..._Emsland__Germany____Erh 1.jpg\r" + ], + [ + 0.026053, + " 25.4% Processing: Wallpaper/bigcollection/_F...chersee_J_kuls_rl_n__Islan.jpg\r" + ], + [ + 0.029312, + " 25.5% Processing: Wallpaper/bigcollection/_F...Yellowstone_Nationalpark__.jpg\r" + ], + [ + 0.029189, + " 25.6% Processing: Wallpaper/bigcollection/_F...yi__Provinz_Phang-nga__Tha.jpg\r" + ], + [ + 0.029535, + " 25.7% Processing: Wallpaper/bigcollection/_F..._Tree_River__Kitikmeot_Reg.jpg\r" + ], + [ + 0.031935, + " 25.8% Processing: Wallpaper/bigcollection/_F...ystad__Niederlande____Erns.jpg\r" + ], + [ + 0.034076, + " 25.9% Processing: Wallpaper/bigcollection/_F...kyline_von_Baku__Aserbaids.jpg\r" + ], + [ + 0.028655, + " 26.0% Processing: Wallpaper/bigcollection/_F..._New_York_City__Bundesstaa.jpg\r" + ], + [ + 0.030152, + " 26.1% Processing: Wallpaper/bigcollection/_F...wals__Cierva_Cove__Antarkt.jpg\r" + ], + [ + 0.030983, + " 26.2% Processing: Wallpaper/bigcollection/_F..._des_Norman_River__Queensl.jpg\r" + ], + [ + 0.027019, + " 27.4% Processing: 
Wallpaper/bigcollection/_G..._Ger_llhang_im_Rondane-Nat.jpg\r" + ], + [ + 0.027058, + " 27.5% Processing: Wallpaper/bigcollection/_G...tzgebiet_Sacramento_Nation.jpg\r" + ], + [ + 0.038515, + " 27.6% Processing: Wallpaper/bigcollection/_G...Villandry__Loiretal__Frank.jpg\r" + ], + [ + 0.024219, + " 27.7% Processing: Wallpaper/bigcollection/_G...Villandry__Loiretal__Frank.jpg\r" + ], + [ + 0.028063, + " 27.8% Processing: Wallpaper/bigcollection/_G...__Champion-Insel__Floreana.jpg\r" + ], + [ + 0.030237, + " 27.9% Processing: Wallpaper/bigcollection/_G...__R_bida__Gal_pagosinseln_.jpg\r" + ], + [ + 0.031455, + " 28.0% Processing: Wallpaper/bigcollection/_G...c-Nationalpark__Alaska__US.jpg\r" + ], + [ + 0.028409, + " 28.1% Processing: Wallpaper/bigcollection/_G...um_Bridge__Newcastle_upon_.jpg\r" + ], + [ + 0.031595, + " 28.2% Processing: Wallpaper/bigcollection/_G..._Kanal_in_Venedig__Italien.jpg\r" + ], + [ + 0.031079, + " 28.3% Processing: Wallpaper/bigcollection/_G..._Rock_Canyon__Waterton-Lak.jpg\r" + ], + [ + 0.028272, + " 30.5% Processing: Wallpaper/bigcollection/_G...oos_in_der_Gro_aufnahme___.jpg\r" + ], + [ + 0.034208, + " 30.6% Processing: Wallpaper/bigcollection/_G...iesel__Bayern__Deutschland.jpg\r" + ], + [ + 0.034016, + " 30.7% Processing: Wallpaper/bigcollection/_G...__ber_dem_Thunersee__Berne.jpg\r" + ], + [ + 0.0292, + " 30.8% Processing: Wallpaper/bigcollection/_G...ell-St.-Elias-Nationalpark.jpg\r" + ], + [ + 0.024942, + " 32.8% Processing: Wallpaper/bigcollection/_G..._bei_Mettlach__Saarland__D.jpg\r" + ], + [ + 0.031677, + " 32.9% Processing: Wallpaper/bigcollection/_G...ngxia-Zuchtstation__Ya_an_.jpg\r" + ], + [ + 0.031108, + " 33.9% Processing: Wallpaper/bigcollection/_H...kaido__Japan____JTB_Media_.jpg\r" + ], + [ + 0.030964, + " 35.9% Processing: Wallpaper/bigcollection/_H...ew_RussellVisuals_Unlimite.jpg\r" + ], + [ + 0.026577, + " 38.9% Processing: Wallpaper/bigcollection/_I...eutschen_Doms__Gendarmenma.jpg\r" + ], + [ + 0.031898, + " 39.0% Processing: Wallpaper/bigcollection/_I...ukuoka_Tower__Fukuoka__Jap.jpg\r" + ], + [ + 0.031693, + " 39.1% Processing: Wallpaper/bigcollection/_I...__Bermeo__Provinz_Bizkaia_.jpg\r" + ], + [ + 0.026825, + " 39.2% Processing: Wallpaper/bigcollection/_I...P_tzcuaro-See__Bundesstaat.jpg\r" + ], + [ + 0.030749, + " 41.0% Processing: Wallpaper/bigcollection/_J...ia-Nationalpark__Maine__US.jpg\r" + ], + [ + 0.032301, + " 41.1% Processing: Wallpaper/bigcollection/_J..._im_Moremi_Game_Reserve__O.jpg\r" + ], + [ + 0.031689, + " 42.2% Processing: Wallpaper/bigcollection/_K...n_in_der_Antarktis____Jan_.jpg\r" + ], + [ + 0.029222, + " 42.3% Processing: Wallpaper/bigcollection/_K...e_Washington__Antarktis___.jpg\r" + ], + [ + 0.174039, + " 42.4% Processing: Wallpaper/bigcollection/_K...K_ken__Snow_Hill_Island__A.jpg\r" + ], + [ + 0.03322, + " 42.5% Processing: Wallpaper/bigcollection/_K...SCO-Welterbest_tte__Trier_.jpg\r" + ], + [ + 0.031657, + " 43.4% Processing: Wallpaper/bigcollection/_K...ufort__South_Carolina__USA.jpg\r" + ], + [ + 0.026738, + " 43.5% Processing: Wallpaper/bigcollection/_K...chfang_vor_Port_St._Johns_.jpg\r" + ], + [ + 0.033834, + " 44.4% Processing: Wallpaper/bigcollection/_K...eide__Schottisches_Hochlan.jpg\r" + ], + [ + 0.034061, + " 44.5% Processing: Wallpaper/bigcollection/_K...m_Schlossgarten_Schwetzing.jpg\r" + ], + [ + 0.033845, + " 44.6% Processing: Wallpaper/bigcollection/_K...dscha__Kachetien__Georgien.jpg\r" + ], + [ + 0.031383, + " 44.7% Processing: 
Wallpaper/bigcollection/_K..._Baden-W_rttemberg__Deutsc.jpg\r" + ], + [ + 0.027515, + " 44.8% Processing: Wallpaper/bigcollection/_K..._Zanskar__Region_Ladakh__B.jpg\r" + ], + [ + 0.031935, + " 44.9% Processing: Wallpaper/bigcollection/_K...Meteora__Griechenland____S.jpg\r" + ], + [ + 0.030994, + " 45.0% Processing: Wallpaper/bigcollection/_K...-Ville__Belgien____Patty_P.jpg\r" + ], + [ + 0.031632, + " 46.8% Processing: Wallpaper/bigcollection/_L...ionalpark__Simbabwe____Jer.jpg\r" + ], + [ + 0.032645, + " 46.9% Processing: Wallpaper/bigcollection/_L...Hochland_von_Cuenca__Auton.jpg\r" + ], + [ + 0.028682, + " 47.0% Processing: Wallpaper/bigcollection/_L...Hochland_von_Cuenca__Auton.jpg\r" + ], + [ + 0.030087, + " 47.1% Processing: Wallpaper/bigcollection/_L...__Axel_Flasbarth500px_____.jpg\r" + ], + [ + 0.030684, + " 47.2% Processing: Wallpaper/bigcollection/_L...athedrale_von_Chartres__Fr.jpg\r" + ], + [ + 0.029522, + " 47.3% Processing: Wallpaper/bigcollection/_L...und_Aiguilles_de_Chamonix_.jpg\r" + ], + [ + 0.032174, + " 47.4% Processing: Wallpaper/bigcollection/_L...Nutthavood_Punpeng500px___.jpg\r" + ], + [ + 0.029075, + " 47.5% Processing: Wallpaper/bigcollection/_L...nd__Great_Barrier_Reef__Au.jpg\r" + ], + [ + 0.028973, + " 47.6% Processing: Wallpaper/bigcollection/_L...__Insel_Corvo__Portugal___.jpg\r" + ], + [ + 0.030047, + " 47.7% Processing: Wallpaper/bigcollection/_L...ationalpark__British_Colum.jpg\r" + ], + [ + 0.031497, + " 49.3% Processing: Wallpaper/bigcollection/_L...hof__Great_Court__des_Brit.jpg\r" + ], + [ + 0.029466, + " 49.4% Processing: Wallpaper/bigcollection/_L...em_Wald_auf_der_Insel_Shik.jpg\r" + ], + [ + 0.025178, + " 49.5% Processing: Wallpaper/bigcollection/_L...er_K_ste_von_Ixtapa_Zihuat.jpg\r" + ], + [ + 0.030228, + " 49.6% Processing: Wallpaper/bigcollection/_L...e_Itapu__in_Salvador__Bahi.jpg\r" + ], + [ + 0.027644, + " 49.7% Processing: Wallpaper/bigcollection/_L...l_Point_in_der_N_he_von_Po.jpg\r" + ], + [ + 0.026513, + " 49.8% Processing: Wallpaper/bigcollection/_L...eversand__Westerhever__Sch.jpg\r" + ], + [ + 0.032316, + " 49.9% Processing: Wallpaper/bigcollection/_L...i__Provinz_Jiangsu__Volksr.jpg\r" + ], + [ + 0.026983, + " 50.0% Processing: Wallpaper/bigcollection/_L...g__aufgenommen_von_der_Int.jpg\r" + ], + [ + 0.03107, + " 51.7% Processing: Wallpaper/bigcollection/_M..._Cay__Exuma__Bahamas____Ji.jpg\r" + ], + [ + 0.028123, + " 51.8% Processing: Wallpaper/bigcollection/_M...ationalpark_Jardines_de_la.jpg\r" + ], + [ + 0.028547, + " 51.9% Processing: Wallpaper/bigcollection/_M...au____WaterFrameAlamy_____.jpg\r" + ], + [ + 0.030092, + " 53.1% Processing: Wallpaper/bigcollection/_M...ands-Nationalpark__Utah__U.jpg\r" + ], + [ + 0.027589, + " 53.2% Processing: Wallpaper/bigcollection/_M...useum_in_den_Wolken__Monte.jpg\r" + ], + [ + 0.029779, + " 53.3% Processing: Wallpaper/bigcollection/_M...Plaza_de_la_Encarnaci_n__S.jpg\r" + ], + [ + 0.031154, + " 54.6% Processing: Wallpaper/bigcollection/_M...lmie_National_Forest__Bund.jpg\r" + ], + [ + 0.03317, + " 54.7% Processing: Wallpaper/bigcollection/_M...t_Edziza_Provincial_Park__.jpg\r" + ], + [ + 0.031631, + " 54.8% Processing: Wallpaper/bigcollection/_M...__Washington__USA____Diane.jpg\r" + ], + [ + 0.025722, + " 56.1% Processing: Wallpaper/bigcollection/_N..._K_ste_des_Atlantischen_Oz.jpg\r" + ], + [ + 0.029888, + " 56.2% Processing: Wallpaper/bigcollection/_N...hee__Schiras__Iran____R.Cr.jpg\r" + ], + [ + 0.022761, + " 57.5% Processing: 
Wallpaper/bigcollection/_N...Fischotter_im_Yellowstone-.jpg\r" + ], + [ + 0.030469, + " 57.6% Processing: Wallpaper/bigcollection/_N..._Baumstachler____Minden_Pi.jpg\r" + ], + [ + 0.032258, + " 58.9% Processing: Wallpaper/bigcollection/_O...-Park__Bomarzo__Italien___.jpg\r" + ], + [ + 0.028556, + " 59.0% Processing: Wallpaper/bigcollection/_O...-Park__Bomarzo__Italien___.jpg\r" + ], + [ + 0.029665, + " 60.4% Processing: Wallpaper/bigcollection/_P..._der_Boardman_Tree_Farm__B.jpg\r" + ], + [ + 0.030072, + " 60.5% Processing: Wallpaper/bigcollection/_P...o-Ebene__Italien____Eddy_G.jpg\r" + ], + [ + 0.034601, + " 60.6% Processing: Wallpaper/bigcollection/_P...nem_Karnevalswagen_beim_Ro.jpg\r" + ], + [ + 0.029305, + " 61.9% Processing: Wallpaper/bigcollection/_P...der_argentinischen_Atlanti.jpg\r" + ], + [ + 0.03045, + " 62.0% Processing: Wallpaper/bigcollection/_P...m__Pilsum__Niedersachsen__.jpg\r" + ], + [ + 0.02941, + " 63.4% Processing: Wallpaper/bigcollection/_P...rk_Torres_del_Paine__Chile.jpg\r" + ], + [ + 0.033345, + " 63.5% Processing: Wallpaper/bigcollection/_P...i-Nationalpark__New_South_.jpg\r" + ], + [ + 0.031818, + " 64.9% Processing: Wallpaper/bigcollection/_R...ationalpark_Sarek__Schwede.jpg\r" + ], + [ + 0.025656, + " 65.0% Processing: Wallpaper/bigcollection/_R...ationalpark_Sarek__Schwede.jpg\r" + ], + [ + 0.030751, + " 66.6% Processing: Wallpaper/bigcollection/_R...nyang__Provinz_Yunnan__Chi.jpg\r" + ], + [ + 0.030313, + " 66.7% Processing: Wallpaper/bigcollection/_R...n_Ludwig_XIV._auf_dem_Plac.jpg\r" + ], + [ + 0.032915, + " 68.6% Processing: Wallpaper/bigcollection/_R...r____Getty_Images______Bin.jpg\r" + ], + [ + 0.029504, + " 70.1% Processing: Wallpaper/bigcollection/_S...tional_Park__Germany____ 3.jpg\r" + ], + [ + 0.026571, + " 70.2% Processing: Wallpaper/bigcollection/_S...tional_Park__Germany____ 4.jpg\r" + ], + [ + 0.032136, + " 71.7% Processing: Wallpaper/bigcollection/_S...e_t_sich_als_Wasserfall_vo.jpg\r" + ], + [ + 0.032883, + " 72.6% Processing: Wallpaper/bigcollection/_S...riehunde_im_Wind_Cave_Nati.jpg\r" + ], + [ + 0.031602, + " 72.7% Processing: Wallpaper/bigcollection/_S...erkstatt__Hexenlochm_hle__.jpg\r" + ], + [ + 0.030634, + " 73.6% Processing: Wallpaper/bigcollection/_S...en_in_der_Son_Doong-H_hle_.jpg\r" + ], + [ + 0.027026, + " 74.5% Processing: Wallpaper/bigcollection/_S..._at_sunset__Attendorn__Sau.jpg\r" + ], + [ + 0.038777, + " 75.4% Processing: Wallpaper/bigcollection/_S..._Dartmoor-Nationalpark__De.jpg\r" + ], + [ + 0.027422, + " 75.5% Processing: Wallpaper/bigcollection/_S..._der_Halong-Bucht__Vietnam.jpg\r" + ], + [ + 0.027539, + " 76.3% Processing: Wallpaper/bigcollection/_S...em_See__Bergpark_Wilhelmsh.jpg\r" + ], + [ + 0.031058, + " 76.4% Processing: Wallpaper/bigcollection/_S...ge_in_den_Ausl_ufern_der_R.jpg\r" + ], + + [ + 0.036506, + " 77.6% Processing: Wallpaper/bigcollection/_S..._Geothermalgebiet_Haukadal.jpg\r" + ], + [ + 0.025063, + " 77.7% Processing: Wallpaper/bigcollection/_S...ampagne-Ardennes__Frankrei.jpg\r" + ], + [ + 0.029054, + " 77.8% Processing: Wallpaper/bigcollection/_S...r__ber_West_Point__Nebrask.jpg\r" + ], + [ + 0.028908, + " 77.9% Processing: Wallpaper/bigcollection/_S...n-Bodenstation__Longyearby.jpg\r" + ], + [ + 0.029276, + " 78.0% Processing: Wallpaper/bigcollection/_S..._und_Solidarit_t_Kerzen__K.jpg\r" + ], + [ + 0.024812, + " 79.0% Processing: Wallpaper/bigcollection/_T..._Blatt_eines_Per_ckenstrau.jpg\r" + ], + [ + 0.031898, + " 80.0% Processing: Wallpaper/bigcollection/_T...er_Bavaria__Germany____F 
3.jpg\r" + ], + [ + 0.029189, + " 80.1% Processing: Wallpaper/bigcollection/_T...pper_Bavaria__Germany____F.jpg\r" + ], + [ + 0.028065, + " 81.2% Processing: Wallpaper/bigcollection/_T...Image_BrokerRex_Features 1.jpg\r" + ], + [ + 0.03116, + " 81.3% Processing: Wallpaper/bigcollection/_T...n__Baden-W_rttemberg__Deut.jpg\r" + ], + [ + 0.026524, + " 82.3% Processing: Wallpaper/bigcollection/_U..._seltene_Blattschwanzgecko.jpg\r" + ], + [ + 0.028383, + " 82.4% Processing: Wallpaper/bigcollection/_V...en_und_Altstadt_von_Chania.jpg\r" + ], + [ + 0.032476, + " 83.5% Processing: Wallpaper/bigcollection/_V..._Hirta__St._Kilda__Schottl.jpg\r" + ], + [ + 0.030701, + " 84.7% Processing: Wallpaper/bigcollection/_W...wald__Insel_Sula__Solund__.jpg\r" + ], + [ + 0.034129, + " 84.8% Processing: Wallpaper/bigcollection/_W...nschafe__Kanton_Wallis__ 1.jpg\r" + ], + [ + 0.03033, + " 85.8% Processing: Wallpaper/bigcollection/_W...ionalpark_Plitvicer_Seen__.jpg\r" + ], + + [ + 0.031761, + " 87.2% Processing: Wallpaper/bigcollection/_W..._N_he_von_Ca_amares__Provi.jpg\r" + ], + [ + 0.031627, + " 87.3% Processing: Wallpaper/bigcollection/_W..._N_he_von_Cuenca__Spanien_.jpg\r" + ], + [ + 0.024242, + " 88.3% Processing: Wallpaper/bigcollection/_W...jeu__D_partement_Rh_ne__Re.jpg\r" + ], + [ + 0.027362, + " 89.3% Processing: Wallpaper/bigcollection/_W...guna_Colorada__Bolivien___.jpg\r" + ], + [ + 0.031448, + " 90.5% Processing: Wallpaper/bigcollection/_Z..._von_Autobahnen_in_Bangkok.jpg\r" + ], + [ + 0.027535, + " 90.6% Processing: Wallpaper/bigcollection/_Z...abara-Bucht__Rio_de_Janeir.jpg\r" + ], + [ + 0.025329, + " 92.1% Processing: Wallpaper/bigcollection/__...ptur_der_Landart-K_nstleri.jpg\r" + ], + [ + 0.044106, + " 92.2% Processing: Wallpaper/bigcollection/__...__Magic_Mountain_-Landmark.jpg\r" + ], + [ + 0.03068, + " 93.5% Processing: Wallpaper/bigcollection/_F...rte_Marina_Bay_zum_50._Nat.jpg\r" + ], + [ + 0.031039, + " 93.6% Processing: Wallpaper/bigcollection/_H...ing_Crane_Pond_Conservancy.jpg\r" + ], + [ + 0.020685, + " 95.0% Processing: Wallpaper/2048example/Palo...t_by_Beth___Jeremy_Jonkman.jpg\r" + ], + [ + 0.019863, + " 96.3% Processing: Wallpaper/evenmore/ChipDE ...jpg \r" + ], + [ + 0.056069, + " 96.4% Processing: Wallpaper/evenmore/ChipDE ...jpg \r" + ], + [ + 0.049869, + " 97.4% Processing: Wallpaper/evenmore/ChipDE 06.jpg \r" + ], + [ + 0.021021, + " 97.5% Processing: Wallpaper/evenmore/ChipDE ...jpg \r" + ], + [ + 0.019135, + " 98.4% Processing: Wallpaper/evenmore/ChipDE ...jpg \r" + ], + [ + 0.021483, + " 99.6% Processing: Wallpaper/deer.jpg ... \r" + ], + [ + 0.021593, + " 99.7% Processing: Wallpaper/deer.jpg ... \r" + ], + [ + 0.02037, + " 99.8% Processing: Wallpaper/deer.jpg ... \r" + ], + [ + 0.027858, + " 99.9% Processing: Wallpaper/deer.jpg ... 
\r" + ], + [ + 0.020864, + " \r" + ], + [ + 0.077955, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001068, + "\u001b]7;\u0007" + ], + [ + 0.000836, + "\u001b]7;\u0007" + ], + [ + 0.000104, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000234, + "\u001b[?2004h" + ], + [ + 2.471911, + "\u001b[32ml\u001b[39m" + ], + [ + 0.102688, + "\b\u001b[32ml\u001b[32ms\u001b[39m" + ], + [ + 0.272296, + " " + ], + [ + 0.220114, + "-" + ], + [ + 0.157165, + "l" + ], + [ + 0.074368, + "a" + ], + [ + 0.353976, + "\u001b[?1l\u001b>" + ], + [ + 0.000755, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000778, + "\u001b]2;ls --color=tty -la\u0007\u001b]1;ls\u0007" + ], + [ + 0.001633, + "total 573616\r\n" + ], + [ + 1.9e-05, + "drwxr-xr-x. 4 rugk rugk 4096 Jul 16 18:56 \u001b[0m\u001b[38;5;33m.\u001b[0m\r\ndrwxr-x---. 55 rugk rugk 4096 Jul 16 18:57 \u001b[38;5;33m..\u001b[0m\r\ndrwx------. 2 rugk rugk 4096 Jul 14 21:57 \u001b[38;5;33mWallpaper\u001b[0m\r\ndrwxr-xr-x. 6 rugk rugk 4096 Jul 14 21:55 \u001b[38;5;33mWallpaper.orig\u001b[0m\r\n-rw-------. 1 rugk rugk 587361454 Jul 16 18:57 \u001b[38;5;9mbackup.tar.gz\u001b[0m\r\n" + ], + [ + 0.000404, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001103, + "\u001b]7;\u0007" + ], + [ + 0.000992, + "\u001b]7;\u0007" + ], + [ + 7.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000158, + "\u001b[?2004h" + ], + [ + 3.04506, + "\u001b[?1l\u001b>" + ], + [ + 0.000385, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000485, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001512, + "\u001b]7;\u0007" + ], + [ + 0.001245, + "\u001b]7;\u0007" + ], + [ + 6.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000129, + "\u001b[?1h\u001b=" + ], + [ + 0.000247, + "\u001b[?2004h" + ], + [ + 0.325892, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.228892, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.186392, + "\b\b\u001b[1m#\u001b[1m \u001b[1mY\u001b[0m\u001b[39m" + ], + [ + 0.112073, + "\b\u001b[1mY\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.139024, + "\b\u001b[1mo\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.151793, + "\b\u001b[1mu\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.106484, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.147932, + "\b\u001b[1mc\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.181458, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.137456, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.21885, + "\b\u001b[1m \u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.170788, + "\b\u001b[1me\u001b[1mv\u001b[0m\u001b[39m" + ], + [ + 0.133285, + "\b\u001b[1mv\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.28717, + "\b\u001b[1me\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.485291, + "\b\u001b[1mn\u001b[1m mount an archive or even the whole repository:\u001b[0m\u001b[39m" + ], + [ + 1.036008, + "\u001b[?1l\u001b>" + ], + [ + 0.001535, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001777, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.002934, + "\u001b]7;\u0007" + ], + [ + 0.002695, + "\u001b]7;\u0007" + ], + [ + 0.00014, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000881, + "\u001b[?2004h" + ], + [ + 1.264493, + "\u001b[1m\u001b[31mm\u001b[0m\u001b[39m" + ], + [ + 0.158178, + "\b\u001b[1m\u001b[31mm\u001b[1m\u001b[31mk\u001b[0m\u001b[39m" + ], + [ + 0.129344, + 
"\b\b\u001b[1m\u001b[31mm\u001b[1m\u001b[31mk\u001b[1m\u001b[31md\u001b[0m\u001b[39m" + ], + [ + 0.153746, + "\b\u001b[1m\u001b[31md\u001b[1m\u001b[31mi\u001b[0m\u001b[39m" + ], + [ + 0.106254, + "\b\b\b\b\u001b[0m\u001b[32mm\u001b[0m\u001b[32mk\u001b[0m\u001b[32md\u001b[0m\u001b[32mi\u001b[32mr\u001b[39m" + ], + [ + 0.178794, + " " + ], + [ + 0.328222, + "\u001b[4m/\u001b[24m" + ], + [ + 0.202794, + "\b\u001b[4m/\u001b[4mt\u001b[24m" + ], + [ + 0.246443, + "\b\u001b[4mt\u001b[4mm\u001b[24m" + ], + [ + 0.207634, + "\b\u001b[4mm\u001b[4mp\u001b[24m" + ], + [ + 0.88273, + "\b\u001b[4mp\u001b[4m/\u001b[24m" + ], + [ + 0.339887, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.210076, + "\b\u001b[4mm\u001b[4mo\u001b[24m" + ], + [ + 0.16667, + "\b\b\b\b\b\b\b\u001b[24m/\u001b[24mt\u001b[24mm\u001b[24mp\u001b[24m/\u001b[24mm\u001b[24mou" + ], + [ + 0.141564, + "n" + ], + [ + 0.184, + "t" + ], + [ + 1.4607, + "\u001b[?1l\u001b>" + ], + [ + 0.001306, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000794, + "\u001b]2;mkdir /tmp/mount\u0007" + ], + [ + 6.6e-05, + "\u001b]1;mkdir\u0007" + ], + [ + 0.00176, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00142, + "\u001b]7;\u0007" + ], + [ + 0.001308, + "\u001b]7;\u0007" + ], + [ + 7.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ " + ], + [ + 1.3e-05, + "\u001b[K" + ], + [ + 9.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000273, + "\u001b[?2004h" + ], + [ + 1.09686, + "\u001b[4mb\u001b[24m" + ], + [ + 0.187046, + "\b\u001b[24m\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.10907, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.12414, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.187573, + " " + ], + [ + 0.229364, + "m" + ], + [ + 0.195942, + "o" + ], + [ + 0.183861, + "u" + ], + [ + 0.138559, + "n" + ], + [ + 0.207537, + "t" + ], + [ + 1.01571, + " " + ], + [ + 0.55086, + ":" + ], + [ + 0.110713, + ":" + ], + [ + 0.27265, + " " + ], + [ + 0.462869, + "\u001b[4m/\u001b[24m" + ], + [ + 0.795464, + "\b\u001b[4m/\u001b[4mt\u001b[24m" + ], + [ + 0.295092, + "\b\u001b[4mt\u001b[4mm\u001b[24m" + ], + [ + 0.200509, + "\b\u001b[4mm\u001b[4mp\u001b[24m" + ], + [ + 0.878464, + "\b\u001b[4mp\u001b[4m/\u001b[24m" + ], + [ + 0.306666, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.24341, + "\b\u001b[4mm\u001b[4mo\u001b[24m" + ], + [ + 0.166203, + "\b\u001b[4mo\u001b[4mu\u001b[24m" + ], + [ + 0.138953, + "\b\u001b[4mu\u001b[4mn\u001b[24m" + ], + [ + 0.177723, + "\b\u001b[4mn\u001b[4mt\u001b[24m" + ], + [ + 1.371278, + "\u001b[?1l\u001b>" + ], + [ + 0.001184, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000603, + "\u001b]2;borg mount :: /tmp/mount\u0007\u001b]1;borg\u0007" + ], + [ + 0.651025, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.001453, + "\u001b]7;\u0007" + ], + [ + 0.000984, + "\u001b]7;\u0007" + ], + [ + 7.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 2.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.0002, + "\u001b[?2004h" + ], + [ + 1.860515, + "\u001b[32ml\u001b[39m" + ], + [ + 0.107896, + "\b\u001b[32ml\u001b[32ms\u001b[39m" + ], + [ + 0.253911, + " " + ], + [ + 0.203092, + "-" + ], + [ + 0.178525, + "l" + ], + [ + 0.111795, + "a" + ], + [ + 0.200138, + " " + ], + [ + 0.353001, + "\u001b[4m/\u001b[24m" + ], + [ + 0.264827, + "\b\u001b[4m/\u001b[4mt\u001b[24m" + ], + [ + 0.205749, + "\b\u001b[4mt\u001b[4mm\u001b[24m" + ], + [ + 0.168679, + 
"\b\u001b[4mm\u001b[4mp\u001b[24m" + ], + [ + 0.016649, + "\b\b\b\b\u001b[24m/\u001b[24mt\u001b[24mm\u001b[24mp" + ], + [ + 0.712108, + "\b\b\b\b\u001b[4m/\u001b[4mt\u001b[4mm\u001b[4mp\u001b[24m \b" + ], + [ + 0.383057, + "\b\u001b[4mp\u001b[4m/\u001b[24m" + ], + [ + 0.159994, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.187645, + "\b\u001b[4mm\u001b[4mo\u001b[24m" + ], + [ + 0.168813, + "\b\u001b[4mo\u001b[4mu\u001b[24m" + ], + [ + 0.12933, + "\b\u001b[4mu\u001b[4mn\u001b[24m" + ], + [ + 0.421583, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.018359, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mls\u001b[39m -la \u001b[4m/tmp/mount\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.602087, + "\b\b\u001b[4mt\u001b[24m\u001b[0m\u001b[24m \b" + ], + [ + 2.5e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.000874, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000682, + "\u001b]2;ls --color=tty -la /tmp/mount\u0007\u001b]1;ls\u0007" + ], + [ + 0.002495, + "total 0\r\n" + ], + [ + 4.4e-05, + "drwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[0m\u001b[38;5;33m.\u001b[0m\r\ndrwxrwxrwt. 27 root root 660 Jul 16 18:58 \u001b[48;5;10;38;5;16m..\u001b[0m\r\ndrwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[38;5;33mbackup-block-device\u001b[0m\r\ndrwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[38;5;33mbackup1\u001b[0m\r\ndrwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[38;5;33mbackup2\u001b[0m\r\ndrwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[38;5;33mbackup3\u001b[0m\r\ndrwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[38;5;33mrugk-2017-07-16T18:51:34\u001b[0m\r\ndrwxr-xr-x. 1 rugk rugk 0 Jul 16 18:58 \u001b[38;5;33mrugk-2017-07-16T18:52:19\u001b[0m\r\n" + ], + [ + 0.000169, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000984, + "\u001b]7;\u0007" + ], + [ + 0.00097, + "\u001b]7;\u0007" + ], + [ + 2.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000122, + "\u001b[?1h\u001b=" + ], + [ + 0.000251, + "\u001b[?2004h" + ], + [ + 0.482339, + "\u001b[4mb\u001b[24m" + ], + [ + 0.179808, + "\b\u001b[24m\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.105817, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.116974, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.173004, + " " + ], + [ + 0.216291, + "u" + ], + [ + 0.168732, + "m" + ], + [ + 0.228004, + "o" + ], + [ + 0.19268, + "u" + ], + [ + 0.14303, + "n" + ], + [ + 0.175557, + "t" + ], + [ + 0.406596, + " " + ], + [ + 0.69215, + "\u001b[4m/\u001b[24m" + ], + [ + 0.242216, + "\b\u001b[4m/\u001b[4mt\u001b[24m" + ], + [ + 0.260453, + "\b\u001b[4mt\u001b[4mm\u001b[24m" + ], + [ + 0.2605, + "\b\u001b[4mm\u001b[4mp\u001b[24m" + ], + [ + 0.014483, + "\b\b\b\b\u001b[24m/\u001b[24mt\u001b[24mm\u001b[24mp" + ], + [ + 0.597766, + "\b\b\b\b\u001b[4m/\u001b[4mt\u001b[4mm\u001b[4mp\u001b[24m \b" + ], + [ + 0.482551, + "\b\u001b[4mp\u001b[4m/\u001b[24m" + ], + [ + 0.236361, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.212317, + "\b\u001b[4mm\u001b[4mo\u001b[24m" + ], + [ + 0.160611, + "\b\u001b[4mo\u001b[4mu\u001b[24m" + ], + [ + 0.142036, + "\b\u001b[4mu\u001b[4mn\u001b[24m" + ], + [ + 0.335664, + "\b\u001b[4mn\u001b[4mt\u001b[24m" + ], + [ + 1.159614, + "\u001b[?1l\u001b>" + ], + [ + 0.001057, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000642, + "\u001b]2;borg umount /tmp/mount\u0007\u001b]1;borg\u0007" + ], + [ + 0.596849, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + 
], + [ + 0.001387, + "\u001b]7;\u0007" + ], + [ + 0.001067, + "\u001b]7;\u0007" + ], + [ + 9.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000187, + "\u001b[?2004h" + ], + [ + 1.467084, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.264583, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.277487, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.12184, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.103403, + "\b\u001b[1mh\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.125651, + "\b\u001b[1ma\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.465663, + "\b\u001b[1mt\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.298764, + "\b\u001b[1m'\u001b[1ms it, but of course there is more to explore, so have a look at the d\u001b[1mo\u001b[1mcs.\u001b[0m\u001b[39m\u001b[K\r\r\n\u001b[K\u001b[A\u001b[4C" + ], + [ + 1.453815, + "\u001b[1B\r\u001b[K\u001b[A\u001b[4C" + ], + [ + 2e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.000725, + "\u001b[?2004l\u001b[1B\r\r\n" + ], + [ + 0.00054, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00118, + "\u001b]7;\u0007" + ], + [ + 0.000909, + "\u001b]7;\u0007" + ], + [ + 7.8e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000524, + "\u001b[?2004h" + ], + [ + 1.74407, + "\u001b[?2004l\r\r\n" + ] + ] +} diff --git a/docs/misc/asciinema/advanced.sh b/docs/misc/asciinema/advanced.sh new file mode 100644 index 00000000..e09d7bcb --- /dev/null +++ b/docs/misc/asciinema/advanced.sh @@ -0,0 +1,65 @@ +# For the pro users, here are some advanced features of borg, so you can impress your friends. ;) +# Note: This screencast was made with borg version 1.1.0 – older or newer borg versions may behave differently. + +# First of all, we can use several environment variables for borg. +# E.g. we do not want to type in our repo path and password again and again… +export BORG_REPO='/media/backup/borgdemo' +export BORG_PASSPHRASE='1234' +# Problem solved, borg will use this automatically… :) +# We'll use this right away… + +## ADVANCED CREATION ## + +# We can also use some placeholders in our archive name… +borg create --stats --progress --compression lz4 ::{user}-{now} Wallpaper +# Notice the backup name. + +# And we can put completely different data, with different backup settings, in our backup. It will be deduplicated, anyway: +borg create --stats --progress --compression zlib,6 --exclude ~/Downloads/big ::{user}-{now} ~/Downloads + +# Or let's backup a device via STDIN. +sudo dd if=/dev/loop0 bs=10M | borg create --progress --stats ::specialbackup - + +# Let's continue with some simple things: +## USEFUL COMMANDS ## +# You can show some information about an archive. You can even do it without needing to specify the archive name: +borg info :: --last 1 + +# So let's rename our last archive: +borg rename ::specialbackup backup-block-device + +borg info :: --last 1 + +# A very important step if you choose keyfile mode (where the keyfile is only saved locally) is to export your keyfile and possibly print it, etc. 
+borg key export :: --qr-code file.html # this creates a nice HTML, but when you want something simpler… +< remove comment > +< let there: borg check > --paper # this is a "manual input"-only backup (but it is also included in the --qr-code option) + +## MAINTENANCE ## +# Sometimes backups get broken or we want a regular "checkup" that everything is okay… +borg check -v :: + +# Next problem: Usually you do not have infinite disk space. So you may need to prune your archive… +# You can tune this in every detail. See the docs for details. Here only a simple example: +borg prune --list --keep-last 1 --dry-run +# When actually executing it in a script, you have to use it without the --dry-run option, of course. + +## RESTORE ## + +# When you want to see the diff between two archives use this command. +# E.g. what happened between the first two backups? +borg diff ::backup1 backup2 +# Ah, we added a file, right… + +# There are also other ways to extract the data. +# E.g. as a tar archive. +borg export-tar --progress ::backup2 backup.tar.gz +ls -l + +# You can mount an archive or even the whole repository: +mkdir /tmp/mount +borg mount :: /tmp/mount +ls -la /tmp/mount +borg umount /tmp/mount + +# That's it, but of course there is more to explore, so have a look at the docs. diff --git a/docs/misc/asciinema/basic.json b/docs/misc/asciinema/basic.json new file mode 100644 index 00000000..9eaf8935 --- /dev/null +++ b/docs/misc/asciinema/basic.json @@ -0,0 +1,4862 @@ +{ + "version": 1, + "width": 78, + "height": 25, + "duration": 379.234504, + "command": null, + "title": null, + "env": { + "TERM": "xterm-256color", + "SHELL": "/bin/zsh" + }, + "stdout": [ + [ + 0.000155, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000133, + "\u001b[?1h\u001b=" + ], + [ + 0.000183, + "\u001b[?2004h" + ], + [ + 0.468833, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.413214, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.440799, + "\b\b\u001b[1m#\u001b[1m \u001b[1mH\u001b[0m\u001b[39m" + ], + [ + 0.155436, + "\b\u001b[1mH\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.153888, + "\b\u001b[1me\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.145046, + "\b\u001b[1mr\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.191005, + "\b\u001b[1me\u001b[1m you'll see some basic commands to start working with borg.\u001b[0m\u001b[39m" + ], + [ + 0.328571, + "\u001b[?1l\u001b>" + ], + [ + 0.000462, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000787, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 7.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 1.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.0003, + "\u001b[?2004h" + ], + [ + 0.553943, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.254153, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.205346, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.164037, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.198817, + "\b\u001b[1mo\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.157487, + "\b\u001b[1mt\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.348855, + "\b\u001b[1me\u001b[1m:\u001b[0m\u001b[39m" + ], + [ + 0.308837, + "\b\u001b[1m:\u001b[1m This teaser screencast was made with borg version 1.1.0 – older or n\u001b[1me\u001b[1mwer borg versions may behave differently.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 0.760183, + "\u001b[?1l\u001b>" + ], + [ + 0.001229, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001043, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000111, + 
"\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000699, + "\u001b[?2004h" + ], + [ + 0.617302, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.269944, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.231147, + "\b\b\u001b[1m#\u001b[1m \u001b[1mB\u001b[0m\u001b[39m" + ], + [ + 0.157768, + "\b\u001b[1mB\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.145012, + "\b\u001b[1mu\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.360132, + "\b\u001b[1mt\u001b[1m let's start.\u001b[0m\u001b[39m" + ], + [ + 0.808076, + "\u001b[?1l\u001b>" + ], + [ + 0.000384, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001063, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000271, + "\u001b[?2004h" + ], + [ + 1.213811, + "\u001b[?1l\u001b>" + ], + [ + 0.000271, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001041, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 7e-06, + "\u001b]1;~/Pictures\u0007" + ], + [ + 4.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000167, + "\u001b[?2004h" + ], + [ + 0.326924, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.245919, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.173421, + "\b\b\u001b[1m#\u001b[1m \u001b[1mF\u001b[0m\u001b[39m" + ], + [ + 0.121947, + "\b\u001b[1mF\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.196316, + "\b\u001b[1mi\u001b[1mr\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.224037, + "\b\u001b[1ms\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.323925, + "\b\u001b[1mt\u001b[1m of all, you can always get help:\u001b[0m\u001b[39m" + ], + [ + 0.738987, + "\u001b[?1l\u001b>" + ], + [ + 0.000395, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000643, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000107, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 3.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00031, + "\u001b[?2004h" + ], + [ + 1.268663, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.19562, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.100091, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.157538, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.196595, + " " + ], + [ + 0.210071, + "h" + ], + [ + 0.124892, + "e" + ], + [ + 0.177906, + "l" + ], + [ + 0.121006, + "p" + ], + [ + 0.314487, + "\u001b[?1l\u001b>" + ], + [ + 0.000695, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000777, + "\u001b]2;borg help\u0007\u001b]1;borg\u0007" + ], + [ + 0.538908, + "usage: borg [-V] [-h] [--critical] [--error] [--warning] [--info] [--debug]\r\n [--debug-topic TOPIC] [-p] [--log-json] [--lock-wait N]\r\n [--show-version] [--show-rc] [--no-files-cache] [--umask M]\r\n [--remote-path PATH] [--remote-ratelimit rate]\r\n [--consider-part-files] [--debug-profile FILE]\r\n ...\r\n\r\nBorg - Deduplicated Backups\r\n\r\noptional arguments:\r\n -V, --version show version number and exit\r\n\r\nCommon options:\r\n -h, --help show this help message and exit\r\n --critical work on log level CRITICAL\r\n --error work on log level ERROR\r\n --warning work on log level WARNING (default)\r\n --info, -v, --verbose\r\n work on log level INFO\r\n --debug enable debug output, work on log level DEBUG\r\n --debug-topic TOPIC enable TOPIC debugging (can be specified 
multiple\r\n times). The logger path is borg.debug. if TOPIC\r\n " + ], + [ + 4.1e-05, + " is not fully qualified.\r\n -p, --progress show progress information\r\n --log-json Output one JSON object per log line instead of\r\n formatted text.\r\n --lock-wait N wait for the lock, but max. N seconds (default: 1).\r\n --show-version show/log the borg version\r\n --show-rc show/log the return code (rc)\r\n --no-files-cache do not load/update the file metadata cache used to\r\n detect unchanged files\r\n --umask M set umask to M (local and remote, default: 0077)\r\n --remote-path PATH use PATH as borg executable on the remote (default:\r\n \"borg\")\r\n --remote-ratelimit rate\r\n set remote network upload rate limit in kiByte/s\r\n (default: 0=unlimited)\r\n --consider-part-files\r\n treat part files like normal files (e.g. to\r\n list/extract them)\r\n --debug-profile FILE Write execution profile" + ], + [ + 1.6e-05, + " in Borg format into FILE. For\r\n local use a Python-compatible file can be generated by\r\n suffixing FILE with \".pyprof\".\r\n\r\nrequired arguments:\r\n \r\n serve start repository server process\r\n init initialize empty repository\r\n check verify repository\r\n key manage repository key\r\n change-passphrase change repository passphrase\r\n create create backup\r\n extract extract archive contents\r\n export-tar create tarball from archive\r\n diff find differences in archive contents\r\n rename rename archive\r\n delete delete archive\r\n list list archive or repository contents\r\n mount mount repository\r\n umount umount repository\r\n info show repository or archive information\r\n break-lock break repository and cache locks\r\n prune " + ], + [ + 2e-05, + " prune archives\r\n upgrade upgrade repository format\r\n recreate Re-create archives\r\n with-lock run user command with lock held\r\n debug debugging command (not intended for normal use)\r\n benchmark benchmark command\r\n" + ], + [ + 0.043747, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 8e-06, + "\u001b]1;~/Pictures\u0007" + ], + [ + 5.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 4.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000163, + "\u001b[?2004h" + ], + [ + 1.509225, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.593308, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.334291, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.170683, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.07295, + "\b\u001b[1mh\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.184509, + "\b\u001b[1me\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.136032, + "\b\u001b[1ms\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.250718, + "\b\u001b[1me\u001b[1m are a lot of commands, so better we start with a few:\u001b[0m\u001b[39m" + ], + [ + 1.088446, + "\u001b[?1l\u001b>" + ], + [ + 0.000396, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000604, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000101, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 4.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000296, + "\u001b[?2004h" + ], + [ + 0.921744, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.276219, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.204903, + "\b\b\u001b[1m#\u001b[1m \u001b[1mL\u001b[0m\u001b[39m" + ], + [ + 0.137064, + "\b\u001b[1mL\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.16386, + "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.340061, + "\b\u001b[1mt\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.115905, + 
"\b\u001b[1m'\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.213255, + "\b\u001b[1ms\u001b[1m create a repo on an external drive:\u001b[0m\u001b[39m" + ], + [ + 1.086717, + "\u001b[?1l\u001b>" + ], + [ + 0.000391, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000606, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000133, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000274, + "\u001b[?2004h" + ], + [ + 1.935612, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.184978, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.115803, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.134282, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.266061, + " " + ], + [ + 0.599046, + "i" + ], + [ + 0.183493, + "n" + ], + [ + 0.181453, + "i" + ], + [ + 0.258375, + "t" + ], + [ + 0.712329, + " " + ], + [ + 0.381053, + "" + ], + [ + 0.381053, + "-" + ], + [ + 0.119206, + "-" + ], + [ + 0.18993, + "e" + ], + [ + 0.175168, + "n" + ], + [ + 0.258977, + "c" + ], + [ + 0.139364, + "r" + ], + [ + 0.111012, + "y" + ], + [ + 0.55406, + "p" + ], + [ + 0.261667, + "t" + ], + [ + 0.284611, + "i" + ], + [ + 0.142087, + "o" + ], + [ + 0.195185, + "n" + ], + [ + 0.23882, + "=" + ], + [ + 0.31059, + "r" + ], + [ + 0.151355, + "e" + ], + [ + 0.165925, + "p" + ], + [ + 0.132833, + "o" + ], + [ + 0.253402, + "k" + ], + [ + 0.174711, + "e" + ], + [ + 0.245888, + "y" + ], + [ + 0.759586, + " " + ], + [ + 0.383355, + "\u001b[4m/\u001b[24m" + ], + [ + 0.189694, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.16364, + "\b\u001b[4mm\u001b[4me\u001b[24m" + ], + [ + 0.151451, + "\b\u001b[4me\u001b[4md\u001b[24m" + ], + [ + 0.239109, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.006487, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m init --encryption=repokey \u001b[4m/media\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.268216, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003429, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m init --encryption=repokey \u001b[4m/media/backup\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.232352, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003575, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m init --encryption=repokey \u001b[4m/media/backup/borgdemo\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.492094, + "\b\b\u001b[4mo\u001b[24m\u001b[0m\u001b[24m \b" + ], + [ + 0.748712, + "\u001b[?1l\u001b>" + ], + [ + 0.001017, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000712, + "\u001b]2;borg init --encryption=repokey /media/backup/borgdemo\u0007\u001b]1;borg\u0007" + ], + [ + 0.548105, + "Enter new passphrase: " + ], + [ + 2.119749, + "\r\n" + ], + [ + 0.000155, + "Enter same passphrase again: " + ], + [ + 1.606761, + "\r\n" + ], + [ + 5.8e-05, + "Do you want your passphrase to be displayed for verification? 
[yN]: " + ], + [ + 0.901237, + "\r\n" + ], + [ + 0.362453, + "\r\nBy default repositories initialized with this version will produce security\r\nerrors if written to with an older version (up to and including Borg 1.0.8).\r\n\r\nIf you want to use these older versions, you can disable the check by running:\r\nborg upgrade --disable-tam '/media/backup/borgdemo'\r\n\r\nSee https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability for details about the security implications.\r\n" + ], + [ + 0.050488, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000166, + "\u001b[?2004h" + ], + [ + 2.49308, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.308744, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.256774, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.157732, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.127107, + "\b\u001b[1mh\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.178449, + "\b\u001b[1mi\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.179372, + "\b\u001b[1ms\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.383584, + "\b\u001b[1m \u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.103361, + "\b\u001b[1mu\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.155066, + "\b\u001b[1ms\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.133308, + "\b\u001b[1me\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.23615, + "\b\u001b[1ms\u001b[1m the repokey encryption. You may look at \"borg help init\" or the \u001b[1mo\u001b[1mnline doc at https://borgbackup.readthedocs.io/ for other modes.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.159159, + "\u001b[?1l\u001b>" + ], + [ + 0.0004, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000738, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000111, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000286, + "\u001b[?2004h" + ], + [ + 1.645569, + "\u001b[?1l\u001b>" + ], + [ + 0.000452, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000619, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.0001, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000161, + "\u001b[?2004h" + ], + [ + 1.17234, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.575706, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.205759, + "\b\b\u001b[1m#\u001b[1m \u001b[1mS\u001b[0m\u001b[39m" + ], + [ + 0.343517, + "\b\u001b[1mS\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.245497, + "\b\u001b[1mo\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.218486, + "\b\u001b[1m \u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.171258, + "\b\u001b[1mn\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.146364, + "\b\u001b[1mo\u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.25775, + "\b\u001b[1mw\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.271708, + "\b\b\u001b[1mw\u001b[0m\u001b[39m\u001b[0m\u001b[39m \b" + ], + [ + 0.213838, + "\b\u001b[1mw\u001b[1m,\u001b[0m\u001b[39m" + ], + [ + 0.422324, + "\b\u001b[1m,\u001b[1m let's create our first (compressed) backup.\u001b[0m\u001b[39m" + ], + [ + 0.561514, + "\u001b[?1l\u001b>" + ], + [ + 0.000855, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000773, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 4.8e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000411, + "\u001b[?2004h" + ], + [ + 
1.326196, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.191851, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.136657, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.142499, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.173217, + " " + ], + [ + 0.294445, + "c" + ], + [ + 0.200519, + "r" + ], + [ + 0.153078, + "e" + ], + [ + 0.133383, + "a" + ], + [ + 0.12891, + "t" + ], + [ + 0.151491, + "e" + ], + [ + 0.728709, + " " + ], + [ + 0.592118, + "-" + ], + [ + 0.118108, + "-" + ], + [ + 0.277349, + "s" + ], + [ + 0.134588, + "t" + ], + [ + 0.148057, + "a" + ], + [ + 0.090202, + "t" + ], + [ + 0.150971, + "s" + ], + [ + 0.307217, + " " + ], + [ + 0.481688, + "-" + ], + [ + 0.112243, + "-" + ], + [ + 0.234317, + "p" + ], + [ + 0.12453, + "r" + ], + [ + 0.116446, + "o" + ], + [ + 0.213657, + "g" + ], + [ + 0.12239, + "r" + ], + [ + 0.165156, + "e" + ], + [ + 0.256082, + "s" + ], + [ + 0.175158, + "s" + ], + [ + 0.302493, + " " + ], + [ + 0.490303, + "-" + ], + [ + 0.117279, + "-" + ], + [ + 0.130499, + "c" + ], + [ + 0.146261, + "o" + ], + [ + 0.139848, + "m" + ], + [ + 0.156108, + "p" + ], + [ + 0.190058, + "r" + ], + [ + 0.166862, + "e" + ], + [ + 0.261225, + "s" + ], + [ + 0.157133, + "s" + ], + [ + 0.281205, + "i" + ], + [ + 0.142487, + "o" + ], + [ + 0.179023, + "n" + ], + [ + 0.854723, + " " + ], + [ + 0.580178, + "l" + ], + [ + 0.29757, + "z" + ], + [ + 0.3111, + "4" + ], + [ + 1.085772, + " " + ], + [ + 0.635539, + "\u001b[4m/\u001b[24m" + ], + [ + 0.268857, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.121341, + "\b\u001b[4mm\u001b[4me\u001b[24m" + ], + [ + 0.141645, + "\b\u001b[4me\u001b[4md\u001b[24m" + ], + [ + 0.230858, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.010346, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m create --stats --progress --compression lz4 \u001b[4m/media\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.416084, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.004048, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m create --stats --progress --compression lz4 \u001b[4m/media/backup\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.346657, + "\u001b[?7l" + ], + [ + 2.7e-05, + "\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003996, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m create --stats --progress --compression lz4 \u001b[4m/media/backup/borgdemo\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 1.068791, + "\b\b\u001b[4mo\u001b[24m\u001b[0m\u001b[24m \b" + ], + [ + 1.210608, + "\u001b[22D\u001b[24m/\u001b[24mm\u001b[24me\u001b[24md\u001b[24mi\u001b[24ma\u001b[24m/\u001b[24mb\u001b[24ma\u001b[24mc\u001b[24mk\u001b[24mu\u001b[24mp\u001b[24m/\u001b[24mb\u001b[24mo\u001b[24mr\u001b[24mg\u001b[24md\u001b[24me\u001b[24mm\u001b[24mo:" + ], + [ + 0.125995, + ":" + ], + [ + 0.376036, + "b" + ], + [ + 0.101011, + "a" + ], + [ + 0.178171, + "c \r\u001b[K" + ], + [ + 0.133561, + "k" + ], + [ + 0.162923, + "\rku" + ], + [ + 0.241519, + "p" + ], + [ + 1.426974, + "1" + ], + [ + 0.432275, + " " + ], + [ + 0.295102, + "\u001b[4mW\u001b[24m" + ], + [ + 0.158768, + "\b\u001b[4mW\u001b[4ma\u001b[24m" + ], + [ + 0.270666, + "\b\u001b[4ma\u001b[4ml\u001b[24m" + ], + [ + 0.13015, + "\b\u001b[4ml\u001b[4ml\u001b[24m" + ], + [ + 0.267749, + "\b\u001b[4ml\u001b[4mp\u001b[24m" + ], + [ + 
0.173461, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003997, + "\u001b[A\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ " + ], + [ + 3.5e-05, + "\u001b[32mborg\u001b[39m create --stats --progress --compression lz4 /media/backup/borgdemo::backup1 \u001b[4mWallpaper\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.997225, + "\b\b\u001b[4mr\u001b[24m\u001b[K" + ], + [ + 0.447022, + "\u001b[?1l\u001b>" + ], + [ + 0.002978, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000917, + "\u001b]2;borg create --stats --progress --compression lz4 Wallpaper\u0007\u001b]1;borg\u0007" + ], + [ + 0.630228, + "Enter passphrase for key /media/backup/borgdemo: " + ], + [ + 2.264647, + "\r\n" + ], + [ + 0.108689, + "0 B O 0 B C 0 B D 0 N Wallpaper \r" + ], + [ + 0.024194, + "Initializing cache transaction: Reading config \r" + ], + [ + 0.000234, + "Initializing cache transaction: Reading chunks \r" + ], + [ + 0.000225, + "Initializing cache transaction: Reading files \r" + ], + [ + 0.000215, + " \r" + ], + [ + 0.179719, + "5.21 MB O 5.21 MB C 5.21 MB D 8 N Wallpaper/bigcollec...em_Wasserloch_im_Q.jpg\r" + ], + [ + 0.206362, + "17.55 MB O 17.51 MB C 15.18 MB D 31 N Wallpaper/bigcoll...ckr_OpenGetty_Im.jpg\r" + ], + [ + 0.202173, + "28.63 MB O 28.59 MB C 26.26 MB D 49 N Wallpaper/bigcoll...ortugal____Stefa.jpg\r" + ], + [ + 0.201105, + "41.30 MB O 41.26 MB C 38.94 MB D 71 N Wallpaper/bigcoll...e_in_der_Gro_auf.jpg\r" + ], + [ + 0.205913, + "53.63 MB O 53.54 MB C 51.21 MB D 93 N Wallpaper/bigcoll...De_Janeiro__Bras.jpg\r" + ], + [ + 0.201657, + "66.10 MB O 65.99 MB C 63.66 MB D 115 N Wallpaper/bigcol...Kenai-Fjords-Nat.jpg\r" + ], + [ + 0.222663, + "78.06 MB O 77.92 MB C 75.59 MB D 135 N Wallpaper/bigcol...ien____Nora_De_A.jpg\r" + ], + [ + 0.206809, + "89.99 MB O 89.82 MB C 85.43 MB D 155 N Wallpaper/bigcol...__De_Pere__Wisco.jpg\r" + ], + [ + 0.204475, + "101.51 MB O 101.32 MB C 96.93 MB D 175 N Wallpaper/bigco..._Silver_Falls_S.jpg\r" + ], + [ + 0.206201, + "115.08 MB O 114.89 MB C 110.50 MB D 199 N Wallpaper/bigco..._Garret_Suhrie.jpg\r" + ], + [ + 0.202147, + "126.51 MB O 126.28 MB C 119.47 MB D 220 N Wallpaper/bigco...fenmesserfisch.jpg\r" + ], + [ + 0.206629, + "138.74 MB O 138.50 MB C 131.69 MB D 243 N Wallpaper/bigco...tswana____Mich.jpg\r" + ], + [ + 0.214855, + "152.84 MB O 152.60 MB C 142.74 MB D 269 N Wallpaper/bigco...fest__Munich__.jpg\r" + ], + [ + 0.200083, + "163.05 MB O 162.80 MB C 152.94 MB D 288 N Wallpaper/bigco..._Marco_RomaniG.jpg\r" + ], + [ + 0.208535, + "175.85 MB O 175.57 MB C 164.47 MB D 308 N Wallpaper/bigco...gway__Colorado.jpg\r" + ], + [ + 0.21234, + "184.65 MB O 184.36 MB C 173.25 MB D 324 N Wallpaper/bigco...nstanz__Baden-.jpg\r" + ], + [ + 0.200087, + "194.92 MB O 194.59 MB C 183.49 MB D 343 N Wallpaper/bigco...op__Caledon__P.jpg\r" + ], + [ + 0.201257, + "204.71 MB O 204.38 MB C 191.68 MB D 361 N Wallpaper/bigco...izian_in_Jamni.jpg\r" + ], + [ + 0.213355, + "217.22 MB O 216.88 MB C 202.98 MB D 382 N Wallpaper/bigco...appadokien__T_.jpg\r" + ], + [ + 0.202274, + "230.56 MB O 230.16 MB C 212.45 MB D 404 N Wallpaper/bigco...eleiGetty_Imag.jpg\r" + ], + [ + 0.204836, + "242.95 MB O 242.53 MB C 224.34 MB D 426 N Wallpaper/bigco...g__Thailand___.jpg\r" + ], + [ + 0.205093, + "254.42 MB O 254.02 MB C 232.75 MB D 446 N Wallpaper/bigco...ame_Reserve__O.jpg\r" + ], + [ + 0.201488, + "265.77 MB O 265.39 MB C 242.76 MB D 466 N Wallpaper/bigco...e_Republik____.jpg\r" + ], + [ + 0.20036, + "278.64 MB O 278.26 MB C 254.62 MB D 488 N Wallpaper/bigco...ien____Patty_P.jpg\r" + 
], + [ + 0.209301, + "288.82 MB O 288.45 MB C 264.81 MB D 505 N Wallpaper/bigco...Ruhpolding__Ch.jpg\r" + ], + [ + 0.214561, + "298.04 MB O 297.68 MB C 274.04 MB D 520 N Wallpaper/bigco...wo__Landkreis_.jpg\r" + ], + [ + 0.222111, + "311.03 MB O 310.66 MB C 287.02 MB D 543 N Wallpaper/bigco...a__Portugal___.jpg\r" + ], + [ + 0.204945, + "319.53 MB O 319.17 MB C 295.53 MB D 558 N Wallpaper/bigco...hinos__Hondura.jpg\r" + ], + [ + 0.213928, + "328.19 MB O 327.77 MB C 304.13 MB D 574 N Wallpaper/bigco...ndon__Gro_brit.jpg\r" + ], + [ + 0.206827, + "338.25 MB O 337.81 MB C 314.17 MB D 591 N Wallpaper/bigco...l_Forest__Bund.jpg\r" + ], + [ + 0.209094, + "347.40 MB O 346.96 MB C 323.32 MB D 606 N Wallpaper/bigco...tlantischen_Oz.jpg\r" + ], + [ + 0.200671, + "361.16 MB O 360.71 MB C 334.04 MB D 628 N Wallpaper/bigco...lpark__British.jpg\r" + ], + [ + 0.208778, + "375.20 MB O 374.77 MB C 348.09 MB D 650 N Wallpaper/bigco...swagen_beim_Ro.jpg\r" + ], + [ + 0.2023, + "385.94 MB O 385.47 MB C 358.79 MB D 669 N Wallpaper/bigco...-Bessin__Frank.jpg\r" + ], + [ + 0.201448, + "396.55 MB O 396.10 MB C 368.89 MB D 687 N Wallpaper/bigco...nian_Switzerla.jpg\r" + ], + [ + 0.200229, + "411.96 MB O 411.41 MB C 373.94 MB D 711 N Wallpaper/bigco...CREATISTAGetty.jpg\r" + ], + [ + 0.202083, + "420.92 MB O 420.38 MB C 382.91 MB D 727 N Wallpaper/bigco...LLCCorbisVCG_G.jpg\r" + ], + [ + 0.202677, + "430.76 MB O 430.21 MB C 392.74 MB D 745 N Wallpaper/bigco...r__Tansania___.jpg\r" + ], + [ + 0.206733, + "441.45 MB O 440.87 MB C 400.76 MB D 763 N Wallpaper/bigco...andenburg__Deu.jpg\r" + ], + [ + 0.205541, + "449.42 MB O 448.83 MB C 408.72 MB D 776 N Wallpaper/bigco...Wind_Cave_Nati.jpg\r" + ], + [ + 0.201764, + "458.56 MB O 457.97 MB C 417.20 MB D 792 N Wallpaper/bigco...dney_Harbour_B.jpg\r" + ], + [ + 0.206272, + "470.73 MB O 470.08 MB C 428.74 MB D 815 N Wallpaper/bigco...hland____Patri.jpg\r" + ], + [ + 0.210875, + "485.80 MB O 485.15 MB C 443.01 MB D 843 N Wallpaper/bigco...Hokkaido__Japa.jpg\r" + ], + [ + 0.227162, + "498.93 MB O 498.27 MB C 450.80 MB D 867 N Wallpaper/bigco...topher_Collins.jpg\r" + ], + [ + 0.206293, + "510.73 MB O 510.07 MB C 462.15 MB D 887 N Wallpaper/bigco...itzeinschlag_i.jpg\r" + ], + [ + 0.200265, + "520.54 MB O 519.86 MB C 471.39 MB D 903 N Wallpaper/bigco..._zwischen_Boli.jpg\r" + ], + [ + 0.204067, + "528.01 MB O 527.33 MB C 478.86 MB D 916 N Wallpaper/bigco...jall__Island__.jpg\r" + ], + [ + 0.209223, + "539.61 MB O 538.94 MB C 490.47 MB D 934 N Wallpaper/bigco..._amares__Provi.jpg\r" + ], + [ + 0.215843, + "551.16 MB O 550.49 MB C 501.50 MB D 952 N Wallpaper/bigco...tionalpark__Ut.jpg\r" + ], + [ + 0.212909, + "561.29 MB O 560.60 MB C 511.22 MB D 970 N Wallpaper/bigco..._Inseln__Niede.jpg\r" + ], + [ + 0.209655, + "571.59 MB O 570.86 MB C 520.92 MB D 989 N Wallpaper/bigco...rbeskopf__Huns.jpg\r" + ], + [ + 0.232431, + "582.52 MB O 581.80 MB C 525.99 MB D 1006 N Wallpaper/bigc...n__an_art_in 2.jpg\r" + ], + [ + 0.201199, + "593.36 MB O 592.12 MB C 536.31 MB D 1036 N Wallpaper/more/Green_Curves.jpg \r" + ], + [ + 0.205747, + "604.80 MB O 603.52 MB C 547.71 MB D 1044 N Wallpaper/evenmore/ChipDE 06.jpg \r" + ], + [ + 0.23016, + "Compacting segments 0% \r" + ], + [ + 0.174726, + "Compacting segments 50% \r" + ], + [ + 4.5e-05, + " \r" + ], + [ + 0.04695, + "Saving files cache \r" + ], + [ + 0.005688, + "Saving chunks cache \r" + ], + [ + 0.000299, + "Saving cache config \r" + ], + [ + 0.107527, + " \r" + ], + [ + 3.7e-05, + " \r" + ], + [ + 0.000355, + 
"------------------------------------------------------------------------------\r\n" + ], + [ + 3.7e-05, + "Archive name: backup1\r\n" + ], + [ + 1.4e-05, + "Archive fingerprint: 9758c7db339a066360bffad17b2ffac4fb368c6722c0be3a47a7a9b631f06407\r\n" + ], + [ + 0.000106, + "Time (start): Fri, 2017-07-14 21:54:06\r\nTime (end): Fri, 2017-07-14 21:54:17\r\n" + ], + [ + 3.9e-05, + "Duration: 11.40 seconds\r\n" + ], + [ + 3.4e-05, + "Number of files: 1050\r\n" + ], + [ + 7.2e-05, + "Utilization of maximum supported archive size: 0%\r\n------------------------------------------------------------------------------\r\n" + ], + [ + 4.7e-05, + " Original size Compressed size Deduplicated size\r\n" + ], + [ + 1.1e-05, + "This archive: 618.96 MB 617.47 MB 561.67 MB\r\n" + ], + [ + 2.7e-05, + "All archives: 618.96 MB 617.47 MB 561.67 MB\r\n" + ], + [ + 2.4e-05, + "\r\n" + ], + [ + 2.3e-05, + " Unique chunks Total chunks\r\n" + ], + [ + 1.3e-05, + "Chunk index: 999 1093\r\n" + ], + [ + 2.4e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 0.04885, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000195, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000196, + "\u001b[?2004h" + ], + [ + 1.403148, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.918581, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.361872, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.12148, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.21559, + "\b\u001b[1mh\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.152309, + "\b\u001b[1ma\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.941741, + "\b\u001b[1mt\u001b[1m's nice, so far.\u001b[0m\u001b[39m" + ], + [ + 1.005262, + "\u001b[?1l\u001b>" + ], + [ + 0.00039, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001061, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 8.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000287, + "\u001b[?2004h" + ], + [ + 2.564637, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.34769, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.160447, + "\b\b\u001b[1m#\u001b[1m \u001b[1mS\u001b[0m\u001b[39m" + ], + [ + 0.153165, + "\b\u001b[1mS\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.17514, + "\b\u001b[1mo\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.198658, + "\b\u001b[1m \u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.204631, + "\b\u001b[1ml\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.250815, + "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 1.190059, + "\b\u001b[1mt\u001b[1m's add a new file…\u001b[0m\u001b[39m" + ], + [ + 1.216941, + "\u001b[?1l\u001b>" + ], + [ + 0.000401, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000756, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 2.1e-05, + "\u001b]1;~/Pictures\u0007" + ], + [ + 8.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000277, + "\u001b[?2004h" + ], + [ + 1.289557, + "\u001b[1m\u001b[31me\u001b[0m\u001b[39m" + ], + [ + 0.216875, + "\b\u001b[1m\u001b[31me\u001b[1m\u001b[31mc\u001b[0m\u001b[39m" + ], + [ + 0.184187, + "\b\b\u001b[1m\u001b[31me\u001b[1m\u001b[31mc\u001b[1m\u001b[31mh\u001b[0m\u001b[39m" + ], + [ + 0.177444, + "\b\b\b\u001b[0m\u001b[32me\u001b[0m\u001b[32mc\u001b[0m\u001b[32mh\u001b[32mo\u001b[39m" + ], + [ + 0.226152, + " " + ], + [ + 0.320216, + 
"\u001b[33m\"\u001b[39m" + ], + [ + 0.404454, + "\b\u001b[33m\"\u001b[33ma\u001b[39m" + ], + [ + 0.267657, + "\b\u001b[33ma\u001b[33md\u001b[39m" + ], + [ + 0.130258, + "\b\u001b[33md\u001b[33md\u001b[39m" + ], + [ + 1.613237, + "\b\u001b[33md\u001b[33me\u001b[39m" + ], + [ + 0.175381, + "\b\u001b[33me\u001b[33md\u001b[39m" + ], + [ + 0.404248, + "\b\u001b[33md\u001b[33m \u001b[39m" + ], + [ + 0.669276, + "\b\u001b[33m \u001b[33mn\u001b[39m" + ], + [ + 0.128663, + "\b\u001b[33mn\u001b[33me\u001b[39m" + ], + [ + 0.132483, + "\b\u001b[33me\u001b[33mw\u001b[39m" + ], + [ + 0.175823, + "\b\u001b[33mw\u001b[33m \u001b[39m" + ], + [ + 0.220023, + "\b\u001b[33m \u001b[33mn\u001b[39m" + ], + [ + 0.156931, + "\b\u001b[33mn\u001b[33mi\u001b[39m" + ], + [ + 0.10604, + "\b\u001b[33mi\u001b[33mc\u001b[39m" + ], + [ + 0.166585, + "\b\u001b[33mc\u001b[33me\u001b[39m" + ], + [ + 0.306911, + "\b\u001b[33me\u001b[33m \u001b[39m" + ], + [ + 0.228895, + "\b\u001b[33m \u001b[33mf\u001b[39m" + ], + [ + 0.160772, + "\b\u001b[33mf\u001b[33mi\u001b[39m" + ], + [ + 0.144448, + "\b\u001b[33mi\u001b[33ml\u001b[39m" + ], + [ + 0.125193, + "\b\u001b[33ml\u001b[33me\u001b[39m" + ], + [ + 0.828758, + "\b\u001b[33me\u001b[33m\"\u001b[39m" + ], + [ + 0.566156, + " " + ], + [ + 0.349791, + ">" + ], + [ + 0.577663, + " " + ], + [ + 0.28936, + "\u001b[4mW\u001b[24m" + ], + [ + 0.157708, + "\b\u001b[4mW\u001b[4ma\u001b[24m" + ], + [ + 0.226616, + "\b\u001b[4ma\u001b[4ml\u001b[24m" + ], + [ + 0.106124, + "\b\u001b[4ml\u001b[4ml\u001b[24m" + ], + [ + 0.099397, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.00361, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mecho\u001b[39m \u001b[33m\"added new nice file\"\u001b[39m > \u001b[4mWallpaper\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.822747, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003743, + "\r\r\u001b[40C\u001b[0m\u001b[4m/\u001b[24m" + ], + [ + 0.00018, + "\r\r\n\u001b[J" + ], + [ + 5.1e-05, + "\u001b[38;5;33m2048example\u001b[0m/ \u001b[38;5;13mdeer.jpg\u001b[0m \u001b[38;5;33mmore\u001b[0m/ \r\n\u001b[J\u001b[38;5;33mbigcollection\u001b[0m/ \u001b[J\u001b[38;5;33mevenmore\u001b[0m/ \u001b[J \u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mecho\u001b[39m \u001b[33m\"added new nice file\"\u001b[39m > \u001b[4mWallpaper/\u001b[24m\u001b[K" + ], + [ + 1.173525, + "\u001b[10D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr\u001b[24m/n" + ], + [ + 0.118482, + "e" + ], + [ + 0.130187, + "w" + ], + [ + 0.499912, + "f" + ], + [ + 0.161863, + "i" + ], + [ + 0.13679, + "l" + ], + [ + 0.093681, + "e" + ], + [ + 0.261183, + "." 
+ ], + [ + 0.312651, + "t" + ], + [ + 0.10665, + "x" + ], + [ + 0.131562, + "t" + ], + [ + 0.79879, + "\u001b[?1l\u001b>" + ], + [ + 0.001397, + "\u001b[?2004l\r\r\n\u001b[J" + ], + [ + 0.000679, + "\u001b]2;echo \"added new nice file\" > Wallpaper/newfile.txt\u0007\u001b]1;echo\u0007" + ], + [ + 0.000151, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000183, + "\u001b[?2004h" + ], + [ + 2.785656, + "\u001b[32mecho\u001b[39m \u001b[33m\"added new nice file\"\u001b[39m > \u001b[4mWallpaper/newfile.txt\u001b[24m" + ], + [ + 0.206019, + "\u001b[50D\u001b[1m#\u001b[1m \u001b[1mS\u001b[1mo\u001b[1m \u001b[1ml\u001b[1me\u001b[1mt\u001b[1m'\u001b[1ms\u001b[1m \u001b[1ma\u001b[1md\u001b[1md\u001b[1m \u001b[1ma\u001b[1m \u001b[1mn\u001b[1me\u001b[1mw\u001b[1m \u001b[1mf\u001b[1mi\u001b[1ml\u001b[1me\u001b[1m…\u001b[0m\u001b[39m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24m \u001b[24D" + ], + [ + 0.251309, + "\u001b[24D\u001b[1mT\u001b[1mh\u001b[1ma\u001b[1mt\u001b[1m'\u001b[1ms\u001b[1m \u001b[1mn\u001b[1mi\u001b[1mc\u001b[1me\u001b[1m,\u001b[1m \u001b[1ms\u001b[1mo\u001b[1m \u001b[1mf\u001b[1ma\u001b[1mr\u001b[1m.\u001b[0m\u001b[39m\u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \b\b\b\b" + ], + [ + 0.372268, + "\u001b[22D\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[0m\u001b[32mg\u001b[39m\u001b[0m\u001b[39m \u001b[0m\u001b[39mc\u001b[0m\u001b[39mr\u001b[0m\u001b[39me\u001b[0m\u001b[39ma\u001b[0m\u001b[39mt\u001b[0m\u001b[39me\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39m-\u001b[0m\u001b[39ms\u001b[0m\u001b[39mt\u001b[0m\u001b[39ma\u001b[0m\u001b[39mt\u001b[0m\u001b[39ms\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39m-progress --compression lz4 /media/backup/borgdemo::backup1 \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 0.686798, + "\b" + ], + [ + 0.49974, + "\b" + ], + [ + 0.029256, + "\b" + ], + [ + 0.030383, + "\b" + ], + [ + 0.030965, + "\b" + ], + [ + 0.02928, + "\b" + ], + [ + 0.030139, + "\b" + ], + [ + 0.029254, + "\b" + ], + [ + 0.03083, + "\b" + ], + [ + 0.030284, + "\b" + ], + [ + 0.030187, + "\b" + ], + [ + 0.030317, + "\b" + ], + [ + 0.439014, + "\u001b[1C" + ], + [ + 0.357869, + "\u001b[P\u001b[10C \u001b[11D" + ], + [ + 0.141225, + "2\u001b[24m \u001b[4mW\u001b[4ma\u001b[4ml\u001b[4ml\u001b[4mp\u001b[4ma\u001b[4mp\u001b[4me\u001b[4mr\u001b[24m\u001b[10D" + ], + [ + 0.615794, + "\u001b[?1l\u001b>" + ], + [ + 0.001653, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000779, + "\u001b]2;borg create --stats --progress --compression lz4 Wallpaper\u0007\u001b]1;borg\u0007" + ], + [ + 0.627474, + "Enter passphrase for key /media/backup/borgdemo: " + ], + [ + 3.666123, + "\r\n" + ], + [ + 0.128711, + "0 B O 0 B C 0 B D 0 N Wallpaper \r" + ], + [ + 0.006399, + "Initializing cache transaction: Reading config \r" + ], + [ + 0.000208, + "Initializing cache transaction: Reading chunks \r" + ], + [ + 0.000253, + "Initializing cache transaction: Reading files \r" + ], + [ + 0.000269, + " \r" + ], + [ + 0.247567, + "584.80 MB O 584.09 MB C 65 B D 1011 N Wallpaper/newfile.txt \r" + ], + [ + 0.264517, + "Compacting segments 0% \r" + ], + [ + 0.000942, + "Compacting segments 50% \r" + ], + [ + 4e-05, + " \r" + 
], + [ + 0.0606, + "Saving files cache \r" + ], + [ + 0.005405, + "Saving chunks cache \r" + ], + [ + 0.000411, + "Saving cache config \r" + ], + [ + 0.079766, + " \r" + ], + [ + 4.7e-05, + " \r" + ], + [ + 0.000375, + "------------------------------------------------------------------------------\r\n" + ], + [ + 2.4e-05, + "Archive name: backup2\r\n" + ], + [ + 2.7e-05, + "Archive fingerprint: 5aaf03d1c710cf774f9c9ff1c6317b621c14e519c6bac459f6d64b31e3bbd200\r\n" + ], + [ + 0.000102, + "Time (start): Fri, 2017-07-14 21:54:56\r\n" + ], + [ + 2.1e-05, + "Time (end): Fri, 2017-07-14 21:54:56\r\nDuration: 0.33 seconds\r\n" + ], + [ + 7.4e-05, + "Number of files: 1051\r\n" + ], + [ + 8.3e-05, + "Utilization of maximum supported archive size: 0%\r\n------------------------------------------------------------------------------\r\n" + ], + [ + 7e-06, + " Original size Compressed size Deduplicated size\r\n" + ], + [ + 2.8e-05, + "This archive: 618.96 MB 617.47 MB 106.70 kB\r\n" + ], + [ + 2.2e-05, + "All archives: 1.24 GB 1.23 GB 561.77 MB\r\n" + ], + [ + 5.3e-05, + "\r\n" + ], + [ + 7e-06, + " Unique chunks Total chunks\r\n" + ], + [ + 2.2e-05, + "Chunk index: 1002 2187\r\n" + ], + [ + 2.3e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 0.046167, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 8.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000212, + "\u001b[?2004h" + ], + [ + 1.922718, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.225243, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.166756, + "\b\b\u001b[1m#\u001b[1m \u001b[1mW\u001b[0m\u001b[39m" + ], + [ + 0.162323, + "\b\u001b[1mW\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.097757, + "\b\u001b[1mo\u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.265877, + "\b\u001b[1mw\u001b[1m, this was a lot faster!\u001b[0m\u001b[39m" + ], + [ + 0.789811, + "\u001b[?1l\u001b>" + ], + [ + 0.000392, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000754, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 7.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000273, + "\u001b[?2004h" + ], + [ + 1.15181, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.234049, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.209548, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.168421, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.232312, + "\b\u001b[1mo\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.201133, + "\b\u001b[1mt\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.338758, + "\b\u001b[1mi\u001b[1mce the \"Deduplicated size\" in \"This archive\"?\u001b[0m\u001b[39m" + ], + [ + 2.236964, + "\u001b[?1l\u001b>" + ], + [ + 0.000951, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001084, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 9.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000505, + "\u001b[?2004h" + ], + [ + 2.51909, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.240091, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.216793, + "\b\b\u001b[1m#\u001b[1m \u001b[1mB\u001b[0m\u001b[39m" + ], + [ + 0.192027, + "\b\u001b[1mB\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.138706, + "\b\u001b[1mo\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.129501, + "\b\u001b[1mr\u001b[1mg\u001b[0m\u001b[39m" + ], + [ + 0.536844, + 
"\b\u001b[1mg\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.143314, + "\b\u001b[1m \u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.138384, + "\b\u001b[1mr\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.197658, + "\b\u001b[1me\u001b[1mcognized that most files did not change and deduplicated them.\u001b[0m\u001b[39m" + ], + [ + 1.432604, + "\u001b[?1l\u001b>" + ], + [ + 0.000397, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00069, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 6.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000319, + "\u001b[?2004h" + ], + [ + 1.153873, + "\u001b[?1l\u001b>" + ], + [ + 0.000537, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000623, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000101, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000108, + "\u001b[?1h\u001b=" + ], + [ + 0.000309, + "\u001b[?2004h" + ], + [ + 0.447325, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.257975, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.210602, + "\b\b\u001b[1m#\u001b[1m \u001b[1mB\u001b[0m\u001b[39m" + ], + [ + 0.182148, + "\b\u001b[1mB\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.159923, + "\b\u001b[1mu\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.165905, + "\b\u001b[1mt\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.175925, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.116184, + "\b\u001b[1mw\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.125029, + "\b\u001b[1mh\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.110311, + "\b\u001b[1ma\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.26718, + "\b\u001b[1mt\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.393846, + "\b\u001b[1m \u001b[1mhappens, when we move a dir and create a new backup?\u001b[0m\u001b[39m" + ], + [ + 1.840157, + "\u001b[?1l\u001b>" + ], + [ + 0.000398, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000678, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000105, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000102, + "\u001b[?1h\u001b=" + ], + [ + 0.000242, + "\u001b[?2004h" + ], + [ + 1.044202, + "\u001b[1m\u001b[31mm\u001b[0m\u001b[39m" + ], + [ + 0.167573, + "\b\u001b[0m\u001b[32mm\u001b[32mv\u001b[39m" + ], + [ + 0.203794, + " " + ], + [ + 0.199502, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.002962, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mmv\u001b[39m \u001b[4mWallpaper\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.399299, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.004451, + "\r\r\u001b[14C\u001b[0m\u001b[4m/\u001b[24m" + ], + [ + 0.000168, + "\r\r\n\u001b[J" + ], + [ + 3.2e-05, + "\u001b[38;5;33m2048example\u001b[0m/ \u001b[38;5;13mdeer.jpg\u001b[0m \u001b[38;5;33mmore\u001b[0m/ \r\n\u001b[J\u001b[38;5;33mbigcollection\u001b[0m/ \u001b[J\u001b[38;5;33mevenmore\u001b[0m/ \u001b[Jnewfile.txt \u001b[J \u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m \u001b[4mWallpaper/\u001b[24m\u001b[K" + ], + [ + 0.416097, + "\u001b[?7l" + ], + [ + 1.3e-05, + "\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.002339, + "\u001b[10D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr\u001b[24m/2048example\u001b[1m/\u001b[0m" + ], + [ + 0.000184, + "\r\r\n" + ], + [ + 0.000156, + "\u001b[7m2048example/ \u001b[0m \u001b[38;5;13mdeer.jpg\u001b[0m \u001b[38;5;33mmore\u001b[0m/ \u001b[K\r\n\u001b[J\u001b[38;5;33mbigcollection\u001b[0m/ 
\u001b[J\u001b[38;5;33mevenmore\u001b[0m/ \u001b[Jnewfile.txt \u001b[J \u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m Wallpaper/2048example\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.23342, + "\r\r\n" + ], + [ + 1.4e-05, + "\u001b[7m2048example/ \u001b[0m \r\u001b[7m2048example/ \u001b[0m \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m Wallpaper/2048example\u001b[1m/\u001b[0m\u001b[K\u001b[12Dbigcollecti\u001b[0mon\u001b[1m/\u001b[0m" + ], + [ + 0.000154, + "\r\r\n" + ], + [ + 2.5e-05, + "\u001b[38;5;33m2048example\u001b[0m/ \r\u001b[1B\u001b[7mbigcollection/\u001b[0m \r\u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m Wallpaper/bigcollection\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.378809, + "\r\r\n\u001b[J\u001b[A\u001b[29C" + ], + [ + 0.002159, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m\u001b[K\u001b[1C" + ], + [ + 0.35586, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.007824, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m \u001b[4mWallpaper\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.248908, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.002608, + "\r\r\u001b[38C\u001b[0m\u001b[4m/\u001b[24m" + ], + [ + 0.000171, + "\r\r\n\u001b[J" + ], + [ + 5.4e-05, + "\u001b[38;5;33m2048example\u001b[0m/ \u001b[38;5;13mdeer.jpg\u001b[0m \u001b[38;5;33mmore\u001b[0m/ \r\n\u001b[J\u001b[38;5;33mbigcollection\u001b[0m/ \u001b[J\u001b[38;5;33mevenmore\u001b[0m/ \u001b[Jnewfile.txt \u001b[J \u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m \u001b[4mWallpaper/\u001b[24m\u001b[K" + ], + [ + 0.248788, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.004567, + "\u001b[10D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr\u001b[24m/2048example\u001b[1m/\u001b[0m" + ], + [ + 0.000182, + "\r\r\n" + ], + [ + 9.1e-05, + "\u001b[7m2048example/ \u001b[0m \u001b[38;5;13mdeer.jpg\u001b[0m \u001b[38;5;33mmore\u001b[0m/ \u001b[K\r\n\u001b[J\u001b[38;5;33mbigcollection\u001b[0m/ \u001b[J\u001b[38;5;33mevenmore\u001b[0m/ \u001b[Jnewfile.txt \u001b[J \u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m Wallpaper/2048example\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.24704, + "\r\r\n" + ], + [ + 3.2e-05, + "\u001b[7m2048example/ \u001b[0m \r\u001b[7m2048example/ \u001b[0m \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m Wallpaper/2048example\u001b[1m/\u001b[0m\u001b[K\u001b[12Dbigcollecti\u001b[0mon\u001b[1m/\u001b[0m" + ], + [ + 0.000389, + "\r\r\n" + ], + [ + 3e-05, + "\u001b[38;5;33m2048example\u001b[0m/ \r\u001b[1B\u001b[7mbigcollection/\u001b[0m \r\u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m Wallpaper/bigcollection\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.595335, + "\r\r\n\u001b[J\u001b[A\u001b[53C" + ], + [ + 0.003755, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mmv\u001b[39m \u001b[4mWallpaper/bigcollection\u001b[24m \u001b[4mWallpaper/bigcollection\u001b[24m\u001b[K\u001b[1C" + ], + [ + 0.271014, + "\b" + ], + [ + 0.554135, + 
"\u001b[23D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr\u001b[24m/\u001b[24mb\u001b[24mi\u001b[24mg\u001b[24mc\u001b[24mo\u001b[24ml\u001b[24ml\u001b[24me\u001b[24mc\u001b[24mt\u001b[24mi\u001b[24mo\u001b[24mn_" + ], + [ + 0.317529, + "N" + ], + [ + 0.104435, + "E" + ], + [ + 0.175308, + "W" + ], + [ + 0.956051, + "\u001b[?1l\u001b>" + ], + [ + 0.001192, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000754, + "\u001b]2;mv -i Wallpaper/bigcollection Wallpaper/bigcollection_NEW\u0007\u001b]1;mv\u0007" + ], + [ + 0.001182, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 9.8e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.7e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000188, + "\u001b[?2004h" + ], + [ + 1.857261, + "\u001b[32mmv\u001b[39m Wallpaper/bigcollection \u001b[4mWallpaper/bigcollection_NEW\u001b[24m" + ], + [ + 0.208181, + "\u001b[54D\u001b[1m#\u001b[1m \u001b[1mB\u001b[1mu\u001b[1mt\u001b[1m \u001b[1mw\u001b[1mh\u001b[1ma\u001b[1mt\u001b[1m \u001b[1mh\u001b[1ma\u001b[1mp\u001b[1mp\u001b[1me\u001b[1mn\u001b[1ms\u001b[1m,\u001b[1m \u001b[1mw\u001b[1mh\u001b[1me\u001b[1mn\u001b[1m \u001b[1mw\u001b[1me\u001b[24m\u001b[1m \u001b[24m\u001b[1mm\u001b[24m\u001b[1mo\u001b[24m\u001b[1mv\u001b[24m\u001b[1me\u001b[24m\u001b[1m \u001b[24m\u001b[1ma\u001b[24m\u001b[1m \u001b[24m\u001b[1md\u001b[24m\u001b[1mi\u001b[24m\u001b[1mr\u001b[24m\u001b[1m \u001b[24m\u001b[1ma\u001b[24m\u001b[1mn\u001b[24m\u001b[1md\u001b[24m\u001b[1m \u001b[24m\u001b[1mc\u001b[24m\u001b[1mr\u001b[24m\u001b[1me\u001b[24m\u001b[1ma\u001b[24m\u001b[1mt\u001b[24m\u001b[1me\u001b[24m\u001b[1m \u001b[24m\u001b[1ma\u001b[24m\u001b[1m \u001b[24m\u001b[1mn\u001b[24m\u001b[1me\u001b[1mw backup?\u001b[0m\u001b[39m" + ], + [ + 0.2399, + "\u001b[60D\u001b[1mo\u001b[1mr\u001b[1mg\u001b[1m \u001b[1mr\u001b[1me\u001b[1mc\u001b[1mo\u001b[1mg\u001b[1mn\u001b[1mi\u001b[1mz\u001b[1me\u001b[1md\u001b[1m \u001b[1mt\u001b[1mh\u001b[1ma\u001b[1mt\u001b[1m \u001b[1mm\u001b[1mo\u001b[1ms\u001b[1mt\u001b[1m \u001b[1mf\u001b[1mi\u001b[1ml\u001b[1me\u001b[1ms\u001b[1m \u001b[1md\u001b[1mi\u001b[1md\u001b[1m \u001b[1mn\u001b[1mo\u001b[1mt\u001b[1m \u001b[1mc\u001b[1mh\u001b[1ma\u001b[1mn\u001b[1mg\u001b[1me\u001b[1m \u001b[1ma\u001b[1mn\u001b[1md\u001b[1m \u001b[1md\u001b[1me\u001b[1md\u001b[1mu\u001b[1mp\u001b[1ml\u001b[1mi\u001b[1mc\u001b[1ma\u001b[1mt\u001b[1med them.\u001b[0m\u001b[39m" + ], + [ + 0.227963, + "\u001b[69D\u001b[1mN\u001b[1mo\u001b[1mt\u001b[1mi\u001b[1mc\u001b[1me\u001b[1m \u001b[1mt\u001b[1mh\u001b[1me\u001b[1m \u001b[1m\"\u001b[1mD\u001b[2C\u001b[0m\u001b[39m\u001b[39P\u001b[10C\u001b[1ms\u001b[1mi\u001b[1mz\u001b[1me\u001b[1m\"\u001b[1m in \"This archive\"?\u001b[0m\u001b[39m \u001b[20D" + ], + [ + 0.344233, + "\u001b[49D\u001b[1mW\u001b[1mo\u001b[1mw\u001b[1m,\u001b[1m \u001b[1mt\u001b[1mh\u001b[1mi\u001b[1ms\u001b[1m \u001b[1mw\u001b[1ma\u001b[1ms\u001b[1m \u001b[1ma\u001b[1m \u001b[1ml\u001b[1mo\u001b[1mt\u001b[1m \u001b[1mf\u001b[1ma\u001b[1ms\u001b[1mt\u001b[1me\u001b[1mr\u001b[1m!\u001b[0m\u001b[39m\u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[39m 
\u001b[22D" + ], + [ + 0.396096, + "\u001b[29D\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[0m\u001b[32mg\u001b[39m\u001b[0m\u001b[39m \u001b[0m\u001b[39mc\u001b[0m\u001b[39mr\u001b[0m\u001b[39me\u001b[0m\u001b[39ma\u001b[0m\u001b[39mt\u001b[0m\u001b[39me\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39m-\u001b[0m\u001b[39ms\u001b[0m\u001b[39mt\u001b[0m\u001b[39ma\u001b[0m\u001b[39mt\u001b[0m\u001b[39ms\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39m-\u001b[0m\u001b[39mp\u001b[0m\u001b[39mr\u001b[0m\u001b[39mo\u001b[0m\u001b[39mg\u001b[0m\u001b[39mr\u001b[0m\u001b[39me\u001b[0m\u001b[39mss --compression lz4 /media/backup/borgdemo::backup2 \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 0.854343, + "\b" + ], + [ + 0.192067, + "\b" + ], + [ + 0.161921, + "\b" + ], + [ + 0.152949, + "\b" + ], + [ + 0.158914, + "\b" + ], + [ + 0.150013, + "\b" + ], + [ + 0.168061, + "\b" + ], + [ + 0.170964, + "\b" + ], + [ + 0.156237, + "\b" + ], + [ + 0.161813, + "\b" + ], + [ + 0.698972, + "\b\u001b[P\u001b[10C \u001b[11D" + ], + [ + 0.185005, + "3\u001b[24m \u001b[4mW\u001b[4ma\u001b[4ml\u001b[4ml\u001b[4mp\u001b[4ma\u001b[4mp\u001b[4me\u001b[4mr\u001b[24m\u001b[10D" + ], + [ + 0.670037, + "\u001b[?1l\u001b>" + ], + [ + 0.002029, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000793, + "\u001b]2;borg create --stats --progress --compression lz4 Wallpaper\u0007\u001b]1;borg\u0007" + ], + [ + 0.621587, + "Enter passphrase for key /media/backup/borgdemo: " + ], + [ + 5.034162, + "\r\n" + ], + [ + 0.136527, + "0 B O 0 B C 0 B D 0 N Wallpaper \r" + ], + [ + 0.028491, + "Initializing cache transaction: Reading config \r" + ], + [ + 0.000245, + "Initializing cache transaction: Reading chunks \r" + ], + [ + 0.000278, + "Initializing cache transaction: Reading files \r" + ], + [ + 0.000296, + " \r" + ], + [ + 0.173817, + "10.07 MB O 10.04 MB C 0 B D 17 N Wallpaper/bigcollec...rland__England____A.jpg\r" + ], + [ + 0.20311, + "29.10 MB O 29.05 MB C 0 B D 50 N Wallpaper/bigcollec...Creek_Redwoods_Stat.jpg\r" + ], + [ + 0.202422, + "47.67 MB O 47.62 MB C 0 B D 83 N Wallpaper/bigcollec...rson-Wildschutzgebi.jpg\r" + ], + [ + 0.216811, + "64.30 MB O 64.19 MB C 0 B D 112 N Wallpaper/bigcollec..._Planten_un_Blomen.jpg\r" + ], + [ + 0.214409, + "80.89 MB O 80.75 MB C 0 B D 140 N Wallpaper/bigcollec...g__Cologne__German.jpg\r" + ], + [ + 0.202244, + "100.45 MB O 100.26 MB C 0 B D 173 N Wallpaper/bigcolle..._Menorca__Spanien.jpg\r" + ], + [ + 0.202027, + "116.80 MB O 116.61 MB C 0 B D 202 N Wallpaper/bigcolle...artenkirchen__Bay.jpg\r" + ], + [ + 0.202003, + "130.38 MB O 130.15 MB C 0 B D 227 N Wallpaper/bigcolle..._zur_Felsenkirche.jpg\r" + ], + [ + 0.234918, + "143.32 MB O 143.09 MB C 0 B D 251 N Wallpaper/bigcolle...land__Antarktis__.jpg\r" + ], + [ + 0.204976, + "156.31 MB O 156.07 MB C 0 B D 275 N Wallpaper/bigcolle...-Stadion__Rio_de_.jpg\r" + ], + [ + 0.205408, + "173.36 MB O 173.09 MB C 0 B D 304 N Wallpaper/bigcolle...lpark__Alaska__US.jpg\r" + ], + [ + 0.221776, + "183.65 MB O 183.35 MB C 0 B D 322 N Wallpaper/bigcolle...lmeer____Pasquale.jpg\r" + ], + [ + 0.201052, + "195.95 MB O 195.63 MB C 0 B D 345 N Wallpaper/bigcolle...Schutzgebiet_Mary.jpg\r" + ], + [ + 0.240687, + "217.22 MB O 216.88 MB C 0 B D 382 N Wallpaper/bigcolle...__Kappadokien__T_.jpg\r" + ], + [ + 0.20767, + "233.09 MB O 232.68 MB C 0 B D 409 N Wallpaper/bigcolle...epublic_ImagesShu.jpg\r" + ], + [ + 0.210433, + "250.21 MB O 249.81 MB C 0 B D 439 N Wallpaper/bigcolle...ter__Pr_fektur_Fu.jpg\r" + ], + [ + 0.200954, + 
"268.90 MB O 268.51 MB C 0 B D 472 N Wallpaper/bigcolle...uth_Carolina__USA.jpg\r" + ], + [ + 0.212828, + "286.72 MB O 286.35 MB C 0 B D 502 N Wallpaper/bigcolle...l_Park__Cobham__E.jpg\r" + ], + [ + 0.206527, + "296.84 MB O 296.47 MB C 0 B D 518 N Wallpaper/bigcolle...entAlamy______Bin.jpg\r" + ], + [ + 0.205003, + "310.38 MB O 310.00 MB C 0 B D 542 N Wallpaper/bigcolle...ationalpark__Flor.jpg\r" + ], + [ + 0.209538, + "320.38 MB O 320.03 MB C 0 B D 559 N Wallpaper/bigcolle...ma__Bahamas____Ji.jpg\r" + ], + [ + 0.201896, + "331.76 MB O 331.35 MB C 0 B D 580 N Wallpaper/bigcolle...rd_Bay__Eyre-Halb.jpg\r" + ], + [ + 0.207585, + "347.40 MB O 346.96 MB C 0 B D 606 N Wallpaper/bigcolle...s_Atlantischen_Oz.jpg\r" + ], + [ + 0.200781, + "369.05 MB O 368.62 MB C 0 B D 640 N Wallpaper/bigcolle...ankreich____John_.jpg\r" + ], + [ + 0.202326, + "379.22 MB O 378.78 MB C 0 B D 657 N Wallpaper/bigcolle...chtanemone__Insel.jpg\r" + ], + [ + 0.211929, + "389.83 MB O 389.36 MB C 0 B D 676 N Wallpaper/bigcolle...ugal____Mikael_Sv.jpg\r" + ], + [ + 0.219553, + "402.12 MB O 401.68 MB C 0 B D 695 N Wallpaper/bigcolle...rk_Sarek__Schwede.jpg\r" + ], + [ + 0.20375, + "416.03 MB O 415.48 MB C 0 B D 718 N Wallpaper/bigcolle...em_taubenetzten_G.jpg\r" + ], + [ + 0.201474, + "428.93 MB O 428.38 MB C 0 B D 742 N Wallpaper/bigcolle...Francisco_Bay__Ka.jpg\r" + ], + [ + 0.200248, + "437.92 MB O 437.35 MB C 0 B D 756 N Wallpaper/bigcolle..._der_N_he_von_Tro.jpg\r" + ], + [ + 0.215254, + "446.04 MB O 445.46 MB C 0 B D 770 N Wallpaper/bigcolle...enver__Colorado__.jpg\r" + ], + [ + 0.202133, + "455.95 MB O 455.36 MB C 0 B D 787 N Wallpaper/bigcolle..._Son_Doong-H_hle_.jpg\r" + ], + [ + 0.208499, + "471.36 MB O 470.71 MB C 0 B D 816 N Wallpaper/bigcolle...ly_National_Monum.jpg\r" + ], + [ + 0.205116, + "491.46 MB O 490.81 MB C 0 B D 853 N Wallpaper/bigcolle...ted_during_the_ 1.jpg\r" + ], + [ + 0.220215, + "510.73 MB O 510.07 MB C 0 B D 887 N Wallpaper/bigcolle..._Blitzeinschlag_i.jpg\r" + ], + [ + 0.201825, + "522.32 MB O 521.65 MB C 0 B D 906 N Wallpaper/bigcolle...vador__Santiago__.jpg\r" + ], + [ + 0.202937, + "534.02 MB O 533.34 MB C 0 B D 925 N Wallpaper/bigcolle...doah_National_Par.jpg\r" + ], + [ + 0.202635, + "550.50 MB O 549.83 MB C 0 B D 951 N Wallpaper/bigcolle...liffs_National_Mo.jpg\r" + ], + [ + 0.202296, + "564.18 MB O 563.47 MB C 0 B D 976 N Wallpaper/bigcolle...n_in_Aktion____Va.jpg\r" + ], + [ + 0.203791, + "576.43 MB O 575.71 MB C 0 B D 996 N Wallpaper/bigcolle...______WRIGHTSuper.jpg\r" + ], + [ + 0.439796, + "Compacting segments 0% \r" + ], + [ + 0.000919, + "Compacting segments 50% \r" + ], + [ + 3.7e-05, + " \r" + ], + [ + 0.040817, + "Saving files cache \r" + ], + [ + 0.010023, + "Saving chunks cache \r" + ], + [ + 0.000278, + "Saving cache config \r" + ], + [ + 0.093829, + " \r" + ], + [ + 1.6e-05, + " \r" + ], + [ + 0.000308, + "------------------------------------------------------------------------------\r\n" + ], + [ + 9e-06, + "Archive name: backup3\r\n" + ], + [ + 3.8e-05, + "Archive fingerprint: 36cd8fdf9b8b2e3bbb3fc2bb600acd48609efaf3a0880f900e0701a47ff69d4d\r\n" + ], + [ + 2e-05, + "Time (start): Fri, 2017-07-14 21:55:37\r\n" + ], + [ + 2.4e-05, + "Time (end): Fri, 2017-07-14 21:55:46\r\n" + ], + [ + 2.2e-05, + "Duration: 8.58 seconds\r\n" + ], + [ + 2.6e-05, + "Number of files: 1051\r\n" + ], + [ + 2.6e-05, + "Utilization of maximum supported archive size: 0%\r\n" + ], + [ + 2.1e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 
2.6e-05, + " Original size Compressed size Deduplicated size\r\n" + ], + [ + 2.4e-05, + "This archive: 618.96 MB 617.47 MB 107.55 kB\r\n" + ], + [ + 2.1e-05, + "All archives: 1.86 GB 1.85 GB 561.88 MB\r\n" + ], + [ + 2.5e-05, + "\r\n" + ], + [ + 3.9e-05, + " Unique chunks Total chunks\r\n" + ], + [ + 1.1e-05, + "Chunk index: 1006 3283\r\n" + ], + [ + 4.8e-05, + "------------------------------------------------------------------------------\r\n" + ], + [ + 0.048607, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 1.9e-05, + "\u001b]1;~/Pictures\u0007" + ], + [ + 7.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00017, + "\u001b[?2004h" + ], + [ + 1.509372, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.261334, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.25826, + "\b\b\u001b[1m#\u001b[1m \u001b[1mS\u001b[0m\u001b[39m" + ], + [ + 0.162616, + "\b\u001b[1mS\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.27891, + "\b\u001b[1mt\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.174723, + "\b\u001b[1mi\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.124142, + "\b\u001b[1ml\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 1.012371, + "\b\u001b[1ml\u001b[1m quite fast…\u001b[0m\u001b[39m" + ], + [ + 0.74493, + "\u001b[?1l\u001b>" + ], + [ + 0.000416, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000686, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 6e-06, + "\u001b]1;~/Pictures\u0007" + ], + [ + 8.8e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000111, + "\u001b[?1h\u001b=" + ], + [ + 0.000271, + "\u001b[?2004h" + ], + [ + 2.038818, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.861519, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 2.235116, + "\b\b\u001b[1m#\u001b[1m \u001b[1mB\u001b[0m\u001b[39m" + ], + [ + 0.20981, + "\b\u001b[1mB\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.216676, + "\b\u001b[1mu\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.72822, + "\b\u001b[1mt\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 1.094756, + "\b\u001b[1m \u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.315528, + "\b\u001b[1mw\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.23713, + "\b\u001b[1mh\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.286805, + "\b\u001b[1me\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.638764, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.81778, + "\b\u001b[1m \u001b[1my\u001b[0m\u001b[39m" + ], + [ + 0.245269, + "\b\u001b[1my\u001b[1mou look at the \"deduplicated file size\" again, you see that borg\u001b[1m \u001b[1malso recognized that only the dir and not the files changed in this backup.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.34618, + "\u001b[?1l\u001b>" + ], + [ + 0.000453, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000631, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.00011, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000113, + "\u001b[?1h\u001b=" + ], + [ + 0.000262, + "\u001b[?2004h" + ], + [ + 3.418707, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.275819, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.2004, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.172829, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.308378, + "\b\u001b[1mo\u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.703684, + "\b\u001b[1mw\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.8183, + "\b\u001b[1m \u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.193322, + "\b\u001b[1ml\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.18438, 
+ "\b\u001b[1me\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.389996, + "\b\u001b[1mt\u001b[1m's look into a repo.\u001b[0m\u001b[39m" + ], + [ + 0.857879, + "\u001b[?1l\u001b>" + ], + [ + 0.000349, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000564, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 2.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000246, + "\u001b[?2004h" + ], + [ + 1.60039, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.177554, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.117613, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.12982, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.145309, + " " + ], + [ + 0.256078, + "l" + ], + [ + 0.145029, + "i" + ], + [ + 0.100415, + "s" + ], + [ + 0.137667, + "t" + ], + [ + 0.172051, + " " + ], + [ + 0.490083, + "\u001b[4m/\u001b[24m" + ], + [ + 0.190449, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.216676, + "\b\u001b[4mm\u001b[4me\u001b[24m" + ], + [ + 0.174909, + "\b\u001b[4me\u001b[4md\u001b[24m" + ], + [ + 0.242368, + "\u001b[?7l\u001b[31m......\u001b[39m" + ], + [ + 3.2e-05, + "\u001b[?7h" + ], + [ + 0.00599, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list \u001b[4m/media\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.345758, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003294, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list \u001b[4m/media/backup\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.253376, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003389, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list \u001b[4m/media/backup/borgdemo\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 1.036958, + "\b\b\u001b[4mo\u001b[24m\u001b[0m\u001b[24m \b" + ], + [ + 2.6e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.000854, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000717, + "\u001b]2;borg list /media/backup/borgdemo\u0007\u001b]1;borg\u0007" + ], + [ + 0.624291, + "Enter passphrase for key /media/backup/borgdemo: " + ], + [ + 2.363577, + "\r\n" + ], + [ + 0.158203, + "backup1 Fri, 2017-07-14 21:54:06 [9758c7db339a066360bffad17b2ffac4fb368c6722c0be3a47a7a9b631f06407]\r\nbackup2 Fri, 2017-07-14 21:54:56 [5aaf03d1c710cf774f9c9ff1c6317b621c14e519c6bac459f6d64b31e3bbd200]\r\nbackup3 Fri, 2017-07-14 21:55:37 [36cd8fdf9b8b2e3bbb3fc2bb600acd48609efaf3a0880f900e0701a47ff69d4d]\r\n" + ], + [ + 0.044143, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.4e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000207, + "\u001b[?2004h" + ], + [ + 5.582312, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.371134, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.184918, + "\b\b\u001b[1m#\u001b[1m \u001b[1mY\u001b[0m\u001b[39m" + ], + [ + 0.177123, + "\b\u001b[1mY\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.148041, + "\b\u001b[1mo\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.461676, + "\b\u001b[1mu\u001b[1m'\u001b[0m\u001b[39m" + ], + [ + 0.668888, + "\b\u001b[1m'\u001b[1mll see a list of all backups.\u001b[0m\u001b[39m" + ], + [ + 0.876235, + "\u001b[?1l\u001b>" + ], + [ + 0.000363, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001075, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 
8.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000297, + "\u001b[?2004h" + ], + [ + 2.475491, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.382591, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.23474, + "\b\b\u001b[1m#\u001b[1m \u001b[1mY\u001b[0m\u001b[39m" + ], + [ + 0.210269, + "\b\u001b[1mY\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.196151, + "\b\u001b[1mo\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.460253, + "\b\u001b[1mu\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.305764, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.184098, + "\b\u001b[1mc\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.212534, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.305097, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.163485, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.194803, + "\b\u001b[1ma\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.282791, + "\b\u001b[1ml\u001b[1mso use the same command to look into an archive. But we better f\u001b[1mi\u001b[1mlter the output here:\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.679252, + "\u001b[?1l\u001b>" + ], + [ + 0.000434, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000646, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000107, + "\u001b[?1h\u001b=" + ], + [ + 0.000302, + "\u001b[?2004h" + ], + [ + 1.162094, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.184756, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.114887, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.143983, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.230507, + " " + ], + [ + 0.414382, + "l" + ], + [ + 0.153591, + "i" + ], + [ + 0.044178, + "s" + ], + [ + 0.236299, + "t" + ], + [ + 0.330148, + " " + ], + [ + 0.70018, + "\u001b[4m/\u001b[24m" + ], + [ + 0.193582, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.172118, + "\b\u001b[4mm\u001b[4me\u001b[24m" + ], + [ + 0.134283, + "\b\u001b[4me\u001b[4md\u001b[24m" + ], + [ + 0.250757, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.006227, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list \u001b[4m/media\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.374078, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003992, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list \u001b[4m/media/backup\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.2609, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003434, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list \u001b[4m/media/backup/borgdemo\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.237963, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003371, + "\r\r\u001b[34C\u001b[0m\u001b[4m/\u001b[24m" + ], + [ + 0.000178, + "\r\r\n\u001b[J" + ], + [ + 4.2e-05, + "\u001b[0mREADME \u001b[38;5;33mdata\u001b[0m/ index.14 nonce \r\n\u001b[Jconfig \u001b[Jhints.14 \u001b[Jintegrity.14 \u001b[J \u001b[A\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mborg\u001b[39m list \u001b[4m/media/backup/borgdemo/\u001b[24m\u001b[K" + ], + [ + 0.833604, + "\b\b\u001b[4mo\u001b[24m\u001b[24m \b" + ], + [ + 1.042199, + 
"\u001b[22D\u001b[24m/\u001b[24mm\u001b[24me\u001b[24md\u001b[24mi\u001b[24ma\u001b[24m/\u001b[24mb\u001b[24ma\u001b[24mc\u001b[24mk\u001b[24mu\u001b[24mp\u001b[24m/\u001b[24mb\u001b[24mo\u001b[24mr\u001b[24mg\u001b[24md\u001b[24me\u001b[24mm\u001b[24mo:" + ], + [ + 0.139477, + ":" + ], + [ + 0.711096, + "b" + ], + [ + 0.099664, + "a" + ], + [ + 0.149912, + "c" + ], + [ + 0.16888, + "k" + ], + [ + 0.923931, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.005451, + "\r\r\r\r\n\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m list /media/backup/borgdemo::back\u001b[K" + ], + [ + 0.885297, + "u" + ], + [ + 0.29853, + "p" + ], + [ + 0.456244, + "3" + ], + [ + 1.061844, + " " + ], + [ + 0.589511, + "|" + ], + [ + 0.527539, + " " + ], + [ + 0.343662, + "\u001b[32mg\u001b[39m" + ], + [ + 0.117117, + "\b\u001b[32mg\u001b[32mr\u001b[39m" + ], + [ + 0.124331, + "\b\b\u001b[1m\u001b[31mg\u001b[1m\u001b[31mr\u001b[1m\u001b[31me\u001b[0m\u001b[39m" + ], + [ + 0.726149, + "\b\b\b\u001b[0m\u001b[32mg\u001b[0m\u001b[32mr\u001b[0m\u001b[32me\u001b[32mp\u001b[39m" + ], + [ + 0.198601, + " " + ], + [ + 0.476336, + "\u001b[33m'\u001b[39m" + ], + [ + 0.392009, + "\b\u001b[33m'\u001b[33md\u001b[39m" + ], + [ + 0.627529, + "\b\u001b[33md\u001b[33me\u001b[39m" + ], + [ + 0.142332, + "\b\u001b[33me\u001b[33me\u001b[39m" + ], + [ + 0.322681, + "\b\u001b[33me\u001b[33mr\u001b[39m" + ], + [ + 0.916328, + "\b\u001b[33mr\u001b[33m.\u001b[39m" + ], + [ + 0.50653, + "\b\u001b[33m.\u001b[33mj\u001b[39m" + ], + [ + 0.242318, + "\b\u001b[33mj\u001b[33mp\u001b[39m" + ], + [ + 0.272214, + "\b\u001b[33mp\u001b[33mg\u001b[39m" + ], + [ + 0.581098, + "\b\u001b[33mg\u001b[33m'\u001b[39m" + ], + [ + 2.559186, + "\u001b[?1l\u001b>" + ], + [ + 0.001382, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000773, + "\u001b]2;borg list /media/backup/borgdemo::backup3 | grep --color 'deer.jpg'\u0007\u001b]1;borg\u0007" + ], + [ + 0.628501, + "Enter passphrase for key /media/backup/borgdemo: " + ], + [ + 2.584332, + "\r\n" + ], + [ + 0.141205, + "-rw-rw-r-- rugk rugk 3781749 Fri, 2017-07-14 17:01:45 Wallpaper/\u001b[01;31m\u001b[Kdeer.jpg\u001b[m\u001b[K\r\n" + ], + [ + 0.054041, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000135, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 2.7e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00017, + "\u001b[?2004h" + ], + [ + 2.222435, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.269828, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.211035, + "\b\b\u001b[1m#\u001b[1m \u001b[1mO\u001b[0m\u001b[39m" + ], + [ + 0.184712, + "\b\u001b[1mO\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.374912, + "\b\u001b[1mh\u001b[1m, we found our picture. 
Now extract it:\u001b[0m\u001b[39m" + ], + [ + 1.545747, + "\u001b[?1l\u001b>" + ], + [ + 0.000418, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00063, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 3.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000135, + "\u001b[?1h\u001b=" + ], + [ + 0.000463, + "\u001b[?2004h" + ], + [ + 1.638625, + "\u001b[1m\u001b[31mm\u001b[0m\u001b[39m" + ], + [ + 0.156977, + "\b\u001b[0m\u001b[32mm\u001b[32mv\u001b[39m" + ], + [ + 0.220013, + " " + ], + [ + 0.151118, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.002944, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mmv\u001b[39m \u001b[4mWallpaper\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.668654, + "\b\b\u001b[4mr\u001b[24m\u001b[0m\u001b[24m " + ], + [ + 0.297169, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.005693, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mmv\u001b[39m \u001b[4mWallpaper\u001b[24m \u001b[4mWallpaper\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.672973, + "\b\b\u001b[4mr\u001b[24m\u001b[0m\u001b[24m \b" + ], + [ + 0.263416, + "\u001b[9D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr." + ], + [ + 0.334671, + "o" + ], + [ + 0.19768, + "r" + ], + [ + 0.142283, + "i" + ], + [ + 0.17833, + "g" + ], + [ + 0.688576, + "\u001b[?1l\u001b>" + ], + [ + 0.001806, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000954, + "\u001b]2;mv -i Wallpaper Wallpaper.orig\u0007\u001b]1;mv\u0007" + ], + [ + 0.002076, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000153, + "\u001b[?2004h" + ], + [ + 1.864942, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.18048, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.143872, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.161829, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.170439, + " " + ], + [ + 0.248909, + "c" + ], + [ + 0.365319, + "\b \b" + ], + [ + 0.142233, + "e" + ], + [ + 0.157272, + "x" + ], + [ + 0.166861, + "t" + ], + [ + 0.115114, + "r" + ], + [ + 0.103674, + "a" + ], + [ + 0.102162, + "c" + ], + [ + 0.163264, + "t" + ], + [ + 0.308166, + " " + ], + [ + 1.386497, + "\u001b[4m/\u001b[24m" + ], + [ + 0.183134, + "\b\u001b[4m/\u001b[4mm\u001b[24m" + ], + [ + 0.115533, + "\b\u001b[4mm\u001b[4me\u001b[24m" + ], + [ + 0.12416, + "\b\u001b[4me\u001b[4md\u001b[24m" + ], + [ + 0.206989, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003179, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m extract \u001b[4m/media\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.241808, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003324, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m extract \u001b[4m/media/backup\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.193552, + "\u001b[?7l" + ], + [ + 2.6e-05, + "\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003368, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mborg\u001b[39m extract \u001b[4m/media/backup/borgdemo\u001b[1m\u001b[4m/\u001b[0m\u001b[24m\u001b[K" + ], + [ + 0.700774, + "\b\b\u001b[4mo\u001b[24m\u001b[0m\u001b[24m \b" + ], + [ + 1.151074, + 
"\u001b[22D\u001b[24m/\u001b[24mm\u001b[24me\u001b[24md\u001b[24mi\u001b[24ma\u001b[24m/\u001b[24mb\u001b[24ma\u001b[24mc\u001b[24mk\u001b[24mu\u001b[24mp\u001b[24m/\u001b[24mb\u001b[24mo\u001b[24mr\u001b[24mg\u001b[24md\u001b[24me\u001b[24mm\u001b[24mo:" + ], + [ + 0.146222, + ":" + ], + [ + 0.579644, + "b" + ], + [ + 0.102789, + "a" + ], + [ + 0.178851, + "c" + ], + [ + 0.133936, + "k" + ], + [ + 0.124089, + "u" + ], + [ + 0.229823, + "p" + ], + [ + 0.174738, + "3" + ], + [ + 0.306821, + " " + ], + [ + 4.287483, + "\u001b[45D\u001b[39mb\u001b[39mo\u001b[39mr\u001b[39mg\u001b[41C\u001b[7mWallpaper/deer.jpg\u001b[27m" + ], + [ + 1.718396, + "\u001b[63D\u001b[32mb\u001b[32mo\u001b[32mr\u001b[32mg\u001b[39m\u001b[41C\u001b[27mW\u001b[27ma\u001b[27ml\u001b[27ml\u001b[27mp\u001b[27ma\u001b[27mp\u001b[27me\u001b[27mr\u001b[27m/\u001b[27md\u001b[27me\u001b[27me\u001b[27mr\u001b[27m.\u001b[27mj\u001b[27mp\u001b[27mg" + ], + [ + 6.4e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.001749, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000991, + "\u001b]2;borg extract /media/backup/borgdemo::backup3 Wallpaper/deer.jpg\u0007\u001b]1;borg\u0007" + ], + [ + 0.633044, + "Enter passphrase for key /media/backup/borgdemo: " + ], + [ + 2.659432, + "\r\n" + ], + [ + 0.198939, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000134, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 7.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000169, + "\u001b[?2004h" + ], + [ + 4.506682, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.287992, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.13604, + "\b\b\u001b[1m#\u001b[1m \u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.132241, + "\b\u001b[1mA\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.115152, + "\b\u001b[1mn\u001b[1md\u001b[0m\u001b[39m" + ], + [ + 0.190449, + "\b\u001b[1md\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.168765, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.248816, + "\b\u001b[1mc\u001b[1mheck that it's the same:\u001b[0m\u001b[39m" + ], + [ + 1.093037, + "\u001b[?1l\u001b>" + ], + [ + 0.000401, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000745, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 7.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.0001, + "\u001b[?1h\u001b=" + ], + [ + 0.000321, + "\u001b[?2004h" + ], + [ + 1.350298, + "\u001b[32md\u001b[39m" + ], + [ + 0.181769, + "\b\u001b[1m\u001b[31md\u001b[1m\u001b[31mi\u001b[0m\u001b[39m" + ], + [ + 0.148155, + "\b\b\u001b[1m\u001b[31md\u001b[1m\u001b[31mi\u001b[1m\u001b[31mf\u001b[0m\u001b[39m" + ], + [ + 0.13874, + "\b\b\b\u001b[0m\u001b[32md\u001b[0m\u001b[32mi\u001b[0m\u001b[32mf\u001b[32mf\u001b[39m" + ], + [ + 0.321772, + " " + ], + [ + 0.410311, + "-" + ], + [ + 0.160707, + "s" + ], + [ + 0.223167, + " " + ], + [ + 0.856546, + "\u001b[4mW\u001b[24m" + ], + [ + 0.184551, + "\b\u001b[4mW\u001b[4ma\u001b[24m" + ], + [ + 0.211734, + "\b\u001b[4ma\u001b[4ml\u001b[24m" + ], + [ + 0.115481, + "\b\u001b[4ml\u001b[4ml\u001b[24m" + ], + [ + 0.13804, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.007132, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 0.620064, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.004082, + "\r\r\u001b[19C" + ], + [ + 0.000148, + "\r\r\n\u001b[J\u001b[J\u001b[38;5;33mWallpaper\u001b[0m/ 
\u001b[J\u001b[38;5;33mWallpaper.orig\u001b[0m/\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 0.83944, + "\u001b[?7l" + ], + [ + 2.4e-05, + "\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.003487, + "\u001b[9D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr\u001b[1m/\u001b[0m" + ], + [ + 0.000166, + "\r\r\n\u001b[J\u001b[7mWallpaper/ \u001b[0m \u001b[J\u001b[38;5;33mWallpaper.orig\u001b[0m/\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mdiff\u001b[39m -s Wallpaper\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.488495, + "\r\r\n" + ], + [ + 1.6e-05, + "\u001b[7mWallpaper/ \u001b[0m \r\u001b[7mWallpaper/ \u001b[0m \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mdiff\u001b[39m -s Wallpaper\u001b[1m/\u001b[0m\u001b[K\r\r\n\u001b[J\u001b[A\u001b[20C" + ], + [ + 0.001959, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper\u001b[24m\u001b[K\u001b[1C" + ], + [ + 0.285593, + "\b" + ], + [ + 0.303988, + "\b\u001b[4mr\u001b[4m/\u001b[24m" + ], + [ + 0.798187, + "\b\u001b[4m/\u001b[4md\u001b[24m" + ], + [ + 0.241007, + "\b\u001b[4md\u001b[4me\u001b[24m" + ], + [ + 0.21286, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.00579, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m\u001b[1m \u001b[0m\u001b[K" + ], + [ + 1.289271, + "\b\u001b[0m \u001b[4mW\u001b[24m" + ], + [ + 0.148557, + "\b\u001b[4mW\u001b[4ma\u001b[24m" + ], + [ + 0.16621, + "\b\u001b[4ma\u001b[4ml\u001b[24m" + ], + [ + 0.097599, + "\b\u001b[4ml\u001b[4ml\u001b[24m" + ], + [ + 0.111176, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.005059, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 0.431538, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.005176, + "\r\r\u001b[38C" + ], + [ + 0.000155, + "\r\r\n\u001b[J\u001b[J\u001b[38;5;33mWallpaper\u001b[0m/ \u001b[J\u001b[38;5;33mWallpaper.orig\u001b[0m/\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m \u001b[4mWallpaper\u001b[24m\u001b[K" + ], + [ + 0.389092, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.004561, + "\u001b[9D\u001b[24mW\u001b[24ma\u001b[24ml\u001b[24ml\u001b[24mp\u001b[24ma\u001b[24mp\u001b[24me\u001b[24mr\u001b[1m/\u001b[0m" + ], + [ + 0.000155, + "\r\r\n\u001b[J\u001b[7mWallpaper/ \u001b[0m \u001b[J\u001b[38;5;33mWallpaper.orig\u001b[0m/\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C" + ], + [ + 1.3e-05, + "\u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m Wallpaper\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.260844, + "\r\r\n" + ], + [ + 3.6e-05, + "\u001b[7mWallpaper/ \u001b[0m \r\u001b[7mWallpaper/ \u001b[0m \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m Wallpaper\u001b[1m/\u001b[0m\u001b[K\b\u001b[0m.orig\u001b[1m/\u001b[0m" + ], + [ + 0.000163, + "\r\r\n\u001b[17C\u001b[7mWallpaper.orig/\u001b[0m\r\u001b[38;5;33mWallpaper\u001b[0m/ \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m Wallpaper.orig\u001b[1m/\u001b[0m\u001b[K" + ], + [ + 0.598634, + "\r\r\n\u001b[J\u001b[A\u001b[44C" + ], + [ + 0.002461, + 
"\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m \u001b[4mWallpaper.orig\u001b[24m\u001b[K\u001b[1C" + ], + [ + 0.275896, + "\b" + ], + [ + 0.321512, + "\b\u001b[4mg\u001b[4m/\u001b[24m" + ], + [ + 1.499007, + "\b\u001b[4m/\u001b[4md\u001b[24m" + ], + [ + 0.165243, + "\b\u001b[4md\u001b[4me\u001b[24m" + ], + [ + 0.260397, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.005274, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mdiff\u001b[39m -s \u001b[4mWallpaper/deer.jpg\u001b[24m \u001b[4mWallpaper.orig/deer.jpg\u001b[24m\u001b[1m \u001b[0m\u001b[K" + ], + [ + 1.658125, + "\b\u001b[0m \b" + ], + [ + 1.5e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.001138, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000783, + "\u001b]2;diff -s Wallpaper/deer.jpg Wallpaper.orig/deer.jpg\u0007\u001b]1;diff\u0007" + ], + [ + 0.057035, + "Files Wallpaper/deer.jpg and Wallpaper.orig/deer.jpg are identical\r\n" + ], + [ + 0.000183, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000114, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000199, + "\u001b[?2004h" + ], + [ + 3.579542, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.624347, + "\b\u001b[0m\u001b[39m \b" + ], + [ + 0.353186, + "\u001b[?1l\u001b>" + ], + [ + 0.000351, + "\u001b[?2004l\r\r\n" + ], + [ + 0.0006, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 2.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.00013, + "\u001b[?1h\u001b=" + ], + [ + 0.000185, + "\u001b[?2004h" + ], + [ + 0.726522, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.358332, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.183839, + "\b\b\u001b[1m#\u001b[1m \u001b[1mA\u001b[0m\u001b[39m" + ], + [ + 0.150451, + "\b\u001b[1mA\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.128839, + "\b\u001b[1mn\u001b[1md\u001b[0m\u001b[39m" + ], + [ + 0.583652, + "\b\u001b[1md\u001b[1m,\u001b[0m\u001b[39m" + ], + [ + 0.152149, + "\b\u001b[1m,\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.240696, + "\b\u001b[1m \u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.130032, + "\b\u001b[1mo\u001b[1mf\u001b[0m\u001b[39m" + ], + [ + 0.306901, + "\b\u001b[1mf\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.181176, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.271007, + "\b\u001b[1mc\u001b[1mourse, we can also create remote repos via ssh when borg is setup\u001b[1m \u001b[1mthere. 
This command creates a new remote repo in a subdirectory called \"demo\"\u001b[1m:\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.040444, + "\u001b[?1l\u001b>" + ], + [ + 0.000423, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000711, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 6.8e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000297, + "\u001b[?2004h" + ], + [ + 1.613372, + "\u001b[1m\u001b[31mb\u001b[0m\u001b[39m" + ], + [ + 0.204618, + "\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[0m\u001b[39m" + ], + [ + 0.121257, + "\b\b\u001b[1m\u001b[31mb\u001b[1m\u001b[31mo\u001b[1m\u001b[31mr\u001b[0m\u001b[39m" + ], + [ + 0.228506, + "\b\b\b\u001b[0m\u001b[32mb\u001b[0m\u001b[32mo\u001b[0m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.469213, + " " + ], + [ + 0.23811, + "i" + ], + [ + 0.139149, + "n" + ], + [ + 0.157285, + "i" + ], + [ + 0.219101, + "t" + ], + [ + 0.389153, + " " + ], + [ + 0.633813, + "-" + ], + [ + 0.102895, + "-" + ], + [ + 0.267338, + "e" + ], + [ + 0.244036, + "n" + ], + [ + 0.303722, + "c" + ], + [ + 0.117325, + "r" + ], + [ + 0.112606, + "y" + ], + [ + 0.250891, + "p" + ], + [ + 0.258828, + "t" + ], + [ + 0.276877, + "i" + ], + [ + 0.131491, + "o" + ], + [ + 0.206852, + "n" + ], + [ + 0.966102, + "=" + ], + [ + 0.388021, + "r" + ], + [ + 0.146133, + "e" + ], + [ + 0.176939, + "p" + ], + [ + 0.139187, + "o" + ], + [ + 0.273188, + "k" + ], + [ + 0.172429, + "e" + ], + [ + 0.306306, + "y" + ], + [ + 0.851125, + " " + ], + [ + 0.868971, + "b" + ], + [ + 0.261136, + "o" + ], + [ + 0.12143, + "r" + ], + [ + 0.15507, + "g" + ], + [ + 0.186684, + "d" + ], + [ + 0.141974, + "e" + ], + [ + 0.13004, + "m" + ], + [ + 0.172673, + "o" + ], + [ + 1.041475, + "@" + ], + [ + 0.536019, + "r" + ], + [ + 0.02293, + "e" + ], + [ + 0.223755, + "m" + ], + [ + 0.152859, + "o" + ], + [ + 0.222368, + "t" + ], + [ + 0.095106, + "e" + ], + [ + 0.33914, + "s" + ], + [ + 0.213902, + "e" + ], + [ + 0.136448, + "r" + ], + [ + 0.196228, + "v" + ], + [ + 0.171447, + "e" + ], + [ + 0.154296, + "r" + ], + [ + 1.151168, + "." + ], + [ + 0.198973, + "e" + ], + [ + 0.195428, + "x" + ], + [ + 0.163512, + "a" + ], + [ + 0.157805, + "m" + ], + [ + 0.174865, + "p" + ], + [ + 0.103133, + "l" + ], + [ + 0.145276, + "e" + ], + [ + 2.109373, + ":" + ], + [ + 0.494126, + "." + ], + [ + 0.315325, + "/" + ], + [ + 0.182218, + "d" + ], + [ + 0.138815, + "e" + ], + [ + 0.143066, + "m" + ], + [ + 0.17136, + "o" + ], + [ + 1.831712, + "\u001b[?1l\u001b>" + ], + [ + 0.001025, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000824, + "\u001b]2;borg init --encryption=repokey borgdemo@remoteserver.example:./demo\u0007\u001b]1;borg\u0007" + ], + [ + 6.069586, + "Enter new passphrase: " + ], + [ + 2.598936, + "\r\n" + ], + [ + 0.000189, + "Enter same passphrase again: " + ], + [ + 2.044707, + "\r\n" + ], + [ + 0.000198, + "Do you want your passphrase to be displayed for verification? 
[yN]: " + ], + [ + 1.415539, + "\r\n" + ], + [ + 1.950077, + "\r\nBy default repositories initialized with this version will produce security\r\nerrors if written to with an older version (up to and including Borg 1.0.8).\r\n\r\nIf you want to use these older versions, you can disable the check by running:\r\nborg upgrade --disable-tam 'ssh://borgdemo@remoteserver.example/./demo'\r\n\r\nSee https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability for details about the security implications.\r\n" + ], + [ + 0.548386, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 9.5e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000105, + "\u001b[?1h\u001b=" + ], + [ + 0.000221, + "\u001b[?2004h" + ], + [ + 0.82377, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.662248, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.610999, + "\b\b\u001b[1m#\u001b[1m \u001b[1mE\u001b[0m\u001b[39m" + ], + [ + 0.267513, + "\b\u001b[1mE\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.185698, + "\b\u001b[1ma\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.161855, + "\b\u001b[1ms\u001b[1my\u001b[0m\u001b[39m" + ], + [ + 0.46273, + "\b\u001b[1my\u001b[1m, isn't it? That's all you need to know for basic usage.\u001b[0m\u001b[39m" + ], + [ + 1.861984, + "\u001b[?1l\u001b>" + ], + [ + 0.001044, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001525, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 6.3e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000316, + "\u001b[?2004h" + ], + [ + 1.009133, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.240205, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.217287, + "\b\b\u001b[1m#\u001b[1m \u001b[1mI\u001b[0m\u001b[39m" + ], + [ + 0.163888, + "\b\u001b[1mI\u001b[1mf\u001b[0m\u001b[39m" + ], + [ + 0.349458, + "\b\u001b[1mf\u001b[1m you want to see more, have a look at the screencast showing the \"advanc\u001b[1me\u001b[1md usage\".\u001b[0m\u001b[39m\u001b[K" + ], + [ + 2.780664, + "\u001b[?1l\u001b>" + ], + [ + 0.000734, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000812, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 4.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000113, + "\u001b[?1h\u001b=" + ], + [ + 0.000299, + "\u001b[?2004h" + ], + [ + 1.119856, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.281915, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.244389, + "\b\b\u001b[1m#\u001b[1m \u001b[1mI\u001b[0m\u001b[39m" + ], + [ + 0.143064, + "\b\u001b[1mI\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.171731, + "\b\u001b[1mn\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.139438, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.388834, + "\b\u001b[1ma\u001b[1mny case, enjoy using borg!\u001b[0m\u001b[39m" + ], + [ + 1.502218, + "\u001b[?1l\u001b>" + ], + [ + 0.000883, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000735, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 9.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000113, + "\u001b[?1h\u001b=" + ], + [ + 0.000498, + "\u001b[?2004h" + ], + [ + 1.273251, + "\u001b[?2004l\r\r\n" + ] + ] +} diff --git a/docs/misc/asciinema/basic.sh b/docs/misc/asciinema/basic.sh new file mode 100644 index 00000000..743a506d --- /dev/null +++ b/docs/misc/asciinema/basic.sh @@ -0,0 +1,53 @@ +# Here you'll see some basic commands to start working with borg. 
+# Note: This teaser screencast was made with borg version 1.1.0 – older or newer borg versions may behave differently. +# But let's start. + +# First of all, you can always get help: +borg help +# These are a lot of commands, so better we start with a few: +# Let's create a repo on an external drive… +borg init --encryption=repokey /media/backup/borgdemo +# This uses the repokey encryption. You may look at "borg help init" or the online doc at https://borgbackup.readthedocs.io/ for other modes. + +# So now, let's create our first (compressed) backup. +borg create --stats --progress --compression lz4 /media/backup/borgdemo::backup1 Wallpaper + +# That's nice, so far. +# So let's add a new file… +echo "new nice file" > Wallpaper/newfile.txt + + +borg create --stats --progress --compression lz4 /media/backup/borgdemo::backup2 Wallpaper + +# Wow, this was a lot faster! +# Notice the "Deduplicated size" for "This archive"! +# Borg recognized that most files did not change and deduplicated them. + +# But what happens, when we move a dir and create a new backup? +mv … + +borg create --stats --progress --compression lz4 /media/backup/borgdemo::backup3 Wallpaper + +# Still quite fast… +# But when you look at the "deduplicated file size" again, you see that borg also recognized that only the dir and not the files changed in this backup. + +# Now lets look into a repo. +borg list /media/backup/borgdemo + +# You'll see a list of all backups. +# You can also use the same command to look into an archive. But we better filter the output here: +borg list /media/backup/borgdemo::backup3 | grep 'deer.jpg' + +# Oh, we found our picture. Now extract it… +mv Wallpaper Wallpaper.orig +borg extract /media/backup/borgdemo::backup3 + +# And check that it's the same: +diff -s Wallpaper/deer.jpg Wallpaper.orig/deer.jpg + +# And, of course, we can also create remote repos via ssh when borg is setup there. This command creates a new remote repo in a subdirectory called "demo": +borg init --encryption=repokey borgdemo@remoteserver.example:./demo + +# Easy, isn't it? That's all you need to know for basic usage. +# If you want to see more, have a look at the screencast showing the "advanced usage". +# In any case, enjoy using borg! diff --git a/docs/misc/asciinema/install.json b/docs/misc/asciinema/install.json new file mode 100644 index 00000000..477b1ef2 --- /dev/null +++ b/docs/misc/asciinema/install.json @@ -0,0 +1,1354 @@ +{ + "version": 1, + "width": 78, + "height": 25, + "duration": 140.275038, + "command": null, + "title": null, + "env": { + "TERM": "xterm-256color", + "SHELL": "/bin/zsh" + }, + "stdout": [ + [ + 9.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000108, + "\u001b[?1h\u001b=" + ], + [ + 0.000182, + "\u001b[?2004h" + ], + [ + 0.45774, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.31515, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.220208, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.121752, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.142781, + "\b\u001b[1mh\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.117367, + "\b\u001b[1mi\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.255471, + "\b\u001b[1ms\u001b[1m asciinema will show you the installation of borg as a standalone bina\u001b[1mr\u001b[1my. 
Usually you only need this if you want to have an up-to-date version of bo\u001b[1mr\u001b[1mg or no package is available for your distro/OS.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 0.563803, + "\u001b[?1l\u001b>" + ], + [ + 0.000412, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000823, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5.9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.2e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.00027, + "\u001b[?2004h" + ], + [ + 2.191111, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.301924, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.21419, + "\b\b\u001b[1m#\u001b[1m \u001b[1mF\u001b[0m\u001b[39m" + ], + [ + 0.117654, + "\b\u001b[1mF\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.198616, + "\b\u001b[1mi\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.101113, + "\b\u001b[1mr\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.107485, + "\b\u001b[1ms\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.357443, + "\b\u001b[1mt\u001b[1m, we need to download the version, we'd like to install…\u001b[0m\u001b[39m" + ], + [ + 0.516614, + "\u001b[?1l\u001b>" + ], + [ + 0.000826, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000757, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 4.4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000134, + "\u001b[?1h\u001b=" + ], + [ + 0.000598, + "\u001b[?2004h" + ], + [ + 1.411874, + "\u001b[32mw\u001b[39m" + ], + [ + 0.119593, + "\b\u001b[1m\u001b[31mw\u001b[1m\u001b[31mg\u001b[0m\u001b[39m" + ], + [ + 0.13329, + "\b\b\u001b[1m\u001b[31mw\u001b[1m\u001b[31mg\u001b[1m\u001b[31me\u001b[0m\u001b[39m" + ], + [ + 0.127861, + "\b\b\b\u001b[0m\u001b[32mw\u001b[0m\u001b[32mg\u001b[0m\u001b[32me\u001b[32mt\u001b[39m" + ], + [ + 0.324708, + " -q --show-progress https://github.com/borgbackup/borg/releases/download/1.1.0b6/borg-linux64\u001b[K" + ], + [ + 0.797801, + "\u001b[?1l\u001b>" + ], + [ + 0.000964, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000631, + "\u001b]2;wget -q --show-progress \u0007\u001b]1;wget\u0007" + ], + [ + 1.306534, + "\rborg-linux64 0%[ ] 0 --.-KB/s " + ], + [ + 0.23185, + "\rborg-linux64 0%[ ] 24.58K 106KB/s " + ], + [ + 0.341907, + "\rborg-linux64 0%[ ] 92.58K 161KB/s " + ], + [ + 0.230021, + "\rborg-linux64 1%[ ] 160.58K 200KB/s " + ], + [ + 0.22577, + "\rborg-linux64 1%[ ] 211.58K 206KB/s " + ], + [ + 0.229246, + "\rborg-linux64 2%[ ] 279.58K 222KB/s " + ], + [ + 0.347713, + "\rborg-linux64 2%[ ] 347.58K 216KB/s " + ], + [ + 0.224636, + "\rborg-linux64 98%[================> ] 12.41M 404KB/s eta 2s " + ], + [ + 0.205977, + "\rborg-linux64 99%[================> ] 12.50M 401KB/s eta 0s " + ], + [ + 0.137036, + "\rborg-linux64 100%[=================>] 12.56M 417KB/s in 39s \r\n" + ], + [ + 0.000872, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000103, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 0.000117, + "\u001b[?1h\u001b=" + ], + [ + 0.000208, + "\u001b[?2004h" + ], + [ + 2.118269, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.266901, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.142975, + "\b\b\u001b[1m#\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.074155, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.167144, + "\b\u001b[1mn\u001b[1md\u001b[0m\u001b[39m" + ], + [ + 0.2241, + "\b\u001b[1md\u001b[1m do not forget the GPG signature…!\u001b[0m\u001b[39m" + ], + [ + 0.596854, + "\u001b[?1l\u001b>" + ], + [ + 0.000696, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000691, + 
"\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 8.2e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000286, + "\u001b[?2004h" + ], + [ + 1.51737, + "\u001b[1m# and do not forget the GPG signature…!\u001b[0m\u001b[39m" + ], + [ + 0.314759, + "\u001b[39D\u001b[0m\u001b[32mw\u001b[0m\u001b[32mg\u001b[0m\u001b[32me\u001b[0m\u001b[32mt\u001b[39m\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39mq\u001b[0m\u001b[39m \u001b[0m\u001b[39m-\u001b[0m\u001b[39m-\u001b[0m\u001b[39ms\u001b[0m\u001b[39mh\u001b[0m\u001b[39mo\u001b[0m\u001b[39mw\u001b[0m\u001b[39m-\u001b[0m\u001b[39mp\u001b[0m\u001b[39mr\u001b[0m\u001b[39mo\u001b[0m\u001b[39mg\u001b[0m\u001b[39mr\u001b[0m\u001b[39me\u001b[0m\u001b[39ms\u001b[0m\u001b[39ms\u001b[0m\u001b[39m \u001b[0m\u001b[39mh\u001b[0m\u001b[39mt\u001b[0m\u001b[39mt\u001b[0m\u001b[39mp\u001b[0m\u001b[39ms\u001b[0m\u001b[39m:\u001b[0m\u001b[39m/\u001b[0m\u001b[39m/\u001b[0m\u001b[39mg\u001b[0m\u001b[39mi\u001b[0m\u001b[39mt\u001b[0m\u001b[39mh\u001b[0m\u001b[39mu\u001b[0m\u001b[39mb\u001b[0m\u001b[39m.com/borgbackup/borg/releases/download/1.1.0b6/borg-linux64\u001b[K" + ], + [ + 1.043903, + "." + ], + [ + 0.207322, + "a" + ], + [ + 0.16952, + "s" + ], + [ + 0.19625, + "c" + ], + [ + 0.359073, + "\u001b[?1l\u001b>" + ], + [ + 0.001424, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000717, + "\u001b]2;wget -q --show-progress \u0007\u001b]1;wget\u0007" + ], + [ + 1.236785, + "\rborg-linux64.asc 0%[ ] 0 --.-KB/s " + ], + [ + 1.8e-05, + "\rborg-linux64.asc 100%[=================>] 819 --.-KB/s in 0s \r\n" + ], + [ + 0.00093, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 7.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 8.7e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000183, + "\u001b[?2004h" + ], + [ + 3.234458, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 1.023301, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.331266, + "\b\b\u001b[1m#\u001b[1m \u001b[1mI\u001b[0m\u001b[39m" + ], + [ + 0.166799, + "\b\u001b[1mI\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.34554, + "\b\u001b[1mn\u001b[1m this case, we have already imported the public key of a borg developer.\u001b[1m \u001b[1mSo we only need to verify it:\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.499971, + "\u001b[?1l\u001b>" + ], + [ + 0.001069, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000922, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000159, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 3.1e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000451, + "\u001b[?2004h" + ], + [ + 2.31724, + "\u001b[32mg\u001b[39m" + ], + [ + 0.151243, + "\b\u001b[32mg\u001b[32mp\u001b[39m" + ], + [ + 0.074305, + "\b\b\u001b[32mg\u001b[32mp\u001b[32mg\u001b[39m" + ], + [ + 0.315686, + " " + ], + [ + 0.345624, + "-" + ], + [ + 0.100203, + "-" + ], + [ + 0.291673, + "v" + ], + [ + 0.11497, + "e" + ], + [ + 0.183055, + "r" + ], + [ + 0.146521, + "i" + ], + [ + 0.11872, + "f" + ], + [ + 0.309865, + "y" + ], + [ + 0.346758, + " " + ], + [ + 0.264902, + "\u001b[4mb\u001b[24m" + ], + [ + 0.307683, + "\u001b[?7l" + ], + [ + 2.1e-05, + "\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.011212, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mgpg\u001b[39m --verify \u001b[4mborg-linux64\u001b[24m\u001b[K" + ], + [ + 0.577848, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.015636, + "\r\r\u001b[27C" + ], + [ + 0.000193, + "\r\r\n\u001b[J" + ], + [ + 2e-05, + 
"\u001b[J\u001b[0mborg-linux64 \u001b[Jborg-linux64.asc\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mgpg\u001b[39m --verify \u001b[4mborg-linux64\u001b[24m\u001b[K" + ], + [ + 0.626316, + "\u001b[?7l\u001b[31m......\u001b[39m\u001b[?7h" + ], + [ + 0.012642, + "\u001b[12D\u001b[24mb\u001b[24mo\u001b[24mr\u001b[24mg\u001b[24m-\u001b[24ml\u001b[24mi\u001b[24mn\u001b[24mu\u001b[24mx\u001b[24m6\u001b[24m4" + ], + [ + 0.000154, + "\r\r\n" + ], + [ + 1.8e-05, + "\u001b[J\u001b[7mborg-linux64 \u001b[0m \u001b[Jborg-linux64.asc\u001b[J\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mgpg\u001b[39m --verify borg-linux64\u001b[K" + ], + [ + 0.189964, + "\r\r\n" + ], + [ + 1.7e-05, + "\u001b[7mborg-linux64 \u001b[0m \r\u001b[7mborg-linux64 \u001b[0m \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mgpg\u001b[39m --verify borg-linux64\u001b[K.asc" + ], + [ + 0.000225, + "\r\r\n" + ], + [ + 1.9e-05, + "\u001b[18C\u001b[7mborg-linux64.asc\u001b[0m\rborg-linux64 \r\u001b[A\u001b[0m\u001b[27m\u001b[24m\r\u001b[2C\u001b[32mgpg\u001b[39m --verify borg-linux64.asc\u001b[K" + ], + [ + 0.866638, + "\r\r\n\u001b[J\u001b[A\u001b[31C\u001b[1m \u001b[0m" + ], + [ + 0.001241, + "\r\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[32mgpg\u001b[39m --verify \u001b[4mborg-linux64.asc\u001b[24m\u001b[1m \u001b[0m\u001b[K" + ], + [ + 0.654098, + "\b\u001b[0m \b" + ], + [ + 2.7e-05, + "\u001b[?1l\u001b>" + ], + [ + 0.001361, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000737, + "\u001b]2;gpg --verify borg-linux64.asc\u0007" + ], + [ + 2.6e-05, + "\u001b]1;gpg\u0007" + ], + [ + 0.002478, + "gpg: assuming signed data in `borg-linux64'\r\n" + ], + [ + 0.082679, + "gpg: Signature made Sun Jun 18 16:54:19 2017 CEST\r\ngpg: using RSA key 0x243ACFA951F78E01\r\n" + ], + [ + 0.003947, + "gpg: Good signature from \"Thomas Waldmann \" [ultimate]\r\ngpg: aka \"Thomas Waldmann \" [ultimate]\r\ngpg: aka \"Thomas Waldmann \" [ultimate]\r\n" + ], + [ + 2.1e-05, + "gpg: aka \"Thomas Waldmann \"" + ], + [ + 1.5e-05, + " [ultimate]\r\n" + ], + [ + 0.001743, + "Primary key fingerprint: 6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393\r\n Subkey fingerprint: 2F81 AFFB AB04 E11F E8EE 65D4 243A CFA9 51F7 8E01\r\n" + ], + [ + 0.000384, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000155, + "\u001b[?2004h" + ], + [ + 4.627219, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.225001, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.213579, + "\b\b\u001b[1m#\u001b[1m \u001b[1mO\u001b[0m\u001b[39m" + ], + [ + 0.132218, + "\b\u001b[1mO\u001b[1mk\u001b[0m\u001b[39m" + ], + [ + 0.061577, + "\b\u001b[1mk\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.154786, + "\b\u001b[1ma\u001b[1my\u001b[0m\u001b[39m" + ], + [ + 0.172921, + "\b\u001b[1my\u001b[1m,\u001b[0m\u001b[39m" + ], + [ + 0.648978, + "\b\u001b[1m,\u001b[1m the binary is valid!\u001b[0m\u001b[39m" + ], + [ + 0.822303, + "\u001b[?1l\u001b>" + ], + [ + 0.000388, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000681, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000113, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 4.6e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000252, + "\u001b[?2004h" + ], + [ + 2.048081, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.243659, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.174242, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 
0.131485, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.109555, + "\b\u001b[1mo\u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.128309, + "\b\u001b[1mw\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.163064, + "\b\u001b[1m \u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.138953, + "\b\u001b[1mi\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.050135, + "\b\u001b[1mn\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.095385, + "\b\u001b[1ms\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.114692, + "\b\u001b[1mt\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.155821, + "\b\u001b[1ma\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.118297, + "\b\u001b[1ml\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.165834, + "\b\u001b[1ml\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.231866, + "\b\u001b[1m \u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.159893, + "\b\u001b[1mi\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.289328, + "\b\u001b[1mt\u001b[1m:\u001b[0m\u001b[39m" + ], + [ + 2.713706, + "\u001b[?1l\u001b>" + ], + [ + 0.000362, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000674, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 9e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000272, + "\u001b[?2004h" + ], + [ + 1.703796, + "\u001b[1m\u001b[31ms\u001b[0m\u001b[39m" + ], + [ + 0.12754, + "\b\u001b[0m\u001b[32ms\u001b[32mu\u001b[39m" + ], + [ + 0.149508, + "\b\b\u001b[1m\u001b[31ms\u001b[1m\u001b[31mu\u001b[1m\u001b[31md\u001b[0m\u001b[39m" + ], + [ + 0.121616, + "\b\b\b\u001b[0m\u001b[4m\u001b[32ms\u001b[0m\u001b[4m\u001b[32mu\u001b[0m\u001b[4m\u001b[32md\u001b[4m\u001b[32mo\u001b[24m\u001b[39m" + ], + [ + 0.321903, + " \u001b[32mcp\u001b[39m \u001b[4mborg-linux64\u001b[24m \u001b[4m/usr/local/bin/borg\u001b[24m" + ], + [ + 2.352378, + "\u001b[?1l\u001b>" + ], + [ + 0.001087, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00091, + "\u001b]2;sudo cp borg-linux64 /usr/local/bin/borg\u0007\u001b]1;cp\u0007" + ], + [ + 0.013652, + "[sudo] password for rugk: " + ], + [ + 2.992379, + "\r\n" + ], + [ + 0.031173, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 4e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.3e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000214, + "\u001b[?2004h" + ], + [ + 5.400881, + "\u001b[1m\u001b[31ms\u001b[0m\u001b[39m" + ], + [ + 0.138474, + "\b\u001b[0m\u001b[32ms\u001b[32mu\u001b[39m" + ], + [ + 0.114266, + "\b\b\u001b[1m\u001b[31ms\u001b[1m\u001b[31mu\u001b[1m\u001b[31md\u001b[0m\u001b[39m" + ], + [ + 0.098068, + "\b\b\b\u001b[0m\u001b[4m\u001b[32ms\u001b[0m\u001b[4m\u001b[32mu\u001b[0m\u001b[4m\u001b[32md\u001b[4m\u001b[32mo\u001b[24m\u001b[39m" + ], + [ + 0.16926, + " " + ], + [ + 0.188874, + "\u001b[32mcp\u001b[39m \u001b[4mborg-linux64\u001b[24m \u001b[4m/usr/local/bin/borg\u001b[24m" + ], + [ + 0.413244, + "\u001b[34D\u001b[32mh\u001b[32mo\u001b[24m\u001b[32mw\u001b[24m\u001b[32mn\u001b[39m\u001b[24m \u001b[24mr\u001b[24mo\u001b[24mo\u001b[24mt\u001b[24m:\u001b[24mr\u001b[24mo\u001b[24mo\u001b[24mt\u001b[20C" + ], + [ + 1.397429, + "\u001b[?1l\u001b>" + ], + [ + 0.00132, + "\u001b[?2004l\r\r\n" + ], + [ + 0.00075, + "\u001b]2;sudo chown root:root /usr/local/bin/borg\u0007\u001b]1;chown\u0007" + ], + [ + 0.010539, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000284, + "\u001b[?2004h" + ], + [ + 2.229436, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.213191, + 
"\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.099902, + "\b\b\u001b[1m#\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.069437, + "\b\u001b[1ma\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.106463, + "\b\u001b[1mn\u001b[1md\u001b[0m\u001b[39m" + ], + [ + 0.080598, + "\b\u001b[1md\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.172381, + "\b\u001b[1m \u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.096638, + "\b\u001b[1mm\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.078606, + "\b\u001b[1ma\u001b[1mk\u001b[0m\u001b[39m" + ], + [ + 0.106382, + "\b\u001b[1mk\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.110174, + "\b\u001b[1me\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.216964, + "\b\u001b[1m \u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.183739, + "\b\u001b[1mi\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.150872, + "\b\u001b[1mt\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.188901, + "\b\u001b[1m \u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.099651, + "\b\u001b[1me\u001b[1mx\u001b[0m\u001b[39m" + ], + [ + 0.1893, + "\b\u001b[1mx\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.187999, + "\b\u001b[1me\u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.128262, + "\b\u001b[1mc\u001b[1mu\u001b[0m\u001b[39m" + ], + [ + 0.144851, + "\b\u001b[1mu\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.091175, + "\b\u001b[1mt\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.135575, + "\b\u001b[1ma\u001b[1mb\u001b[0m\u001b[39m" + ], + [ + 0.18045, + "\b\u001b[1mb\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.110687, + "\b\u001b[1ml\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.360861, + "\b\u001b[1me\u001b[1m…\u001b[0m\u001b[39m" + ], + [ + 0.69896, + "\u001b[?1l\u001b>" + ], + [ + 0.000433, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000544, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 5.1e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000306, + "\u001b[?2004h" + ], + [ + 1.028139, + "\u001b[1m\u001b[31ms\u001b[0m\u001b[39m" + ], + [ + 0.136555, + "\b\u001b[0m\u001b[32ms\u001b[32mu\u001b[39m" + ], + [ + 0.115701, + "\b\b\u001b[1m\u001b[31ms\u001b[1m\u001b[31mu\u001b[1m\u001b[31md\u001b[0m\u001b[39m" + ], + [ + 0.151048, + "\b\b\b\u001b[0m\u001b[4m\u001b[32ms\u001b[0m\u001b[4m\u001b[32mu\u001b[0m\u001b[4m\u001b[32md\u001b[4m\u001b[32mo\u001b[24m\u001b[39m" + ], + [ + 0.276036, + " \u001b[32mchown\u001b[39m root:root \u001b[4m/usr/local/bin/borg\u001b[24m" + ], + [ + 0.284509, + "\u001b[34D\u001b[32mp\u001b[39m\u001b[39m \u001b[39m\u001b[4mb\u001b[39m\u001b[4mo\u001b[4mr\u001b[4mg\u001b[4m-\u001b[4ml\u001b[4mi\u001b[4mn\u001b[4mu\u001b[4mx\u001b[4m6\u001b[4m4\u001b[24m\u001b[20C" + ], + [ + 0.422112, + "\u001b[34D\u001b[32mh\u001b[32mo\u001b[24m\u001b[32mw\u001b[24m\u001b[32mn\u001b[39m\u001b[24m \u001b[24mr\u001b[24mo\u001b[24mo\u001b[24mt\u001b[24m:\u001b[24mr\u001b[24mo\u001b[24mo\u001b[24mt\u001b[20C" + ], + [ + 0.616462, + "\u001b[33D\u001b[32mm\u001b[32mo\u001b[32md\u001b[39m 755\u001b[6P\u001b[20C \b\b\b\b\b\b" + ], + [ + 1.090337, + "\u001b[?1l\u001b>" + ], + [ + 0.00101, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000655, + "\u001b]2;sudo chmod 755 /usr/local/bin/borg\u0007" + ], + [ + 1.8e-05, + "\u001b]1;chmod\u0007" + ], + [ + 0.009932, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000124, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 5.9e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000237, + "\u001b[?2004h" + ], + [ + 3.613554, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.305561, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" 
+ ], + [ + 0.451533, + "\b\b\u001b[1m#\u001b[1m \u001b[1mN\u001b[0m\u001b[39m" + ], + [ + 0.199295, + "\b\u001b[1mN\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.134017, + "\b\u001b[1mo\u001b[1mw\u001b[0m\u001b[39m" + ], + [ + 0.232574, + "\b\u001b[1mw\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.283449, + "\b\u001b[1m \u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.156927, + "\b\u001b[1mc\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.100718, + "\b\u001b[1mh\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.145048, + "\b\u001b[1me\u001b[1mc\u001b[0m\u001b[39m" + ], + [ + 0.238223, + "\b\u001b[1mc\u001b[1mk\u001b[0m\u001b[39m" + ], + [ + 0.145393, + "\b\u001b[1mk\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.195514, + "\b\u001b[1m \u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.190153, + "\b\u001b[1mi\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 1.202922, + "\b\u001b[1mt\u001b[1m:\u001b[0m\u001b[39m" + ], + [ + 0.17572, + "\b\u001b[1m:\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.209752, + "\b\u001b[1m \u001b[1m(\u001b[0m\u001b[39m" + ], + [ + 0.266264, + "\b\u001b[1m(\u001b[1mp\u001b[0m\u001b[39m" + ], + [ + 0.136174, + "\b\u001b[1mp\u001b[1mo\u001b[0m\u001b[39m" + ], + [ + 0.136549, + "\b\u001b[1mo\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.157321, + "\b\u001b[1ms\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.134812, + "\b\u001b[1ms\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.177707, + "\b\u001b[1mi\u001b[1mb\u001b[0m\u001b[39m" + ], + [ + 0.184458, + "\b\u001b[1mb\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.104718, + "\b\u001b[1ml\u001b[1my\u001b[0m\u001b[39m" + ], + [ + 0.132476, + "\b\u001b[1my\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.14269, + "\b\u001b[1m \u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.109627, + "\b\u001b[1mn\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.150487, + "\b\u001b[1me\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.202663, + "\b\u001b[1me\u001b[1md\u001b[0m\u001b[39m" + ], + [ + 0.12975, + "\b\u001b[1md\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.095469, + "\b\u001b[1ms\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.160511, + "\b\u001b[1m \u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.149495, + "\b\u001b[1ma\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.189727, + "\b\u001b[1m \u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.098768, + "\b\u001b[1mt\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.125099, + "\b\u001b[1me\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.077112, + "\b\u001b[1mr\u001b[1mm\u001b[0m\u001b[39m" + ], + [ + 0.147886, + "\b\u001b[1mm\u001b[1mi\u001b[0m\u001b[39m" + ], + [ + 0.124366, + "\b\u001b[1mi\u001b[1mn\u001b[0m\u001b[39m" + ], + [ + 0.088118, + "\b\u001b[1mn\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.116281, + "\b\u001b[1ma\u001b[1ml\u001b[0m\u001b[39m" + ], + [ + 0.146487, + "\b\u001b[1ml\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.156764, + "\b\u001b[1m \u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.195688, + "\b\u001b[1mr\u001b[1me\u001b[0m\u001b[39m" + ], + [ + 0.40621, + "\b\u001b[1me\u001b[1ms\u001b[0m\u001b[39m" + ], + [ + 0.263813, + "\b\u001b[1ms\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.087475, + "\b\u001b[1mt\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.090176, + "\b\u001b[1ma\u001b[1mr\u001b[0m\u001b[39m" + ], + [ + 0.12059, + "\b\u001b[1mr\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.232423, + "\b\u001b[1mt\u001b[1m)\u001b[0m\u001b[39m" + ], + [ + 1.383975, + "\u001b[?1l\u001b>" + ], + [ + 0.000692, + "\u001b[?2004l\r\r\n" + ], + [ + 0.001339, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000147, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.6e-05, + 
"\u001b[?1h\u001b=" + ], + [ + 0.000558, + "\u001b[?2004h" + ], + [ + 1.045406, + "\u001b[4mb\u001b[24m" + ], + [ + 0.163217, + "\b\u001b[4mb\u001b[4mo\u001b[24m" + ], + [ + 0.131464, + "\b\b\u001b[4mb\u001b[4mo\u001b[4mr\u001b[24m" + ], + [ + 0.103279, + "\b\b\b\u001b[24m\u001b[32mb\u001b[24m\u001b[32mo\u001b[24m\u001b[32mr\u001b[32mg\u001b[39m" + ], + [ + 0.181118, + " " + ], + [ + 0.440449, + "-" + ], + [ + 0.186299, + "V" + ], + [ + 0.522054, + "\u001b[?1l\u001b>" + ], + [ + 0.000643, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000967, + "\u001b]2;borg -V\u0007\u001b]1;borg\u0007" + ], + [ + 0.426128, + "borg 1.1.0b6\r\n" + ], + [ + 0.040916, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 0.000101, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.5e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000176, + "\u001b[?2004h" + ], + [ + 1.92655, + "\u001b[1m#\u001b[0m\u001b[39m" + ], + [ + 0.247681, + "\b\u001b[1m#\u001b[1m \u001b[0m\u001b[39m" + ], + [ + 0.233391, + "\b\b\u001b[1m#\u001b[1m \u001b[1mT\u001b[0m\u001b[39m" + ], + [ + 0.127191, + "\b\u001b[1mT\u001b[1mh\u001b[0m\u001b[39m" + ], + [ + 0.023053, + "\b\u001b[1mh\u001b[1ma\u001b[0m\u001b[39m" + ], + [ + 0.155649, + "\b\u001b[1ma\u001b[1mt\u001b[0m\u001b[39m" + ], + [ + 0.3483, + "\b\u001b[1mt\u001b[1m's it! Check out the other screencasts to see how to actually use borg\u001b[1mb\u001b[1mackup.\u001b[0m\u001b[39m\u001b[K" + ], + [ + 1.701253, + "\u001b[?1l\u001b>" + ], + [ + 0.000707, + "\u001b[?2004l\r\r\n" + ], + [ + 0.000682, + "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r" + ], + [ + 1.1e-05, + "\u001b]1;..lder/borgdemo\u0007" + ], + [ + 5.7e-05, + "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J$ \u001b[K" + ], + [ + 6.8e-05, + "\u001b[?1h\u001b=" + ], + [ + 0.000284, + "\u001b[?2004h" + ], + [ + 1.579085, + "\u001b[?2004l\r\r\n" + ] + ] +} diff --git a/docs/misc/asciinema/install.sh b/docs/misc/asciinema/install.sh new file mode 100644 index 00000000..3ade47c8 --- /dev/null +++ b/docs/misc/asciinema/install.sh @@ -0,0 +1,21 @@ +# This asciinema will show you the installation of borg as a standalone binary. Usually you only need this if you want to have an up-to-date version of borg or no package is available for your distro/OS. + +# First, we need to download the version, we'd like to install… +wget -q --show-progress https://github.com/borgbackup/borg/releases/download/1.1.0b6/borg-linux64 +# and do not forget the GPG signature…! +wget -q --show-progress https://github.com/borgbackup/borg/releases/download/1.1.0b6/borg-linux64.asc + +# In this case, we have already imported the public key of a borg developer. So we only need to verify it: +gpg --verify borg-linux64.asc +# Okay, the binary is valid! + +# Now install it: +sudo cp borg-linux64 /usr/local/bin/borg +sudo chown root:root /usr/local/bin/borg +# and make it executable… +sudo chmod 755 /usr/local/bin/borg + +# Now check it: (possibly needs a terminal restart) +borg -V + +# That's it! Check out the other screencasts to see how to actually use borgbackup. 
diff --git a/docs/misc/asciinema/install_and_basics.json b/docs/misc/asciinema/install_and_basics.json deleted file mode 100644 index a50eb35f..00000000 --- a/docs/misc/asciinema/install_and_basics.json +++ /dev/null @@ -1,5550 +0,0 @@ -{ - "version": 1, - "width": 80, - "height": 24, - "duration": 332.0, - "command": "/bin/bash", - "title": "borgbackup - installation and basic usage", - "env": { - "TERM": "xterm", - "SHELL": "/bin/bash" - }, - "stdout": [ - [ - 0.083341, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.349103, - "#" - ], - [ - 0.338948, - " " - ], - [ - 0.185424, - "b" - ], - [ - 0.142971, - "o" - ], - [ - 0.091227, - "r" - ], - [ - 0.092867, - "g" - ], - [ - 0.222552, - "b" - ], - [ - 0.114706, - "a" - ], - [ - 0.125044, - "c" - ], - [ - 0.144755, - "k" - ], - [ - 0.241044, - "u" - ], - [ - 0.243681, - "p" - ], - [ - 0.265888, - " " - ], - [ - 0.345247, - "-" - ], - [ - 0.251918, - " " - ], - [ - 0.233420, - "i" - ], - [ - 0.078609, - "n" - ], - [ - 0.076809, - "s" - ], - [ - 0.070225, - "t" - ], - [ - 0.148413, - "a" - ], - [ - 0.077403, - "l" - ], - [ - 0.139884, - "l" - ], - [ - 0.084807, - "a" - ], - [ - 0.138823, - "t" - ], - [ - 0.068185, - "i" - ], - [ - 0.170422, - "o" - ], - [ - 0.161091, - "n" - ], - [ - 0.169247, - " " - ], - [ - 0.110722, - "a" - ], - [ - 0.113785, - "n" - ], - [ - 0.397895, - "d" - ], - [ - 0.305048, - " " - ], - [ - 0.211476, - "b" - ], - [ - 0.109865, - "a" - ], - [ - 0.230634, - "s" - ], - [ - 0.277915, - "i" - ], - [ - 0.206167, - "c" - ], - [ - 0.145265, - " " - ], - [ - 0.219619, - "u" - ], - [ - 0.139989, - "s" - ], - [ - 0.180240, - "a" - ], - [ - 0.200391, - "g" - ], - [ - 0.116961, - "e" - ], - [ - 0.172074, - "\r\n" - ], - [ - 0.000449, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.620909, - "#" - ], - [ - 0.217833, - " " - ], - [ - 0.592920, - "I" - ], - [ - 0.166726, - " " - ], - [ - 0.161953, - "h" - ], - [ - 0.072501, - "a" - ], - [ - 0.170951, - "v" - ], - [ - 0.154067, - "e" - ], - [ - 0.110535, - " " - ], - [ - 0.155235, - "a" - ], - [ - 0.130825, - "l" - ], - [ - 0.111834, - "r" - ], - [ - 0.142378, - "e" - ], - [ - 0.165867, - "a" - ], - [ - 0.062556, - "d" - ], - [ - 0.091778, - "y" - ], - [ - 0.216280, - " " - ], - [ - 0.169501, - "d" - ], - [ - 0.198240, - "o" - ], - [ - 0.092373, - "w" - ], - [ - 0.143405, - "n" - ], - [ - 0.207324, - "l" - ], - [ - 0.164248, - "o" - ], - [ - 0.088481, - "a" - ], - [ - 0.129191, - "d" - ], - [ - 0.179234, - "e" - ], - [ - 0.189248, - "d" - ], - [ - 0.145203, - " " - ], - [ - 0.221625, - "t" - ], - [ - 0.100064, - "h" - ], - [ - 0.133349, - "e" - ], - [ - 0.066501, - " " - ], - [ - 0.187004, - "f" - ], - [ - 0.142461, - "i" - ], - [ - 0.204723, - "l" - ], - [ - 0.068716, - "e" - ], - [ - 0.237576, - "s" - ], - [ - 0.128085, - ":" - ], - [ - 0.242282, - "\r\n" - ], - [ - 0.000327, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 1.796834, - "l" - ], - [ - 0.092545, - "s" - ], - [ - 0.210322, - " " - ], - [ - 0.189710, - "-" - ], - [ - 0.215532, - "l" - ], - [ - 0.852863, - "\r\n" - ], - [ - 0.002104, - "total 10620\r\n" - ], - [ - 0.000040, - "-rw-rw-r-- 1 tw tw 10869049 Oct 24 22:11 borg-linux64" - ], - [ - 0.000007, - "\r\n" - ], - [ - 0.000019, - "-rw-rw-r-- 1 tw tw 819 Oct 24 22:11 borg-linux64.asc\r\n" - ], - [ - 0.000431, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.513172, - "#" - ], - [ - 0.284059, - " " - ], - [ 
- 0.330931, - "b" - ], - [ - 0.118806, - "i" - ], - [ - 0.100553, - "n" - ], - [ - 0.259930, - "a" - ], - [ - 0.106715, - "r" - ], - [ - 0.276545, - "y" - ], - [ - 0.126132, - " " - ], - [ - 0.379724, - "+" - ], - [ - 0.199249, - " " - ], - [ - 0.295913, - "G" - ], - [ - 0.108970, - "P" - ], - [ - 0.080480, - "G" - ], - [ - 0.349293, - " " - ], - [ - 0.236785, - "s" - ], - [ - 0.105197, - "i" - ], - [ - 0.289951, - "g" - ], - [ - 0.351385, - "n" - ], - [ - 0.282003, - "a" - ], - [ - 0.206591, - "t" - ], - [ - 0.163963, - "u" - ], - [ - 0.082416, - "r" - ], - [ - 0.125432, - "e" - ], - [ - 0.369988, - "\r\n" - ], - [ - 0.000341, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.889617, - "#" - ], - [ - 0.226974, - " " - ], - [ - 0.218497, - "l" - ], - [ - 0.134545, - "e" - ], - [ - 0.103159, - "t" - ], - [ - 0.711682, - "'" - ], - [ - 0.185463, - "s" - ], - [ - 0.162130, - " " - ], - [ - 0.166049, - "v" - ], - [ - 0.183069, - "e" - ], - [ - 0.099764, - "r" - ], - [ - 0.234211, - "i" - ], - [ - 0.854328, - "f" - ], - [ - 0.203758, - "y" - ], - [ - 0.166681, - " " - ], - [ - 0.216715, - "t" - ], - [ - 0.560064, - "h" - ], - [ - 0.151837, - "a" - ], - [ - 0.194509, - "t" - ], - [ - 0.119665, - " " - ], - [ - 0.141089, - "t" - ], - [ - 0.096803, - "h" - ], - [ - 0.104718, - "e" - ], - [ - 0.106761, - " " - ], - [ - 0.229401, - "b" - ], - [ - 0.213802, - "i" - ], - [ - 0.075481, - "n" - ], - [ - 0.138720, - "a" - ], - [ - 0.062411, - "r" - ], - [ - 0.292719, - "y" - ], - [ - 0.482737, - " " - ], - [ - 0.211595, - "i" - ], - [ - 0.110964, - "s" - ], - [ - 0.102100, - " " - ], - [ - 0.143380, - "v" - ], - [ - 0.189214, - "a" - ], - [ - 0.099337, - "l" - ], - [ - 0.172757, - "i" - ], - [ - 0.082456, - "d" - ], - [ - 0.177514, - ":" - ], - [ - 0.622492, - "\r\n" - ], - [ - 0.000313, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 2.000000, - "g" - ], - [ - 0.261924, - "p" - ], - [ - 0.108570, - "g" - ], - [ - 0.247315, - " " - ], - [ - 0.277162, - "-" - ], - [ - 0.141397, - "-" - ], - [ - 0.143255, - "v" - ], - [ - 0.162858, - "e" - ], - [ - 0.040051, - "r" - ], - [ - 0.105941, - "i" - ], - [ - 0.144872, - "f" - ], - [ - 0.306497, - "y" - ], - [ - 0.468271, - " " - ], - [ - 2.000000, - "b" - ], - [ - 0.119390, - "o" - ], - [ - 0.463137, - "\u0007" - ], - [ - 0.000095, - "rg-linux64" - ], - [ - 0.341519, - "." - ], - [ - 0.146977, - "asc " - ], - [ - 0.186292, - "\r\n" - ], - [ - 0.100648, - "gpg: Signature made Wed 07 Oct 2015 02:41:38 PM CEST\r\n" - ], - [ - 0.000011, - "gpg: using RSA key 243ACFA951F78E01\r\n" - ], - [ - 0.006906, - "gpg: Good signature from \"Thomas Waldmann \u003ctw@waldmann-edv.de\u003e\"" - ], - [ - 0.000033, - "\r\ngpg: aka \"Thomas Waldmann \u003ctw-public@gmx.de\u003e\"\r\ngpg: aka \"Thomas Waldmann \u003ctwaldmann@thinkmo.de\u003e\"\r\n" - ], - [ - 0.000018, - "gpg: aka \"Thomas Waldmann \u003cthomas.j.waldmann@gmail.com\u003e\"\r\n" - ], - [ - 0.003077, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 2.000000, - "#" - ], - [ - 0.241501, - " " - ], - [ - 0.186571, - "e" - ], - [ - 0.214388, - "v" - ], - [ - 0.157101, - "e" - ], - [ - 0.042348, - "r" - ], - [ - 0.253261, - "y" - ], - [ - 0.254356, - "t" - ], - [ - 0.094622, - "h" - ], - [ - 0.213972, - "i" - ], - [ - 0.084853, - "n" - ], - [ - 0.084920, - "g" - ], - [ - 0.178519, - " " - ], - [ - 0.256151, - "o" - ], - [ - 0.217918, - "k" - ], - [ - 0.153899, - "!" 
- ], - [ - 0.246211, - "\r\n" - ], - [ - 0.000330, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 2.000000, - "#" - ], - [ - 0.288008, - " " - ], - [ - 0.232836, - "i" - ], - [ - 0.055326, - "n" - ], - [ - 0.142978, - "s" - ], - [ - 0.080599, - "t" - ], - [ - 0.139018, - "a" - ], - [ - 0.111052, - "l" - ], - [ - 0.132419, - "l" - ], - [ - 0.169037, - " " - ], - [ - 0.117036, - "t" - ], - [ - 0.092749, - "h" - ], - [ - 0.124768, - "e" - ], - [ - 0.088888, - " " - ], - [ - 0.184118, - "b" - ], - [ - 0.182336, - "i" - ], - [ - 0.075466, - "n" - ], - [ - 0.085516, - "a" - ], - [ - 0.060363, - "r" - ], - [ - 0.843225, - "y" - ], - [ - 0.209758, - " " - ], - [ - 0.168892, - "a" - ], - [ - 0.151126, - "s" - ], - [ - 0.127487, - " " - ], - [ - 0.300923, - "\"" - ], - [ - 0.217060, - "b" - ], - [ - 0.221579, - "o" - ], - [ - 0.047775, - "r" - ], - [ - 0.107202, - "g" - ], - [ - 0.438939, - "\"" - ], - [ - 0.253153, - ":" - ], - [ - 0.617823, - "\r\n" - ], - [ - 0.000326, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.816740, - "c" - ], - [ - 0.168734, - "p" - ], - [ - 0.230846, - " " - ], - [ - 0.299588, - "b" - ], - [ - 0.121082, - "o" - ], - [ - 0.214148, - "\u0007" - ], - [ - 0.000011, - "rg-linux64" - ], - [ - 0.331736, - " " - ], - [ - 0.812264, - "~" - ], - [ - 0.518926, - "/" - ], - [ - 0.233797, - "b" - ], - [ - 0.214141, - "i" - ], - [ - 0.098062, - "n" - ], - [ - 0.607725, - "/" - ], - [ - 0.566434, - "b" - ], - [ - 0.145886, - "o" - ], - [ - 0.113081, - "r" - ], - [ - 0.068870, - "g" - ], - [ - 0.851794, - "\r\n" - ], - [ - 0.012632, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 2.000000, - "#" - ], - [ - 0.269926, - " " - ], - [ - 0.208575, - "m" - ], - [ - 0.135192, - "a" - ], - [ - 0.119543, - "k" - ], - [ - 0.080873, - "e" - ], - [ - 0.156871, - " " - ], - [ - 0.197124, - "i" - ], - [ - 0.078784, - "t" - ], - [ - 0.142373, - " " - ], - [ - 0.189080, - "e" - ], - [ - 0.232597, - "x" - ], - [ - 0.170105, - "e" - ], - [ - 0.132039, - "c" - ], - [ - 0.230568, - "u" - ], - [ - 0.086573, - "t" - ], - [ - 0.255047, - "a" - ], - [ - 0.231478, - "b" - ], - [ - 0.283723, - "l" - ], - [ - 0.112987, - "e" - ], - [ - 0.518611, - ":" - ], - [ - 0.459423, - "\r\n" - ], - [ - 0.000822, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.353739, - "c" - ], - [ - 0.114161, - "h" - ], - [ - 0.268562, - "m" - ], - [ - 0.179085, - "o" - ], - [ - 0.145360, - "d" - ], - [ - 0.075599, - " " - ], - [ - 0.773964, - "+" - ], - [ - 0.113699, - "x" - ], - [ - 0.187579, - " " - ], - [ - 0.381391, - "~" - ], - [ - 0.512520, - "/" - ], - [ - 0.231090, - "b" - ], - [ - 0.197636, - "i" - ], - [ - 0.101238, - "n" - ], - [ - 0.341295, - "/" - ], - [ - 0.306047, - "b" - ], - [ - 0.106898, - "o" - ], - [ - 0.233773, - "rg " - ], - [ - 0.519336, - "\r\n" - ], - [ - 0.001408, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 2.000000, - "#" - ], - [ - 0.247104, - " " - ], - [ - 0.218717, - "i" - ], - [ - 0.067769, - "n" - ], - [ - 0.139583, - "s" - ], - [ - 0.092034, - "t" - ], - [ - 0.152729, - "a" - ], - [ - 0.083844, - "l" - ], - [ - 0.145806, - "l" - ], - [ - 0.120879, - "a" - ], - [ - 0.164967, - "t" - ], - [ - 0.065308, - "i" - ], - [ - 0.816983, - "o" - ], - [ - 0.231669, - "n" - ], - [ - 0.185168, - " " - ], - [ - 0.125214, - "d" - ], - [ - 0.112630, - "o" - ], - [ - 0.068650, - "n" - ], - [ - 0.108386, - "e" - ], - 
[ - 0.563031, - "!" - ], - [ - 2.000000, - "\r\n" - ], - [ - 0.000365, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.347093, - "#" - ], - [ - 0.262764, - " " - ], - [ - 0.191568, - "l" - ], - [ - 0.086614, - "e" - ], - [ - 0.110365, - "t" - ], - [ - 0.707057, - "'" - ], - [ - 0.220060, - "s" - ], - [ - 0.181690, - " " - ], - [ - 0.128039, - "c" - ], - [ - 0.176264, - "r" - ], - [ - 0.171208, - "e" - ], - [ - 0.199371, - "a" - ], - [ - 0.161622, - "t" - ], - [ - 0.145989, - "e" - ], - [ - 0.187920, - " " - ], - [ - 0.734653, - "a" - ], - [ - 0.185812, - " " - ], - [ - 0.270851, - "r" - ], - [ - 0.120000, - "e" - ], - [ - 0.161097, - "p" - ], - [ - 0.179813, - "o" - ], - [ - 0.170557, - "s" - ], - [ - 0.145457, - "i" - ], - [ - 0.165200, - "t" - ], - [ - 0.135578, - "o" - ], - [ - 0.130363, - "r" - ], - [ - 0.461631, - "y" - ], - [ - 0.303047, - ":" - ], - [ - 0.955198, - "\r\n" - ], - [ - 0.000300, - "\u001b]0;tw@tux: ~/borg/demo/download\u0007tw@tux:~/borg/demo/download$ " - ], - [ - 0.301237, - "c" - ], - [ - 0.084267, - "d" - ], - [ - 0.155241, - " " - ], - [ - 0.813751, - "." - ], - [ - 0.157147, - "." - ], - [ - 0.573720, - "\r\n" - ], - [ - 0.000508, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.225463, - "b" - ], - [ - 0.274841, - "o" - ], - [ - 0.125292, - "r" - ], - [ - 0.083313, - "g" - ], - [ - 0.088596, - " " - ], - [ - 0.231502, - "i" - ], - [ - 0.062726, - "n" - ], - [ - 0.144877, - "i" - ], - [ - 0.112508, - "t" - ], - [ - 0.313489, - " " - ], - [ - 0.298944, - "r" - ], - [ - 0.216556, - "e" - ], - [ - 0.180484, - "p" - ], - [ - 0.204141, - "o" - ], - [ - 0.682782, - "\r\n" - ], - [ - 0.352828, - "Initializing repository at \"repo\"\r\n" - ], - [ - 0.001407, - "Encryption NOT enabled.\r\nUse the \"--encryption=repokey|keyfile|passphrase\" to enable encryption." - ], - [ - 0.000009, - "\r\n" - ], - [ - 0.008492, - "Synchronizing chunks cache..." - ], - [ - 0.000009, - "\r\n" - ], - [ - 0.000030, - "Archives: 0, w/ cached Idx: 0, w/ outdated Idx: 0, w/o cached Idx: 0." - ], - [ - 0.000004, - "\r\n" - ], - [ - 0.000024, - "Done." 
- ], - [ - 0.000004, - "\r\n" - ], - [ - 0.027827, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.988184, - "#" - ], - [ - 0.248844, - " " - ], - [ - 0.199486, - "l" - ], - [ - 0.104455, - "e" - ], - [ - 0.127960, - "t" - ], - [ - 0.484976, - "'" - ], - [ - 0.186103, - "s" - ], - [ - 0.151763, - " " - ], - [ - 0.177456, - "c" - ], - [ - 0.178972, - "r" - ], - [ - 0.183533, - "e" - ], - [ - 0.192725, - "a" - ], - [ - 0.146352, - "t" - ], - [ - 0.156199, - "e" - ], - [ - 0.232699, - " " - ], - [ - 0.513490, - "o" - ], - [ - 0.229828, - "u" - ], - [ - 0.104744, - "r" - ], - [ - 0.115068, - " " - ], - [ - 0.201439, - "f" - ], - [ - 0.333315, - "i" - ], - [ - 0.209070, - "r" - ], - [ - 0.259194, - "s" - ], - [ - 0.076346, - "t" - ], - [ - 0.125673, - " " - ], - [ - 0.198575, - "b" - ], - [ - 0.089009, - "a" - ], - [ - 0.238307, - "c" - ], - [ - 0.105568, - "k" - ], - [ - 0.254971, - "u" - ], - [ - 0.318094, - "p" - ], - [ - 0.690770, - ":" - ], - [ - 0.580155, - "\r\n" - ], - [ - 0.000308, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.603046, - "b" - ], - [ - 0.104492, - "o" - ], - [ - 0.148182, - "r" - ], - [ - 0.087024, - "g" - ], - [ - 0.176897, - " " - ], - [ - 0.183168, - "c" - ], - [ - 0.185325, - "r" - ], - [ - 0.183347, - "e" - ], - [ - 0.182868, - "a" - ], - [ - 0.170600, - "t" - ], - [ - 0.137005, - "e" - ], - [ - 0.164357, - " " - ], - [ - 0.427028, - "-" - ], - [ - 0.147791, - "-" - ], - [ - 0.440101, - "s" - ], - [ - 0.177193, - "t" - ], - [ - 0.203817, - "a" - ], - [ - 0.217150, - "t" - ], - [ - 0.229771, - "s" - ], - [ - 0.191220, - " " - ], - [ - 0.269939, - "-" - ], - [ - 0.145163, - "-" - ], - [ - 0.450053, - "p" - ], - [ - 0.165194, - "r" - ], - [ - 0.044264, - "o" - ], - [ - 0.204568, - "g" - ], - [ - 0.104759, - "r" - ], - [ - 0.213137, - "e" - ], - [ - 0.216596, - "s" - ], - [ - 0.163238, - "s" - ], - [ - 0.241084, - " " - ], - [ - 0.300727, - "-" - ], - [ - 0.149156, - "-" - ], - [ - 0.259608, - "c" - ], - [ - 0.120930, - "o" - ], - [ - 0.098838, - "m" - ], - [ - 0.234615, - "p" - ], - [ - 0.084600, - "r" - ], - [ - 0.166072, - "e" - ], - [ - 0.185576, - "s" - ], - [ - 0.159984, - "s" - ], - [ - 0.122793, - "i" - ], - [ - 0.180423, - "o" - ], - [ - 0.196311, - "n" - ], - [ - 0.181682, - " " - ], - [ - 0.242129, - "l" - ], - [ - 0.842020, - "z" - ], - [ - 0.707941, - "4" - ], - [ - 0.180354, - " " - ], - [ - 0.419080, - "r" - ], - [ - 0.189076, - "e" - ], - [ - 0.172527, - "p" - ], - [ - 0.154922, - "o" - ], - [ - 0.728059, - ":" - ], - [ - 0.147089, - ":" - ], - [ - 0.396117, - "b" - ], - [ - 0.090233, - "a" - ], - [ - 0.199537, - "c" - ], - [ - 0.084686, - "k" - ], - [ - 0.278049, - "u \r" - ], - [ - 0.268438, - "p" - ], - [ - 0.491592, - "1" - ], - [ - 0.508588, - " " - ], - [ - 0.174143, - "d" - ], - [ - 0.175430, - "a" - ], - [ - 0.166841, - "t" - ], - [ - 0.127029, - "a" - ], - [ - 0.380593, - "\r\n" - ], - [ - 0.557518, - " 2.68 MB O 1.25 MB C 1.25 MB D data/linux-4.1.8/Doc...ia/v4l/func-read.xml\r" - ], - [ - 0.200102, - " 5.37 MB O 2.46 MB C 2.46 MB D data/linux-4.1.8/Documentation/backlight \r" - ], - [ - 0.200342, - " 6.99 MB O 3.36 MB C 3.36 MB D data/linux-4.1.8/Doc...rm_big_little_dt.txt\r" - ], - [ - 0.200137, - " 7.83 MB O 3.87 MB C 3.87 MB D data/linux-4.1.8/Doc...s/mtd/atmel-nand.txt\r" - ], - [ - 0.200271, - " 8.77 MB O 4.41 MB C 4.41 MB D data/linux-4.1.8/Doc...ngs/soc/fsl/qman.txt\r" - ], - [ - 0.200577, - " 9.99 MB O 5.12 MB C 5.12 MB D data/linux-4.1.8/Doc...ching/cachefiles.txt\r" - ], - [ - 
0.200033, - " 12.11 MB O 6.34 MB C 6.33 MB D data/linux-4.1.8/Doc...infiniband/ipoib.txt\r" - ], - [ - 0.200272, - " 15.27 MB O 8.08 MB C 8.08 MB D data/linux-4.1.8/Doc.../networking/team.txt\r" - ], - [ - 0.200072, - " 18.22 MB O 9.72 MB C 9.71 MB D data/linux-4.1.8/Doc...tation/sysctl/vm.txt\r" - ], - [ - 0.202107, - " 21.05 MB O 11.22 MB C 11.21 MB D data/linux-4.1.8/MAINTAINERS \r" - ], - [ - 0.200251, - " 23.04 MB O 12.20 MB C 12.20 MB D data/linux-4.1.8/arc...de/uapi/asm/unistd.h\r" - ], - [ - 0.200450, - " 25.45 MB O 13.17 MB C 13.17 MB D data/linux-4.1.8/arc.../boot/dts/imx23.dtsi\r" - ], - [ - 0.200093, - " 27.65 MB O 14.01 MB C 14.00 MB D data/linux-4.1.8/arc...omap3-overo-tobi.dts\r" - ], - [ - 0.200314, - " 30.26 MB O 14.89 MB C 14.89 MB D data/linux-4.1.8/arc...ot/dts/tps65910.dtsi\r" - ], - [ - 0.200003, - " 31.90 MB O 15.63 MB C 15.63 MB D data/linux-4.1.8/arc...include/asm/probes.h\r" - ], - [ - 0.200493, - " 34.66 MB O 16.84 MB C 16.83 MB D data/linux-4.1.8/arc...i/include/mach/psc.h\r" - ], - [ - 0.200675, - " 36.62 MB O 17.70 MB C 17.70 MB D data/linux-4.1.8/arc...mach-ixp4xx/common.c\r" - ], - [ - 0.200307, - " 38.40 MB O 18.54 MB C 18.53 MB D data/linux-4.1.8/arch/arm/mach-omap2/cm.h \r" - ], - [ - 0.200254, - " 41.29 MB O 19.63 MB C 19.63 MB D data/linux-4.1.8/arch/arm/mach-pxa/idp.c \r" - ], - [ - 0.219493, - " 43.57 MB O 20.67 MB C 20.66 MB D data/linux-4.1.8/arc...bile/clock-r8a7778.c\r" - ], - [ - 0.200451, - " 45.55 MB O 21.59 MB C 21.59 MB D data/linux-4.1.8/arc...m/plat-samsung/adc.c\r" - ], - [ - 0.200370, - " 47.50 MB O 22.51 MB C 22.51 MB D data/linux-4.1.8/arch/arm64/lib/memmove.S \r" - ], - [ - 0.200686, - " 49.21 MB O 23.33 MB C 23.32 MB D data/linux-4.1.8/arc...ckfin/kernel/trace.c\r" - ], - [ - 0.200393, - " 53.22 MB O 24.51 MB C 24.50 MB D data/linux-4.1.8/arch/c6x/include/asm/soc.h\r" - ], - [ - 0.200371, - " 56.19 MB O 25.50 MB C 25.49 MB D data/linux-4.1.8/arc...op/iop_sw_cpu_defs.h\r" - ], - [ - 0.200450, - " 57.84 MB O 26.17 MB C 26.14 MB D data/linux-4.1.8/arc...include/asm/vm_mmu.h\r" - ], - [ - 0.200573, - " 60.21 MB O 27.27 MB C 27.25 MB D data/linux-4.1.8/arch/ia64/kernel/time.c \r" - ], - [ - 0.200222, - " 62.31 MB O 28.18 MB C 28.15 MB D data/linux-4.1.8/arc.../coldfire/sltimers.c\r" - ], - [ - 0.200756, - " 67.09 MB O 29.98 MB C 29.90 MB D data/linux-4.1.8/arc...8k/include/asm/tlb.h\r" - ], - [ - 0.200716, - " 68.75 MB O 30.80 MB C 30.72 MB D data/linux-4.1.8/arc...ude/uapi/asm/fcntl.h\r" - ], - [ - 0.200734, - " 70.69 MB O 31.67 MB C 31.59 MB D data/linux-4.1.8/arc...figs/malta_defconfig\r" - ], - [ - 0.200198, - " 72.12 MB O 32.34 MB C 32.26 MB D data/linux-4.1.8/arc...de/asm/mc146818rtc.h\r" - ], - [ - 0.200446, - " 76.01 MB O 33.45 MB C 33.37 MB D data/linux-4.1.8/arch/mips/jazz/jazzdma.c \r" - ], - [ - 0.200111, - " 78.19 MB O 34.46 MB C 34.38 MB D data/linux-4.1.8/arc...tlogic/common/time.c\r" - ], - [ - 0.200191, - " 79.84 MB O 35.30 MB C 35.21 MB D data/linux-4.1.8/arc...de/uapi/asm/msgbuf.h\r" - ], - [ - 0.200421, - " 81.35 MB O 36.07 MB C 35.99 MB D data/linux-4.1.8/arc...sc/include/asm/rtc.h\r" - ], - [ - 0.200090, - " 83.49 MB O 37.03 MB C 36.95 MB D data/linux-4.1.8/arc...fsl/qoriq-dma-1.dtsi\r" - ], - [ - 0.200331, - " 85.13 MB O 37.80 MB C 37.72 MB D data/linux-4.1.8/arc...pc836x_rdk_defconfig\r" - ], - [ - 0.200114, - " 87.04 MB O 38.71 MB C 38.63 MB D data/linux-4.1.8/arc...ude/uapi/asm/nvram.h\r" - ], - [ - 0.200280, - " 90.24 MB O 40.19 MB C 40.11 MB D data/linux-4.1.8/arc...pc/math-emu/mtfsfi.c\r" - ], - [ - 0.216796, - 
" 92.68 MB O 41.41 MB C 41.33 MB D data/linux-4.1.8/arc...rms/powermac/nvram.c\r" - ], - [ - 0.200198, - " 95.32 MB O 42.60 MB C 42.52 MB D data/linux-4.1.8/arc...nclude/asm/pgtable.h\r" - ], - [ - 0.200304, - " 97.31 MB O 43.50 MB C 43.42 MB D data/linux-4.1.8/arc...mach-dreamcast/irq.c\r" - ], - [ - 0.200328, - " 99.46 MB O 44.41 MB C 44.33 MB D data/linux-4.1.8/arc...artner-jet-setup.txt\r" - ], - [ - 0.200102, - "101.28 MB O 45.25 MB C 45.16 MB D data/linux-4.1.8/arc...rc/include/asm/ecc.h\r" - ], - [ - 0.200253, - "103.53 MB O 46.27 MB C 46.19 MB D data/linux-4.1.8/arc.../kernel/una_asm_64.S\r" - ], - [ - 0.200503, - "105.76 MB O 47.32 MB C 47.23 MB D data/linux-4.1.8/arch/tile/kernel/reboot.c \r" - ], - [ - 0.201177, - "107.64 MB O 48.27 MB C 48.18 MB D data/linux-4.1.8/arc...t/compressed/eboot.c\r" - ], - [ - 0.200192, - "109.82 MB O 49.22 MB C 49.13 MB D data/linux-4.1.8/arc...clude/asm/spinlock.h\r" - ], - [ - 0.200851, - "112.71 MB O 50.56 MB C 50.48 MB D data/linux-4.1.8/arch/x86/kernel/ptrace.c \r" - ], - [ - 0.200195, - "115.71 MB O 51.96 MB C 51.87 MB D data/linux-4.1.8/arc...s/platform_emc1403.c\r" - ], - [ - 0.200306, - "117.28 MB O 52.79 MB C 52.70 MB D data/linux-4.1.8/arc...nclude/variant/tie.h\r" - ], - [ - 0.204475, - "122.68 MB O 55.35 MB C 55.26 MB D data/linux-4.1.8/fir...x-e1-6.2.9.0.fw.ihex\r" - ], - [ - 0.199974, - "127.39 MB O 58.15 MB C 57.97 MB D data/linux-4.1.8/fs/afs/fsclient.c \r" - ], - [ - 0.201254, - "132.58 MB O 60.42 MB C 60.24 MB D data/linux-4.1.8/fs/cifs/cifssmb.c \r" - ], - [ - 0.216710, - "136.76 MB O 62.28 MB C 62.10 MB D data/linux-4.1.8/fs/ext4/inline.c \r" - ], - [ - 0.200891, - "140.78 MB O 64.15 MB C 63.97 MB D data/linux-4.1.8/fs/jbd2/commit.c \r" - ], - [ - 0.199883, - "144.88 MB O 65.98 MB C 65.80 MB D data/linux-4.1.8/fs/nfs/objlayout \r" - ], - [ - 0.201488, - "150.31 MB O 67.96 MB C 67.78 MB D data/linux-4.1.8/fs/...fy/dnotify/dnotify.c\r" - ], - [ - 0.205472, - "154.72 MB O 69.97 MB C 69.79 MB D data/linux-4.1.8/fs/quota/dquot.c \r" - ], - [ - 0.200493, - "159.06 MB O 71.91 MB C 71.73 MB D data/linux-4.1.8/fs/...xfs/xfs_inode_fork.h\r" - ], - [ - 0.200000, - "161.54 MB O 73.09 MB C 72.91 MB D data/linux-4.1.8/inc.../crypto/public_key.h\r" - ], - [ - 0.205041, - "164.32 MB O 74.28 MB C 74.09 MB D data/linux-4.1.8/inc...inux/cgroup_subsys.h\r" - ], - [ - 0.200371, - "166.33 MB O 75.23 MB C 75.05 MB D data/linux-4.1.8/include/linux/if_team.h \r" - ], - [ - 0.200340, - "168.82 MB O 76.24 MB C 76.06 MB D data/linux-4.1.8/inc.../mfd/pcf50633/gpio.h\r" - ], - [ - 0.200162, - "171.65 MB O 77.36 MB C 77.17 MB D data/linux-4.1.8/include/linux/phy.h \r" - ], - [ - 0.200385, - "172.84 MB O 77.97 MB C 77.79 MB D data/linux-4.1.8/include/linux/scc.h \r" - ], - [ - 0.200918, - "174.87 MB O 78.94 MB C 78.76 MB D data/linux-4.1.8/include/linux/wait.h \r" - ], - [ - 0.200117, - "177.06 MB O 80.01 MB C 79.83 MB D data/linux-4.1.8/inc...er/nfnetlink_queue.h\r" - ], - [ - 0.200254, - "179.53 MB O 81.13 MB C 80.95 MB D data/linux-4.1.8/inc...e/events/intel-sst.h\r" - ], - [ - 0.200176, - "181.40 MB O 82.05 MB C 81.86 MB D data/linux-4.1.8/include/uapi/linux/mpls.h \r" - ], - [ - 0.200438, - "183.11 MB O 82.88 MB C 82.70 MB D data/linux-4.1.8/inc...api/scsi/fc/fc_els.h\r" - ], - [ - 0.200226, - "186.12 MB O 84.31 MB C 84.12 MB D data/linux-4.1.8/kernel/jump_label.c \r" - ], - [ - 0.200138, - "190.76 MB O 86.46 MB C 86.28 MB D data/linux-4.1.8/lib/Kconfig.debug \r" - ], - [ - 0.200958, - "194.21 MB O 87.82 MB C 87.64 MB D data/linux-4.1.8/mm/memblock.c \r" 
- ], - [ - 0.200544, - "198.19 MB O 89.69 MB C 89.51 MB D data/linux-4.1.8/net/bluetooth/ecc.c \r" - ], - [ - 0.200232, - "202.28 MB O 91.52 MB C 91.34 MB D data/linux-4.1.8/net/hsr/hsr_slave.c \r" - ], - [ - 0.200153, - "206.23 MB O 93.40 MB C 93.22 MB D data/linux-4.1.8/net/ipx/af_ipx.c \r" - ], - [ - 0.200526, - "210.30 MB O 95.08 MB C 94.89 MB D data/linux-4.1.8/net...ter/ipvs/ip_vs_ftp.c\r" - ], - [ - 0.200433, - "213.29 MB O 96.37 MB C 96.19 MB D data/linux-4.1.8/net/phonet/af_phonet.c \r" - ], - [ - 0.200669, - "217.21 MB O 98.21 MB C 98.03 MB D data/linux-4.1.8/net.../svc_rdma_recvfrom.c\r" - ], - [ - 0.200014, - "220.20 MB O 99.53 MB C 99.35 MB D data/linux-4.1.8/scr...e/free/iounmap.cocci\r" - ], - [ - 0.200446, - "222.94 MB O 100.82 MB C 100.64 MB D data/linux-4.1.8/security/selinux/Makefile \r" - ], - [ - 0.214711, - "226.41 MB O 102.23 MB C 102.05 MB D data/linux-4.1.8/sou...seq/seq_midi_event.c\r" - ], - [ - 0.202631, - "228.96 MB O 103.31 MB C 103.13 MB D data/linux-4.1.8/sound/mips/ad1843.c \r" - ], - [ - 0.200095, - "232.28 MB O 104.65 MB C 104.47 MB D data/linux-4.1.8/sound/pci/ctxfi/Makefile \r" - ], - [ - 0.200726, - "236.33 MB O 106.24 MB C 106.06 MB D data/linux-4.1.8/sound/pci/nm256/Makefile \r" - ], - [ - 0.199902, - "239.73 MB O 107.58 MB C 107.40 MB D data/linux-4.1.8/sou.../codecs/cs4271-spi.c\r" - ], - [ - 0.200592, - "244.29 MB O 109.08 MB C 108.90 MB D data/linux-4.1.8/sound/soc/codecs/wm8940.c \r" - ], - [ - 0.200357, - "247.98 MB O 110.35 MB C 110.17 MB D data/linux-4.1.8/sou...oc/omap/omap-mcpdm.c\r" - ], - [ - 0.200901, - "250.64 MB O 111.50 MB C 111.32 MB D data/linux-4.1.8/sound/usb/mixer_scarlett.c\r" - ], - [ - 0.200535, - "252.14 MB O 112.20 MB C 112.01 MB D data/linux-4.1.8/tools/perf/builtin-kvm.c \r" - ], - [ - 0.200239, - "254.11 MB O 113.07 MB C 112.88 MB D data/linux-4.1.8/tools/perf/util/record.c \r" - ], - [ - 0.200233, - "255.70 MB O 113.82 MB C 113.64 MB D data/linux-4.1.8/too...re/bin/configinit.sh\r" - ], - [ - 0.395702, - " \r------------------------------------------------------------------------------\r\nArchive name: backup1\r\nArchive fingerprint: b3104802be9faa610f281619c69e4d3e672df2ce97528a35d83f15080d02ed86\r\nStart time: Sat Oct 24 22:27:24 2015\r\nEnd time: Sat Oct 24 22:27:43 2015\r\nDuration: 19.32 seconds\r\nNumber of files: 31557\r\n\r\n Original size Compressed size Deduplicated size\r\nThis archive: 257.06 MB 114.44 MB 114.26 MB\r\nAll archives: 257.06 MB 114.44 MB 114.26 MB\r\n\r\n Unique chunks Total chunks\r\nChunk index: 33731 34030\r\n------------------------------------------------------------------------------\r\n" - ], - [ - 0.046138, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 1.000000, - "#" - ], - [ - 0.564664, - " " - ], - [ - 0.313339, - "c" - ], - [ - 0.492152, - "h" - ], - [ - 0.479518, - "a" - ], - [ - 0.536708, - "n" - ], - [ - 0.134006, - "g" - ], - [ - 0.147326, - "e" - ], - [ - 0.068957, - " " - ], - [ - 0.179678, - "s" - ], - [ - 0.096249, - "o" - ], - [ - 0.081003, - "m" - ], - [ - 0.124342, - "e" - ], - [ - 0.117830, - " " - ], - [ - 0.138019, - "d" - ], - [ - 0.137898, - "a" - ], - [ - 0.199628, - "t" - ], - [ - 0.104935, - "a" - ], - [ - 0.150868, - " " - ], - [ - 0.144877, - "s" - ], - [ - 0.126816, - "l" - ], - [ - 0.178466, - "i" - ], - [ - 0.113395, - "g" - ], - [ - 0.101022, - "h" - ], - [ - 0.102395, - "t" - ], - [ - 0.311498, - "l" - ], - [ - 0.366608, - "y" - ], - [ - 0.657991, - ":" - ], - [ - 0.423140, - "\r\n" - ], - [ - 0.000708, - "\u001b]0;tw@tux: 
~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 2.000000, - "e" - ], - [ - 0.000021, - "c" - ], - [ - 0.000024, - "h" - ], - [ - 0.000029, - "o" - ], - [ - 0.000018, - " " - ], - [ - 0.000025, - "\"" - ], - [ - 0.000025, - "s" - ], - [ - 0.000026, - "o" - ], - [ - 0.000070, - "me " - ], - [ - 0.000022, - "m" - ], - [ - 0.000028, - "o" - ], - [ - 0.000027, - "r" - ], - [ - 0.000026, - "e" - ], - [ - 0.000029, - " " - ], - [ - 0.000028, - "d" - ], - [ - 0.000028, - "a" - ], - [ - 0.000028, - "t" - ], - [ - 0.000026, - "a" - ], - [ - 0.000033, - "\"" - ], - [ - 0.000028, - " " - ], - [ - 0.000059, - "\u003e" - ], - [ - 0.000045, - " " - ], - [ - 0.000020, - "d" - ], - [ - 0.000040, - "a" - ], - [ - 0.000035, - "t" - ], - [ - 0.000039, - "a" - ], - [ - 0.000034, - "/" - ], - [ - 0.000034, - "o" - ], - [ - 0.000034, - "n" - ], - [ - 0.000037, - "e" - ], - [ - 0.000036, - "_" - ], - [ - 0.000037, - "f" - ], - [ - 0.000037, - "i" - ], - [ - 0.000717, - "le_more\r\n" - ], - [ - 0.000181, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 2.000000, - "#" - ], - [ - 0.266289, - " " - ], - [ - 0.194686, - "n" - ], - [ - 0.157296, - "o" - ], - [ - 0.084026, - "w" - ], - [ - 0.092729, - " " - ], - [ - 0.148154, - "c" - ], - [ - 0.169136, - "r" - ], - [ - 0.214327, - "e" - ], - [ - 0.180678, - "a" - ], - [ - 0.161652, - "t" - ], - [ - 0.128260, - "e" - ], - [ - 0.158131, - " " - ], - [ - 0.118838, - "a" - ], - [ - 0.120885, - " " - ], - [ - 0.797511, - "s" - ], - [ - 0.200585, - "e" - ], - [ - 0.171811, - "c" - ], - [ - 0.106721, - "o" - ], - [ - 0.153298, - "n" - ], - [ - 0.052244, - "d" - ], - [ - 0.149675, - " " - ], - [ - 0.183517, - "b" - ], - [ - 0.076768, - "a" - ], - [ - 0.189428, - "c" - ], - [ - 0.088431, - "k" - ], - [ - 0.229617, - "u" - ], - [ - 0.272021, - "p" - ], - [ - 0.965855, - ":" - ], - [ - 0.674517, - "\r\n" - ], - [ - 0.000322, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.946131, - "b" - ], - [ - 0.111159, - "o" - ], - [ - 0.094622, - "r" - ], - [ - 0.085288, - "g" - ], - [ - 0.165429, - " " - ], - [ - 0.936087, - "c" - ], - [ - 0.192608, - "r" - ], - [ - 0.187511, - "e" - ], - [ - 0.173135, - "a" - ], - [ - 0.179441, - "t" - ], - [ - 0.125923, - "e" - ], - [ - 0.164920, - " " - ], - [ - 0.737259, - "-" - ], - [ - 0.185417, - "-" - ], - [ - 0.233405, - "s" - ], - [ - 0.152945, - "t" - ], - [ - 0.181548, - "a" - ], - [ - 0.330237, - "t" - ], - [ - 0.735524, - "s" - ], - [ - 0.179019, - " " - ], - [ - 0.245324, - "-" - ], - [ - 0.142362, - "-" - ], - [ - 0.233989, - "p" - ], - [ - 0.153782, - "r" - ], - [ - 0.064431, - "o" - ], - [ - 0.104827, - "g" - ], - [ - 0.090533, - "r" - ], - [ - 0.168129, - "e" - ], - [ - 0.206325, - "s" - ], - [ - 0.157551, - "s" - ], - [ - 0.383630, - " " - ], - [ - 0.759364, - "r" - ], - [ - 0.199262, - "e" - ], - [ - 0.139781, - "p" - ], - [ - 0.151367, - "o" - ], - [ - 0.720350, - ":" - ], - [ - 0.144801, - ":" - ], - [ - 0.532566, - "b" - ], - [ - 0.226514, - "a" - ], - [ - 0.209449, - "c" - ], - [ - 0.142062, - "k" - ], - [ - 0.300090, - "u" - ], - [ - 0.262794, - "p" - ], - [ - 0.218785, - "2" - ], - [ - 0.249599, - " " - ], - [ - 0.187125, - "d" - ], - [ - 0.157741, - "a" - ], - [ - 0.175739, - "t" - ], - [ - 0.139896, - "a" - ], - [ - 0.795560, - "\r\n" - ], - [ - 0.571808, - " 6.50 MB O 3.09 MB C 0 B D data/linux-4.1.8/Doc...ngs/arm/armadeus.txt\r" - ], - [ - 0.200103, - " 11.82 MB O 6.17 MB C 0 B D data/linux-4.1.8/Documentation/hwmon/w83795\r" - ], - [ - 0.200121, - " 27.38 MB O 13.89 MB C 0 B D 
data/linux-4.1.8/arc...ot/dts/nspire-cx.dts\r" - ], - [ - 0.200110, - " 39.92 MB O 19.04 MB C 0 B D data/linux-4.1.8/arc...omap2/opp2430_data.c\r" - ], - [ - 0.200088, - " 52.28 MB O 24.23 MB C 0 B D data/linux-4.1.8/arc...fin/mach-bf561/smp.c\r" - ], - [ - 0.200078, - " 67.02 MB O 29.94 MB C 0 B D data/linux-4.1.8/arc...8k/include/asm/pci.h\r" - ], - [ - 0.200116, - " 78.29 MB O 34.52 MB C 0 B D data/linux-4.1.8/arc...etlogic/xlr/wakeup.c\r" - ], - [ - 0.200081, - " 90.07 MB O 40.11 MB C 0 B D data/linux-4.1.8/arc...eature-fixups-test.S\r" - ], - [ - 0.200092, - "101.15 MB O 45.19 MB C 0 B D data/linux-4.1.8/arc...rc/crypto/sha1_asm.S\r" - ], - [ - 0.200078, - "115.05 MB O 51.63 MB C 0 B D data/linux-4.1.8/arc...6/mm/kasan_init_64.c\r" - ], - [ - 0.200062, - "147.39 MB O 66.98 MB C 0 B D data/linux-4.1.8/fs/nls/nls_cp864.c \r" - ], - [ - 0.200117, - "169.16 MB O 76.38 MB C 0 B D data/linux-4.1.8/inc.../mfd/twl4030-audio.h\r" - ], - [ - 0.200074, - "181.43 MB O 82.06 MB C 0 B D data/linux-4.1.8/include/uapi/linux/mtio.h \r" - ], - [ - 0.200131, - "209.10 MB O 94.58 MB C 0 B D data/linux-4.1.8/net/mac80211/scan.c \r" - ], - [ - 0.200079, - "234.87 MB O 105.68 MB C 0 B D data/linux-4.1.8/sou...i/hda/patch_si3054.c\r" - ], - [ - 0.200110, - "255.66 MB O 113.80 MB C 0 B D data/linux-4.1.8/too...ves/asm/asm-compat.h\r" - ], - [ - 0.201350, - " \r------------------------------------------------------------------------------\r\nArchive name: backup2\r\nArchive fingerprint: 5737afe8ad2cda7667973b7f2e1d83f097ef3117b5753a38ba7664b616fbdc5a\r\nStart time: Sat Oct 24 22:28:24 2015\r\nEnd time: Sat Oct 24 22:28:27 2015\r\nDuration: 3.41 seconds\r\nNumber of files: 31557\r\n" - ], - [ - 0.001858, - "\r\n Original size Compressed size Deduplicated size\r\nThis archive: 257.06 MB 114.47 MB 45.19 kB\r\nAll archives: 514.12 MB 228.92 MB 114.31 MB\r\n\r\n Unique chunks Total chunks\r\nChunk index: 33733 68060\r\n------------------------------------------------------------------------------\r\n" - ], - [ - 0.033369, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 1.013315, - "#" - ], - [ - 0.482095, - " " - ], - [ - 0.319571, - "w" - ], - [ - 0.140740, - "o" - ], - [ - 0.090380, - "w" - ], - [ - 0.304400, - "," - ], - [ - 0.137310, - " " - ], - [ - 0.662280, - "t" - ], - [ - 0.162678, - "h" - ], - [ - 0.114083, - "a" - ], - [ - 0.077660, - "t" - ], - [ - 0.120839, - " " - ], - [ - 0.207626, - "w" - ], - [ - 0.195480, - "a" - ], - [ - 0.060188, - "s" - ], - [ - 0.149129, - " " - ], - [ - 0.094522, - "a" - ], - [ - 0.098801, - " " - ], - [ - 0.266235, - "l" - ], - [ - 0.184774, - "o" - ], - [ - 0.255040, - "t" - ], - [ - 0.170498, - " " - ], - [ - 0.201599, - "f" - ], - [ - 0.189774, - "a" - ], - [ - 0.229140, - "s" - ], - [ - 0.275243, - "t" - ], - [ - 0.177347, - "e" - ], - [ - 0.090806, - "r" - ], - [ - 0.204494, - "!" 
- ], - [ - 0.479851, - "\r\n" - ], - [ - 0.000316, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.734961, - "#" - ], - [ - 0.300000, - " " - ], - [ - 0.300000, - "n" - ], - [ - 0.199297, - "o" - ], - [ - 0.148047, - "t" - ], - [ - 0.071101, - "i" - ], - [ - 0.185554, - "c" - ], - [ - 0.395933, - "e" - ], - [ - 0.180285, - " " - ], - [ - 0.199321, - "t" - ], - [ - 0.094767, - "h" - ], - [ - 0.166966, - "a" - ], - [ - 0.102814, - "t" - ], - [ - 0.415016, - " " - ], - [ - 0.286089, - "\"" - ], - [ - 0.795323, - "D" - ], - [ - 0.180152, - "e" - ], - [ - 0.311214, - "d" - ], - [ - 0.214812, - "u" - ], - [ - 0.251616, - "p" - ], - [ - 0.203533, - "l" - ], - [ - 0.187084, - "i" - ], - [ - 0.124066, - "c" - ], - [ - 0.158062, - "a" - ], - [ - 0.260540, - "t" - ], - [ - 0.136405, - "e" - ], - [ - 0.278039, - "d" - ], - [ - 0.323148, - " " - ], - [ - 0.172337, - "s" - ], - [ - 0.074541, - "i" - ], - [ - 0.269245, - "z" - ], - [ - 0.123599, - "e" - ], - [ - 0.533647, - "\"" - ], - [ - 0.234738, - " " - ], - [ - 0.150720, - "f" - ], - [ - 0.144329, - "o" - ], - [ - 0.086533, - "r" - ], - [ - 0.159717, - " " - ], - [ - 0.274291, - "\"" - ], - [ - 0.471163, - "T" - ], - [ - 0.162135, - "h" - ], - [ - 0.233501, - "i" - ], - [ - 0.134923, - "s" - ], - [ - 0.190779, - " " - ], - [ - 0.307322, - "a" - ], - [ - 0.153882, - "r" - ], - [ - 0.246471, - "c" - ], - [ - 0.110018, - "h" - ], - [ - 0.259798, - "i" - ], - [ - 0.132853, - "v" - ], - [ - 0.171373, - "e" - ], - [ - 0.560405, - "\"" - ], - [ - 0.609162, - "!" - ], - [ - 0.559020, - "\r\n" - ], - [ - 0.000296, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.941369, - "#" - ], - [ - 0.254237, - " " - ], - [ - 0.176998, - "b" - ], - [ - 0.124943, - "o" - ], - [ - 0.097140, - "r" - ], - [ - 0.057513, - "g" - ], - [ - 0.232990, - " " - ], - [ - 0.549539, - "r" - ], - [ - 0.112992, - "e" - ], - [ - 0.240733, - "c" - ], - [ - 0.164146, - "o" - ], - [ - 0.209755, - "g" - ], - [ - 0.145638, - "n" - ], - [ - 0.151826, - "i" - ], - [ - 0.471625, - "z" - ], - [ - 0.759625, - "e" - ], - [ - 0.229566, - "d" - ], - [ - 0.254596, - " " - ], - [ - 0.209452, - "t" - ], - [ - 0.088606, - "h" - ], - [ - 0.155981, - "a" - ], - [ - 0.086797, - "t" - ], - [ - 0.098574, - " " - ], - [ - 0.243290, - "m" - ], - [ - 0.120288, - "o" - ], - [ - 0.092890, - "s" - ], - [ - 0.058823, - "t" - ], - [ - 0.125344, - " " - ], - [ - 0.211464, - "f" - ], - [ - 0.086483, - "i" - ], - [ - 0.213685, - "l" - ], - [ - 0.096764, - "e" - ], - [ - 0.176075, - "s" - ], - [ - 0.122962, - " " - ], - [ - 0.174342, - "d" - ], - [ - 0.103474, - "i" - ], - [ - 0.089744, - "d" - ], - [ - 0.181539, - " " - ], - [ - 0.461771, - "n" - ], - [ - 0.219395, - "o" - ], - [ - 0.095042, - "t" - ], - [ - 0.119662, - " " - ], - [ - 0.156060, - "c" - ], - [ - 0.116988, - "h" - ], - [ - 0.118775, - "a" - ], - [ - 0.126173, - "n" - ], - [ - 0.118518, - "g" - ], - [ - 0.109977, - "e" - ], - [ - 0.167095, - " " - ], - [ - 0.208137, - "a" - ], - [ - 0.155464, - "n" - ], - [ - 0.074939, - "d" - ], - [ - 0.616534, - "\r\n" - ], - [ - 0.000405, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.836535, - "#" - ], - [ - 0.248630, - " " - ], - [ - 0.211525, - "d" - ], - [ - 0.171252, - "e" - ], - [ - 0.244098, - "d" - ], - [ - 0.121718, - "u" - ], - [ - 0.219002, - "p" - ], - [ - 0.197839, - "l" - ], - [ - 0.161081, - "i" - ], - [ - 0.112763, - "c" - ], - [ - 0.154565, - "a" - ], - [ - 0.230427, - "t" - ], - [ - 0.180004, - "e" - ], - [ - 0.182279, - "d" - ], - 
[ - 0.201281, - " " - ], - [ - 0.202485, - "t" - ], - [ - 0.078397, - "h" - ], - [ - 0.178577, - "e" - ], - [ - 0.150264, - "m" - ], - [ - 0.482274, - "." - ], - [ - 0.300000, - "\r\n" - ], - [ - 0.000265, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.287124, - "#" - ], - [ - 0.314208, - " " - ], - [ - 0.219731, - "n" - ], - [ - 0.176210, - "o" - ], - [ - 0.108529, - "w" - ], - [ - 0.224056, - "," - ], - [ - 0.210976, - " " - ], - [ - 0.190508, - "l" - ], - [ - 0.098452, - "e" - ], - [ - 0.101431, - "t" - ], - [ - 0.855722, - "'" - ], - [ - 0.220403, - "s" - ], - [ - 0.229447, - " " - ], - [ - 0.134839, - "e" - ], - [ - 0.241915, - "x" - ], - [ - 0.217004, - "t" - ], - [ - 0.183774, - "r" - ], - [ - 0.231721, - "a" - ], - [ - 0.221361, - "c" - ], - [ - 0.436221, - "t" - ], - [ - 0.097256, - " " - ], - [ - 0.163933, - "a" - ], - [ - 0.099964, - " " - ], - [ - 0.216806, - "b" - ], - [ - 0.086493, - "a" - ], - [ - 0.211732, - "c" - ], - [ - 0.139016, - "k" - ], - [ - 0.379423, - "u" - ], - [ - 0.250049, - "p" - ], - [ - 0.717916, - ":" - ], - [ - 0.307136, - "\r\n" - ], - [ - 0.000301, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.967533, - "m" - ], - [ - 0.103707, - "v" - ], - [ - 0.407957, - " " - ], - [ - 0.177312, - "d" - ], - [ - 0.166158, - "a" - ], - [ - 0.242593, - "t" - ], - [ - 0.090471, - "a" - ], - [ - 0.699594, - " " - ], - [ - 0.273219, - "d" - ], - [ - 0.170371, - "a" - ], - [ - 0.169331, - "t" - ], - [ - 0.126739, - "a" - ], - [ - 0.288488, - "." - ], - [ - 0.305856, - "o" - ], - [ - 0.135252, - "r" - ], - [ - 0.152717, - "i" - ], - [ - 0.090343, - "g" - ], - [ - 0.312536, - "\r\n" - ], - [ - 0.002579, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.944876, - "b" - ], - [ - 0.149049, - "o" - ], - [ - 0.114834, - "r" - ], - [ - 0.074682, - "g" - ], - [ - 0.129000, - " " - ], - [ - 0.129618, - "e" - ], - [ - 0.261479, - "x" - ], - [ - 0.203937, - "t" - ], - [ - 0.196213, - "r" - ], - [ - 0.193561, - "a" - ], - [ - 0.215314, - "c" - ], - [ - 0.236817, - "t" - ], - [ - 0.188232, - " " - ], - [ - 0.177286, - "r" - ], - [ - 0.200598, - "e" - ], - [ - 0.105866, - "p" - ], - [ - 0.173864, - "o" - ], - [ - 0.388954, - ":" - ], - [ - 0.144865, - ":" - ], - [ - 0.347420, - "b" - ], - [ - 0.105814, - "a" - ], - [ - 0.198728, - "c" - ], - [ - 0.096349, - "k" - ], - [ - 0.261559, - "u" - ], - [ - 0.241998, - "p" - ], - [ - 0.240033, - "2" - ], - [ - 0.981903, - "\r\n" - ], - [ - 2.000000, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.937291, - "#" - ], - [ - 0.994897, - " " - ], - [ - 0.151752, - "c" - ], - [ - 0.096956, - "h" - ], - [ - 0.337975, - "e" - ], - [ - 0.207037, - "c" - ], - [ - 0.177028, - "k" - ], - [ - 0.740370, - " " - ], - [ - 0.330206, - "i" - ], - [ - 0.177976, - "f" - ], - [ - 0.218757, - " " - ], - [ - 0.329345, - "o" - ], - [ - 0.098735, - "r" - ], - [ - 0.098576, - "i" - ], - [ - 0.103157, - "g" - ], - [ - 0.107275, - "i" - ], - [ - 0.117332, - "n" - ], - [ - 0.194072, - "a" - ], - [ - 0.211456, - "l" - ], - [ - 0.197712, - " " - ], - [ - 0.189172, - "d" - ], - [ - 0.163930, - "a" - ], - [ - 0.188334, - "t" - ], - [ - 0.165129, - "a" - ], - [ - 0.220652, - " " - ], - [ - 0.224411, - "a" - ], - [ - 0.136137, - "n" - ], - [ - 0.155260, - "d" - ], - [ - 0.074238, - " " - ], - [ - 0.104154, - "r" - ], - [ - 0.690499, - "e" - ], - [ - 0.193678, - "s" - ], - [ - 0.165163, - "t" - ], - [ - 0.165594, - "o" - ], - [ - 0.111779, - "r" - ], - [ - 0.135625, - "e" - ], - [ - 
0.202851, - "d" - ], - [ - 0.096040, - " " - ], - [ - 0.165090, - "d" - ], - [ - 0.155594, - "a" - ], - [ - 0.220606, - "t" - ], - [ - 0.163143, - "a" - ], - [ - 0.174099, - " " - ], - [ - 0.209780, - "d" - ], - [ - 0.166062, - "i" - ], - [ - 0.084688, - "f" - ], - [ - 0.140851, - "f" - ], - [ - 0.204458, - "e" - ], - [ - 0.088661, - "r" - ], - [ - 0.334162, - "s" - ], - [ - 0.904233, - ":" - ], - [ - 0.590489, - "\r\n" - ], - [ - 0.000283, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.503183, - "d" - ], - [ - 0.082614, - "i" - ], - [ - 0.216272, - "f" - ], - [ - 0.123813, - "f" - ], - [ - 0.183603, - " " - ], - [ - 0.302144, - "-" - ], - [ - 0.150946, - "r" - ], - [ - 0.152436, - " " - ], - [ - 2.000000, - "d" - ], - [ - 0.196047, - "a" - ], - [ - 0.206372, - "t" - ], - [ - 0.146051, - "a" - ], - [ - 0.326306, - "." - ], - [ - 0.363408, - "o" - ], - [ - 0.269988, - "rig/" - ], - [ - 0.776581, - " " - ], - [ - 0.137720, - "d" - ], - [ - 0.156080, - "a" - ], - [ - 0.242275, - "\u0007" - ], - [ - 0.000020, - "ta" - ], - [ - 0.872887, - "/" - ], - [ - 0.273993, - "\r\n" - ], - [ - 2.000000, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.488581, - "#" - ], - [ - 0.234021, - " " - ], - [ - 0.380938, - "n" - ], - [ - 0.240685, - "o" - ], - [ - 0.204436, - " " - ], - [ - 0.390794, - "o" - ], - [ - 0.225563, - "u" - ], - [ - 0.167295, - "t" - ], - [ - 0.140625, - "p" - ], - [ - 0.183668, - "u" - ], - [ - 0.106161, - "t" - ], - [ - 0.132063, - " " - ], - [ - 0.204757, - "m" - ], - [ - 0.082693, - "e" - ], - [ - 0.216428, - "a" - ], - [ - 0.121584, - "n" - ], - [ - 0.127398, - "s" - ], - [ - 0.264644, - " " - ], - [ - 0.201524, - "i" - ], - [ - 0.110738, - "t" - ], - [ - 0.120653, - " " - ], - [ - 0.311187, - "d" - ], - [ - 0.119826, - "o" - ], - [ - 0.082654, - "e" - ], - [ - 0.182518, - "s" - ], - [ - 0.096372, - " " - ], - [ - 0.192821, - "n" - ], - [ - 0.193829, - "o" - ], - [ - 0.065739, - "t" - ], - [ - 0.678808, - "." - ], - [ - 0.246797, - " " - ], - [ - 0.520369, - "f" - ], - [ - 0.058288, - "i" - ], - [ - 0.064783, - "n" - ], - [ - 0.104851, - "e" - ], - [ - 0.292910, - "." 
- ], - [ - 0.174086, - " " - ], - [ - 0.226556, - ":" - ], - [ - 0.249808, - ")" - ], - [ - 2.000000, - "\r\n" - ], - [ - 0.000261, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.477759, - "#" - ], - [ - 0.223699, - " " - ], - [ - 0.237979, - "l" - ], - [ - 0.152769, - "i" - ], - [ - 0.135150, - "s" - ], - [ - 0.068576, - "t" - ], - [ - 0.100516, - "i" - ], - [ - 0.078648, - "n" - ], - [ - 0.099435, - "g" - ], - [ - 0.157388, - " " - ], - [ - 0.115327, - "t" - ], - [ - 0.133738, - "h" - ], - [ - 0.135662, - "e" - ], - [ - 0.100677, - " " - ], - [ - 0.180392, - "r" - ], - [ - 0.190922, - "e" - ], - [ - 0.093920, - "p" - ], - [ - 0.173588, - "o" - ], - [ - 0.193023, - " " - ], - [ - 0.206907, - "c" - ], - [ - 0.106376, - "o" - ], - [ - 0.175291, - "n" - ], - [ - 0.080726, - "t" - ], - [ - 0.179258, - "e" - ], - [ - 0.101491, - "n" - ], - [ - 0.096807, - "t" - ], - [ - 0.211455, - "s" - ], - [ - 0.508210, - ":" - ], - [ - 0.373837, - "\r\n" - ], - [ - 0.000249, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.559782, - "b" - ], - [ - 0.116587, - "o" - ], - [ - 0.139513, - "r" - ], - [ - 0.072751, - "g" - ], - [ - 0.103968, - " " - ], - [ - 0.984928, - "l" - ], - [ - 0.173603, - "i" - ], - [ - 0.112444, - "s" - ], - [ - 0.066704, - "t" - ], - [ - 0.114771, - " " - ], - [ - 0.263745, - "r" - ], - [ - 0.113121, - "e" - ], - [ - 0.126283, - "p" - ], - [ - 0.187453, - "o" - ], - [ - 0.409044, - "\r\n" - ], - [ - 0.360675, - "backup1 Sat Oct 24 22:27:43 2015" - ], - [ - 0.000011, - "\r\n" - ], - [ - 0.000006, - "backup2 Sat Oct 24 22:28:27 2015" - ], - [ - 0.000005, - "\r\n" - ], - [ - 0.027766, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.637813, - "#" - ], - [ - 0.257629, - " " - ], - [ - 0.231710, - "l" - ], - [ - 0.213387, - "i" - ], - [ - 0.132149, - "s" - ], - [ - 0.244957, - "t" - ], - [ - 0.180264, - "i" - ], - [ - 0.082882, - "n" - ], - [ - 0.142810, - "g" - ], - [ - 0.134815, - " " - ], - [ - 0.167455, - "s" - ], - [ - 0.114155, - "o" - ], - [ - 0.106847, - "m" - ], - [ - 0.070629, - "e" - ], - [ - 0.507340, - " " - ], - [ - 0.234237, - "b" - ], - [ - 0.070181, - "a" - ], - [ - 0.220534, - "c" - ], - [ - 0.092316, - "k" - ], - [ - 0.257003, - "u" - ], - [ - 0.233598, - "p" - ], - [ - 0.201484, - " " - ], - [ - 0.124810, - "a" - ], - [ - 0.084732, - "r" - ], - [ - 0.249719, - "c" - ], - [ - 0.119605, - "h" - ], - [ - 0.203875, - "i" - ], - [ - 0.076269, - "v" - ], - [ - 0.174299, - "e" - ], - [ - 0.109711, - " " - ], - [ - 0.238294, - "c" - ], - [ - 0.102351, - "o" - ], - [ - 0.155761, - "n" - ], - [ - 0.060278, - "t" - ], - [ - 0.179564, - "e" - ], - [ - 0.112342, - "n" - ], - [ - 0.078100, - "t" - ], - [ - 0.190203, - "s" - ], - [ - 0.865560, - " " - ], - [ - 0.297799, - "(" - ], - [ - 0.225741, - "s" - ], - [ - 0.080329, - "h" - ], - [ - 0.233668, - "o" - ], - [ - 0.127773, - "r" - ], - [ - 0.190065, - "t" - ], - [ - 0.187679, - "e" - ], - [ - 0.147219, - "n" - ], - [ - 0.064472, - "e" - ], - [ - 0.188512, - "d" - ], - [ - 0.459222, - ")" - ], - [ - 0.723165, - ":" - ], - [ - 0.645995, - "\r\n" - ], - [ - 0.000096, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.446688, - "b" - ], - [ - 0.145841, - "o" - ], - [ - 0.105605, - "r" - ], - [ - 0.088953, - "g" - ], - [ - 0.120803, - " " - ], - [ - 0.227780, - "l" - ], - [ - 0.175052, - "i" - ], - [ - 0.106579, - "s" - ], - [ - 0.058441, - "t" - ], - [ - 0.093196, - " " - ], - [ - 0.172940, - "r" - ], - [ - 0.134731, - "e" - ], - [ - 0.119062, - 
"p" - ], - [ - 0.183075, - "o" - ], - [ - 0.388321, - ":" - ], - [ - 0.140589, - ":" - ], - [ - 0.324109, - "b" - ], - [ - 0.058606, - "a" - ], - [ - 0.205450, - "c" - ], - [ - 0.105362, - "k" - ], - [ - 0.235009, - "u" - ], - [ - 0.243485, - "p" - ], - [ - 0.485432, - "2" - ], - [ - 0.148177, - " " - ], - [ - 0.632383, - "|" - ], - [ - 0.389914, - " " - ], - [ - 0.174128, - "t" - ], - [ - 0.201473, - "a" - ], - [ - 0.116517, - "i" - ], - [ - 0.225072, - "l" - ], - [ - 0.699624, - "\r\n" - ], - [ - 2.000000, - "-rw-rw-r-- tw tw 5516 Jul 21 19:10 data/linux-4.1.8/virt/kvm/async_pf.c\r\n" - ], - [ - 0.000019, - "-rw-rw-r-- tw tw 1120 Jul 21 19:10 data/linux-4.1.8/virt/kvm/async_pf.h\r\n-rw-rw-r-- tw tw 4215 Jul 21 19:10 data/linux-4.1.8/virt/kvm/coalesced_mmio.c\r\n-rw-rw-r-- tw tw 915 Jul 21 19:10 data/linux-4.1.8/virt/kvm/coalesced_mmio.h\r\n-rw-rw-r-- tw tw 22879 Jul 21 19:10 data/linux-4.1.8/virt/kvm/eventfd.c\r\n-rw-rw-r-- tw tw 5563 Jul 21 19:10 data/linux-4.1.8/virt/kvm/irqchip.c\r\n-rw-rw-r-- tw tw 79385 Jul 21 19:10 data/linux-4.1.8/virt/kvm/kvm_main.c\r\n" - ], - [ - 0.000011, - "-rw-rw-r-- tw tw 6132 Jul 21 19:10 data/linux-4.1.8/virt/kvm/vfio.c\r\n-rw-rw-r-- tw tw 250 Jul 21 19:10 data/linux-4.1.8/virt/kvm/vfio.h\r\n" - ], - [ - 0.000009, - "-rw-rw-r-- tw tw 15 Oct 24 22:28 data/one_file_more\r\n" - ], - [ - 0.000389, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 1.000000, - "#" - ], - [ - 0.337351, - " " - ], - [ - 0.147170, - "e" - ], - [ - 0.235736, - "a" - ], - [ - 0.251314, - "s" - ], - [ - 0.471185, - "y" - ], - [ - 0.277723, - "," - ], - [ - 0.204225, - " " - ], - [ - 0.182231, - "i" - ], - [ - 0.174424, - "s" - ], - [ - 0.074677, - "n" - ], - [ - 0.786274, - "'" - ], - [ - 0.264836, - "t" - ], - [ - 0.330352, - " " - ], - [ - 0.266876, - "i" - ], - [ - 0.112564, - "t" - ], - [ - 0.897299, - "?" - ], - [ - 0.623501, - " " - ], - [ - 0.656625, - "t" - ], - [ - 0.115934, - "h" - ], - [ - 0.625213, - "a" - ], - [ - 0.588409, - "t" - ], - [ - 0.160071, - " " - ], - [ - 0.830693, - "i" - ], - [ - 0.163118, - "s" - ], - [ - 0.075663, - " " - ], - [ - 0.186138, - "a" - ], - [ - 0.109916, - "l" - ], - [ - 0.137005, - "l" - ], - [ - 0.171009, - " " - ], - [ - 0.153348, - "y" - ], - [ - 0.132919, - "o" - ], - [ - 0.568100, - "u" - ], - [ - 0.211350, - " " - ], - [ - 0.195450, - "n" - ], - [ - 0.257974, - "e" - ], - [ - 0.185529, - "e" - ], - [ - 0.265130, - "d" - ], - [ - 0.129116, - " " - ], - [ - 0.169264, - "t" - ], - [ - 0.148964, - "o" - ], - [ - 0.437043, - " " - ], - [ - 0.431197, - "k" - ], - [ - 0.219557, - "n" - ], - [ - 0.257996, - "o" - ], - [ - 0.158826, - "w" - ], - [ - 0.406870, - " " - ], - [ - 0.659664, - "f" - ], - [ - 0.130963, - "o" - ], - [ - 0.125395, - "r" - ], - [ - 0.613713, - " " - ], - [ - 0.646957, - "b" - ], - [ - 0.154695, - "a" - ], - [ - 0.259741, - "s" - ], - [ - 0.156692, - "i" - ], - [ - 0.124345, - "c" - ], - [ - 0.513209, - "\r\n" - ], - [ - 0.000296, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.965828, - "#" - ], - [ - 0.232285, - " " - ], - [ - 0.266818, - "u" - ], - [ - 0.132723, - "s" - ], - [ - 0.216208, - "a" - ], - [ - 0.206362, - "g" - ], - [ - 0.142608, - "e" - ], - [ - 2.000000, - "." 
- ], - [ - 0.238868, - " " - ], - [ - 0.302986, - "i" - ], - [ - 0.196338, - "f" - ], - [ - 0.092936, - " " - ], - [ - 0.197594, - "y" - ], - [ - 0.122297, - "o" - ], - [ - 0.175360, - "u" - ], - [ - 0.145063, - " " - ], - [ - 0.313719, - "l" - ], - [ - 0.169678, - "i" - ], - [ - 0.185628, - "k" - ], - [ - 0.120660, - "e" - ], - [ - 0.078389, - " " - ], - [ - 0.648628, - "#" - ], - [ - 0.337514, - "b" - ], - [ - 0.108598, - "o" - ], - [ - 0.123792, - "r" - ], - [ - 0.136099, - "g" - ], - [ - 0.235539, - "b" - ], - [ - 0.091671, - "a" - ], - [ - 0.208697, - "c" - ], - [ - 0.100567, - "k" - ], - [ - 0.227477, - "u" - ], - [ - 0.236900, - "p" - ], - [ - 0.302154, - "," - ], - [ - 0.207291, - " " - ], - [ - 0.205656, - "s" - ], - [ - 0.123737, - "p" - ], - [ - 0.142016, - "r" - ], - [ - 0.197260, - "e" - ], - [ - 0.197471, - "a" - ], - [ - 0.104498, - "d" - ], - [ - 0.163267, - " " - ], - [ - 0.178420, - "t" - ], - [ - 0.091669, - "h" - ], - [ - 0.107735, - "e" - ], - [ - 0.102742, - " " - ], - [ - 0.211413, - "w" - ], - [ - 0.124959, - "o" - ], - [ - 0.105787, - "r" - ], - [ - 0.231403, - "d" - ], - [ - 0.299061, - "!" - ], - [ - 2.000000, - "\r\n" - ], - [ - 0.000307, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.304768, - "#" - ], - [ - 0.229433, - " " - ], - [ - 0.647220, - "t" - ], - [ - 0.070692, - "h" - ], - [ - 0.349432, - "a" - ], - [ - 0.112924, - "n" - ], - [ - 0.207031, - "k" - ], - [ - 0.567641, - "s" - ], - [ - 0.121708, - " " - ], - [ - 0.135723, - "f" - ], - [ - 0.139102, - "o" - ], - [ - 0.060453, - "r" - ], - [ - 0.152408, - " " - ], - [ - 0.116234, - "v" - ], - [ - 0.142885, - "i" - ], - [ - 0.106596, - "e" - ], - [ - 0.231115, - "w" - ], - [ - 0.416046, - "i" - ], - [ - 0.086563, - "n" - ], - [ - 0.144009, - "g" - ], - [ - 0.725139, - "!" - ], - [ - 0.299810, - "\r\n" - ], - [ - 0.000250, - "\u001b]0;tw@tux: ~/borg/demo\u0007tw@tux:~/borg/demo$ " - ], - [ - 0.710767, - "exit" - ], - [ - 0.000006, - "\r\n" - ] - ] -} diff --git a/docs/misc/asciinema/install_and_basics.txt b/docs/misc/asciinema/install_and_basics.txt deleted file mode 100644 index 135db170..00000000 --- a/docs/misc/asciinema/install_and_basics.txt +++ /dev/null @@ -1,51 +0,0 @@ -# borgbackup - installation and basic usage - -# I have already downloaded the binary release from github: -ls -l -# binary file + GPG signature - -# verifying whether the binary is valid: -gpg --verify borg-linux64.asc borg-linux64 - -# install it as "borg": -cp borg-linux64 ~/bin/borg - -# making it executable: -chmod +x ~/bin/borg - -# yay, installation done! let's make backups! - -# creating a repository: -borg init repo - -# creating our first backup with stuff from "data" directory: -borg create --stats --progress --compression lz4 repo::backup1 data - -# changing the data slightly: -echo "some more data" > data/one_file_more - -# creating another backup: -borg create --stats --progress repo::backup2 data - -# that was much faster! it recognized/deduplicated unchanged files. -# see the "Deduplicated size" column for "This archive"! :) - -# extracting a backup archive: -mv data data.orig -borg extract repo::backup2 - -# checking if restored data differs from original data: -diff -r data.orig data - -# no, it doesn't! :) - -# listing the repo contents: -borg list repo - -# listing the backup2 archive contents (shortened): -borg list repo::backup2 | tail - -# easy, isn't it? - -# if you like #borgbackup, spread the word! 
- diff --git a/docs/misc/benchmark-crud.txt b/docs/misc/benchmark-crud.txt new file mode 100644 index 00000000..f4ca363b --- /dev/null +++ b/docs/misc/benchmark-crud.txt @@ -0,0 +1,64 @@ +borg benchmark crud +=================== + +Here is some example of borg benchmark crud output. + +I ran it on my laptop, Core i5-4200u, 8GB RAM, SATA SSD, Linux, ext4 fs. +"src" as well as repo is local, on this SSD. + +$ BORG_PASSPHRASE=secret borg init --encryption repokey-blake2 repo +$ BORG_PASSPHRASE=secret borg benchmark crud repo src + +C-Z-BIG 116.06 MB/s (10 * 100.00 MB all-zero files: 8.62s) +R-Z-BIG 197.00 MB/s (10 * 100.00 MB all-zero files: 5.08s) +U-Z-BIG 418.07 MB/s (10 * 100.00 MB all-zero files: 2.39s) +D-Z-BIG 724.94 MB/s (10 * 100.00 MB all-zero files: 1.38s) +C-R-BIG 42.21 MB/s (10 * 100.00 MB random files: 23.69s) +R-R-BIG 134.45 MB/s (10 * 100.00 MB random files: 7.44s) +U-R-BIG 316.83 MB/s (10 * 100.00 MB random files: 3.16s) +D-R-BIG 251.10 MB/s (10 * 100.00 MB random files: 3.98s) +C-Z-MEDIUM 118.53 MB/s (1000 * 1.00 MB all-zero files: 8.44s) +R-Z-MEDIUM 218.49 MB/s (1000 * 1.00 MB all-zero files: 4.58s) +U-Z-MEDIUM 591.59 MB/s (1000 * 1.00 MB all-zero files: 1.69s) +D-Z-MEDIUM 730.04 MB/s (1000 * 1.00 MB all-zero files: 1.37s) +C-R-MEDIUM 31.46 MB/s (1000 * 1.00 MB random files: 31.79s) +R-R-MEDIUM 129.64 MB/s (1000 * 1.00 MB random files: 7.71s) +U-R-MEDIUM 621.86 MB/s (1000 * 1.00 MB random files: 1.61s) +D-R-MEDIUM 234.82 MB/s (1000 * 1.00 MB random files: 4.26s) +C-Z-SMALL 19.81 MB/s (10000 * 10.00 kB all-zero files: 5.05s) +R-Z-SMALL 97.69 MB/s (10000 * 10.00 kB all-zero files: 1.02s) +U-Z-SMALL 36.35 MB/s (10000 * 10.00 kB all-zero files: 2.75s) +D-Z-SMALL 57.04 MB/s (10000 * 10.00 kB all-zero files: 1.75s) +C-R-SMALL 9.81 MB/s (10000 * 10.00 kB random files: 10.19s) +R-R-SMALL 92.21 MB/s (10000 * 10.00 kB random files: 1.08s) +U-R-SMALL 64.62 MB/s (10000 * 10.00 kB random files: 1.55s) +D-R-SMALL 51.62 MB/s (10000 * 10.00 kB random files: 1.94s) + + +A second run some time later gave: + +C-Z-BIG 115.22 MB/s (10 * 100.00 MB all-zero files: 8.68s) +R-Z-BIG 196.06 MB/s (10 * 100.00 MB all-zero files: 5.10s) +U-Z-BIG 439.50 MB/s (10 * 100.00 MB all-zero files: 2.28s) +D-Z-BIG 671.11 MB/s (10 * 100.00 MB all-zero files: 1.49s) +C-R-BIG 43.40 MB/s (10 * 100.00 MB random files: 23.04s) +R-R-BIG 133.17 MB/s (10 * 100.00 MB random files: 7.51s) +U-R-BIG 464.50 MB/s (10 * 100.00 MB random files: 2.15s) +D-R-BIG 245.19 MB/s (10 * 100.00 MB random files: 4.08s) +C-Z-MEDIUM 110.82 MB/s (1000 * 1.00 MB all-zero files: 9.02s) +R-Z-MEDIUM 217.96 MB/s (1000 * 1.00 MB all-zero files: 4.59s) +U-Z-MEDIUM 601.54 MB/s (1000 * 1.00 MB all-zero files: 1.66s) +D-Z-MEDIUM 686.99 MB/s (1000 * 1.00 MB all-zero files: 1.46s) +C-R-MEDIUM 39.91 MB/s (1000 * 1.00 MB random files: 25.06s) +R-R-MEDIUM 128.91 MB/s (1000 * 1.00 MB random files: 7.76s) +U-R-MEDIUM 599.00 MB/s (1000 * 1.00 MB random files: 1.67s) +D-R-MEDIUM 230.69 MB/s (1000 * 1.00 MB random files: 4.33s) +C-Z-SMALL 14.78 MB/s (10000 * 10.00 kB all-zero files: 6.76s) +R-Z-SMALL 96.86 MB/s (10000 * 10.00 kB all-zero files: 1.03s) +U-Z-SMALL 35.22 MB/s (10000 * 10.00 kB all-zero files: 2.84s) +D-Z-SMALL 64.93 MB/s (10000 * 10.00 kB all-zero files: 1.54s) +C-R-SMALL 11.08 MB/s (10000 * 10.00 kB random files: 9.02s) +R-R-SMALL 92.34 MB/s (10000 * 10.00 kB random files: 1.08s) +U-R-SMALL 64.49 MB/s (10000 * 10.00 kB random files: 1.55s) +D-R-SMALL 46.96 MB/s (10000 * 10.00 kB random files: 2.13s) + diff --git a/docs/misc/borg-data-flow.png 
b/docs/misc/borg-data-flow.png new file mode 100644 index 00000000..ed7f52b9 Binary files /dev/null and b/docs/misc/borg-data-flow.png differ diff --git a/docs/misc/borg-data-flow.vsd b/docs/misc/borg-data-flow.vsd new file mode 100644 index 00000000..47d756b8 Binary files /dev/null and b/docs/misc/borg-data-flow.vsd differ diff --git a/docs/misc/compression.conf b/docs/misc/compression.conf deleted file mode 100644 index 881f5fe9..00000000 --- a/docs/misc/compression.conf +++ /dev/null @@ -1,56 +0,0 @@ -# example config file for --compression-from option -# -# Format of non-comment / non-empty lines: -# : -# compression-spec is same format as for --compression option -# path/filename pattern is same format as for --exclude option - -# archives / files: -none:*.gz -none:*.tgz -none:*.bz2 -none:*.tbz2 -none:*.xz -none:*.txz -none:*.lzma -none:*.lzo -none:*.zip -none:*.rar -none:*.7z - -# audio: -none:*.mp3 -none:*.ogg -none:*.oga -none:*.flac -none:*.aac -none:*.m4a - -# video: -none:*.mp4 -none:*.mkv -none:*.m4v -none:*.avi -none:*.mpg -none:*.mpeg -none:*.webm -none:*.vob -none:*.ts -none:*.ogv -none:*.mov -none:*.flv -none:*.ogm - -# pictures/images -none:*.jpg -none:*.jpeg -none:*.png -none:*.gif - -# disk images -none:*.dmg - -# software archives -none:*.rpm -none:*.deb -none:*.msi diff --git a/docs/misc/internals-picture.txt b/docs/misc/internals-picture.txt index ae76f0c1..01351a7b 100644 --- a/docs/misc/internals-picture.txt +++ b/docs/misc/internals-picture.txt @@ -11,22 +11,22 @@ BorgBackup from 10.000m | | | +------+-------+ | | | | | - /chunk\/chunk\/chunk\... /maybe different chunks lists\ + /chunk\/chunk\/chunk\... /maybe different chunks lists\ +-----------------------------------------------------------------+ |item list | +-----------------------------------------------------------------+ - | - +-------------------------------------+--------------+ - | | | - | | | -+-------------+ +-------------+ | -|item0 | |item1 | | -| - owner | | - owner | | -| - size | | - size | ... -| - ... | | - ... | -| - chunks | | - chunks | -+----+--------+ +-----+-------+ - | | + | + +-------------------------------------+--------------+ + | | | + | | | ++-------------+ +-------------+ | +|item0 | |item1 | | +| - owner | | - owner | | +| - size | | - size | ... +| - ... | | - ... | +| - chunks | | - chunks | ++----+--------+ +-----+-------+ + | | | +-----+----------------------------+-----------------+ | | | | +-o-----o------------+ | diff --git a/docs/misc/prune-example.txt b/docs/misc/prune-example.txt index 6c8f8e55..12ffeb6f 100644 --- a/docs/misc/prune-example.txt +++ b/docs/misc/prune-example.txt @@ -2,7 +2,7 @@ borg prune visualized ===================== Assume it is 2016-01-01, today's backup has not yet been made and you have -created at least one backup on each day in 2015 except on 2015-12-20 (no +created at least one backup on each day in 2015 except on 2015-12-19 (no backup made on that day). This is what borg prune --keep-daily 14 --keep-monthly 6 would keep. 
@@ -14,41 +14,41 @@ Calendar view ------------- 2015 - January February March -Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su - 1 2 3 4 1 1 - 5 6 7 8 9 10 11 2 3 4 5 6 7 8 2 3 4 5 6 7 8 -12 13 14 15 16 17 18 9 10 11 12 13 14 15 9 10 11 12 13 14 15 -19 20 21 22 23 24 25 16 17 18 19 20 21 22 16 17 18 19 20 21 22 -26 27 28 29 30 31 23 24 25 26 27 28 23 24 25 26 27 28 29 - 30 31 + January February March +Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su + 1 2 3 4 1 1 + 5 6 7 8 9 10 11 2 3 4 5 6 7 8 2 3 4 5 6 7 8 +12 13 14 15 16 17 18 9 10 11 12 13 14 15 9 10 11 12 13 14 15 +19 20 21 22 23 24 25 16 17 18 19 20 21 22 16 17 18 19 20 21 22 +26 27 28 29 30 31 23 24 25 26 27 28 23 24 25 26 27 28 29 + 30 31 - April May June -Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su - 1 2 3 4 5 1 2 3 1 2 3 4 5 6 7 - 6 7 8 9 10 11 12 4 5 6 7 8 9 10 8 9 10 11 12 13 14 -13 14 15 16 17 18 19 11 12 13 14 15 16 17 15 16 17 18 19 20 21 -20 21 22 23 24 25 26 18 19 20 21 22 23 24 22 23 24 25 26 27 28 -27 28 29 30 25 26 27 28 29 30 31 29 30m - + April May June +Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su + 1 2 3 4 5 1 2 3 1 2 3 4 5 6 7 + 6 7 8 9 10 11 12 4 5 6 7 8 9 10 8 9 10 11 12 13 14 +13 14 15 16 17 18 19 11 12 13 14 15 16 17 15 16 17 18 19 20 21 +20 21 22 23 24 25 26 18 19 20 21 22 23 24 22 23 24 25 26 27 28 +27 28 29 30 25 26 27 28 29 30 31 29 30m - July August September -Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su - 1 2 3 4 5 1 2 1 2 3 4 5 6 - 6 7 8 9 10 11 12 3 4 5 6 7 8 9 7 8 9 10 11 12 13 -13 14 15 16 17 18 19 10 11 12 13 14 15 16 14 15 16 17 18 19 20 -20 21 22 23 24 25 26 17 18 19 20 21 22 23 21 22 23 24 25 26 27 -27 28 29 30 31m 24 25 26 27 28 29 30 28 29 30m - 31m - October November December -Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su - 1 2 3 4 1 1 2 3 4 5 6 - 5 6 7 8 9 10 11 2 3 4 5 6 7 8 7 8 9 10 11 12 13 -12 13 14 15 16 17 18 9 10 11 12 13 14 15 14 15 16 17d18d19d20 -19 20 21 22 23 24 25 16 17 18 19 20 21 22 21d22d23d24d25d26d27d -26 27 28 29 30 31m 23 24 25 26 27 28 29 28d29d30d31d - 30m + July August September +Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su + 1 2 3 4 5 1 2 1 2 3 4 5 6 + 6 7 8 9 10 11 12 3 4 5 6 7 8 9 7 8 9 10 11 12 13 +13 14 15 16 17 18 19 10 11 12 13 14 15 16 14 15 16 17 18 19 20 +20 21 22 23 24 25 26 17 18 19 20 21 22 23 21 22 23 24 25 26 27 +27 28 29 30 31m 24 25 26 27 28 29 30 28 29 30m + 31m + + October November December +Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su + 1 2 3 4 1 1 2 3 4 5 6 + 5 6 7 8 9 10 11 2 3 4 5 6 7 8 7 8 9 10 11 12 13 +12 13 14 15 16 17 18 9 10 11 12 13 14 15 14 15 16 17d18d19 20d +19 20 21 22 23 24 25 16 17 18 19 20 21 22 21d22d23d24d25d26d27d +26 27 28 29 30 31m 23 24 25 26 27 28 29 28d29d30d31d + 30m List view --------- @@ -66,8 +66,8 @@ List view 9. 2015-12-23 10. 2015-12-22 11. 2015-12-21 - (no backup made on 2015-12-20) -12. 2015-12-19 +12. 2015-12-20 + (no backup made on 2015-12-19) 13. 2015-12-18 14. 2015-12-17 @@ -83,7 +83,7 @@ Jun. December is not considered for this rule, because that backup was already kept because of the daily rule. 2015-12-17 is kept to satisfy the --keep-daily 14 rule - because no backup was -made on 2015-12-20. If a backup had been made on that day, it would not keep +made on 2015-12-19. If a backup had been made on that day, it would not keep the one from 2015-12-17. 
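If you want to check a policy like this against your own repository without deleting anything, a dry run shows the decisions borg would make (sketch only; the repository path is a placeholder):

    borg prune --dry-run --list --keep-daily 14 --keep-monthly 6 /path/to/repo

The --list output marks each archive as kept or pruned, so the result can be compared with the list view above before pruning for real.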
We did not include yearly, weekly, hourly, minutely or secondly rules to keep diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 76726d25..a7a1ba0f 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -5,9 +5,13 @@ Quick Start =========== -This chapter will get you started with |project_name|. The first section -presents a simple step by step example that uses |project_name| to backup data. -The next section continues by showing how backups can be automated. +This chapter will get you started with |project_name| and covers +various use cases. + +A step by step example +---------------------- + +.. include:: quickstart_example.rst.inc Important note about free space ------------------------------- @@ -17,13 +21,20 @@ a good amount of free space on the filesystem that has your backup repository (and also on ~/.cache). A few GB should suffice for most hard-drive sized repositories. See also :ref:`cache-memory-usage`. +|project_name| doesn't use space reserved for root on repository disks (even when run as root). +On file systems which do not support this mechanism (e.g. XFS), we recommend +reserving some space in |project_name| itself, just to be safe, by adjusting the +``additional_free_space`` setting in the ``[repository]`` section of a repository's +``config`` file. A good starting point is ``2G``. + If |project_name| runs out of disk space, it tries to free as much space as it -can while aborting the current operation safely, which allows to free more space -by deleting/pruning archives. This mechanism is not bullet-proof though. +can while aborting the current operation safely, which allows the user to free more space +by deleting/pruning archives. This mechanism is not bullet-proof in some +circumstances [1]_. + If you *really* run out of disk space, it can be hard or impossible to free space, because |project_name| needs free space to operate - even to delete backup -archives. There is a ``--save-space`` option for some commands, but even with -that |project_name| will need free space to operate. +archives. You can use some monitoring process or just include the free space information in your backup log files (you check them regularly anyway, right?). @@ -36,116 +47,147 @@ Also helpful: - consider using quotas - use `prune` regularly +.. [1] This failsafe can fail in these circumstances: -A step by step example ----------------------- - -1. Before a backup can be made a repository has to be initialized:: - - $ borg init /path/to/repo - -2. Backup the ``~/src`` and ``~/Documents`` directories into an archive called - *Monday*:: - - $ borg create /path/to/repo::Monday ~/src ~/Documents - -3. The next day create a new archive called *Tuesday*:: - - $ borg create --stats /path/to/repo::Tuesday ~/src ~/Documents - - This backup will be a lot quicker and a lot smaller since only new never - before seen data is stored.
The ``--stats`` option causes |project_name| to - output statistics about the newly created archive such as the amount of unique - data (not shared with other archives):: - - ------------------------------------------------------------------------------ - Archive name: Tuesday - Archive fingerprint: bd31004d58f51ea06ff735d2e5ac49376901b21d58035f8fb05dbf866566e3c2 - Time (start): Tue, 2016-02-16 18:15:11 - Time (end): Tue, 2016-02-16 18:15:11 - - Duration: 0.19 seconds - Number of files: 127 - ------------------------------------------------------------------------------ - Original size Compressed size Deduplicated size - This archive: 4.16 MB 4.17 MB 26.78 kB - All archives: 8.33 MB 8.34 MB 4.19 MB - - Unique chunks Total chunks - Chunk index: 132 261 - ------------------------------------------------------------------------------ - -4. List all archives in the repository:: - - $ borg list /path/to/repo - Monday Mon, 2016-02-15 19:14:44 - Tuesday Tue, 2016-02-16 19:15:11 - -5. List the contents of the *Monday* archive:: - - $ borg list /path/to/repo::Monday - drwxr-xr-x user group 0 Mon, 2016-02-15 18:22:30 home/user/Documents - -rw-r--r-- user group 7961 Mon, 2016-02-15 18:22:30 home/user/Documents/Important.doc - ... - -6. Restore the *Monday* archive:: - - $ borg extract /path/to/repo::Monday - -7. Recover disk space by manually deleting the *Monday* archive:: - - $ borg delete /path/to/repo::Monday - -.. Note:: - Borg is quiet by default (it works on WARNING log level). - You can use options like ``--progress`` or ``--list`` to get specific - reports during command execution. You can also add the ``-v`` (or - ``--verbose`` or ``--info``) option to adjust the log level to INFO to - get other informational messages. + - The underlying file system doesn't support statvfs(2), or returns incorrect + data, or the repository doesn't reside on a single file system + - Other tasks fill the disk simultaneously + - Hard quotas (which may not be reflected in statvfs(2)) Automating backups ------------------ -The following example script backs up ``/home`` and ``/var/www`` to a remote -server. The script also uses the :ref:`borg_prune` subcommand to maintain a -certain number of old archives: +The following example script is meant to be run daily by the ``root`` user on +different local machines. It backs up a machine's important files (but not the +complete operating system) to a repository ``~/backup/main`` on a remote server. +Some files which aren't necessarily needed in this backup are excluded. See +:ref:`borg_patterns` on how to add more exclude options. + +After the backup this script also uses the :ref:`borg_prune` subcommand to keep +only a certain number of old archives and deletes the others in order to preserve +disk space. + +Before running, make sure that the repository is initialized as documented in +:ref:`remote_repos` and that the script has the correct permissions to be executable +by the root user, but not executable or readable by anyone else, i.e. root:root 0700. + +You can use this script as a starting point and modify it where it's necessary to fit +your setup. + +Do not forget to test your created backups to make sure everything you need is being +backed up and that the ``prune`` command is keeping and deleting the correct backups. 
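As a minimal sketch (the path ``/usr/local/sbin/borg-backup.sh`` and the time of day
are only assumed for this illustration), installing the script with the recommended
ownership and mode and scheduling it to run daily could look like::

    # owned by root, readable/executable by root only
    chown root:root /usr/local/sbin/borg-backup.sh
    chmod 0700 /usr/local/sbin/borg-backup.sh

    # entry in root's crontab (crontab -e as root) for a daily run at 02:00
    0 2 * * * /usr/local/sbin/borg-backup.sh

The example script itself follows.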
:: #!/bin/sh - # setting this, so the repo does not need to be given on the commandline: - export BORG_REPO=username@remoteserver.com:backup - # setting this, so you won't be asked for your passphrase - make sure the - # script has appropriate owner/group and mode, e.g. root.root 600: - export BORG_PASSPHRASE=mysecret + # Setting this, so the repo does not need to be given on the commandline: + export BORG_REPO=ssh://username@example.com:2022/~/backup/main - # Backup most important stuff: - borg create --stats -C lz4 ::'{hostname}-{now:%Y-%m-%d}' \ - /etc \ - /home \ - /var \ - --exclude '/home/*/.cache' \ - --exclude '*.pyc' + # Setting this, so you won't be asked for your repository passphrase: + export BORG_PASSPHRASE='XYZl0ngandsecurepa_55_phrasea&&123' + # or this to ask an external program to supply the passphrase: + export BORG_PASSCOMMAND='pass show backup' + + # some helpers and error handling: + info() { printf "\n%s %s\n\n" "$( date )" "$*" >&2; } + trap 'echo $( date ) Backup interrupted >&2; exit 2' INT TERM + + info "Starting backup" + + # Backup the most important directories into an archive named after + # the machine this script is currently running on: + + borg create \ + --verbose \ + --filter AME \ + --list \ + --stats \ + --show-rc \ + --compression lz4 \ + --exclude-caches \ + --exclude '/home/*/.cache/*' \ + --exclude '/var/cache/*' \ + --exclude '/var/tmp/*' \ + \ + ::'{hostname}-{now}' \ + /etc \ + /home \ + /root \ + /var \ + + backup_exit=$? + + info "Pruning repository" # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly # archives of THIS machine. The '{hostname}-' prefix is very important to # limit prune's operation to this machine's archives and not apply to - # other machine's archives also. - borg prune -v --prefix '{hostname}-' \ - --keep-daily=7 --keep-weekly=4 --keep-monthly=6 + # other machines' archives also: + + borg prune \ + --list \ + --prefix '{hostname}-' \ + --show-rc \ + --keep-daily 7 \ + --keep-weekly 4 \ + --keep-monthly 6 \ + + prune_exit=$? + + # use highest exit code as global exit code + global_exit=$(( backup_exit > prune_exit ? backup_exit : prune_exit )) + + if [ ${global_exit} -eq 1 ]; + then + info "Backup and/or Prune finished with a warning" + fi + + if [ ${global_exit} -gt 1 ]; + then + info "Backup and/or Prune finished with an error" + fi + + exit ${global_exit} + +Pitfalls with shell variables and environment variables +------------------------------------------------------- + +This applies to all environment variables you want |project_name| to see, not just +``BORG_PASSPHRASE``. The short explanation is: always ``export`` your variable, +and use single quotes if you're unsure of the details of your shell's expansion +behavior. E.g.:: + + export BORG_PASSPHRASE='complicated & long' + +This is because ``export`` exposes variables to subprocesses, which |project_name| may be +one of. More on ``export`` can be found in the "ENVIRONMENT" section of the +bash(1) man page. + +Beware of how ``sudo`` interacts with environment variables. For example, you +may be surprised that the following ``export`` has no effect on your command:: + + export BORG_PASSPHRASE='complicated & long' + sudo ./yourborgwrapper.sh # still prompts for password + +For more information, refer to the sudo(8) man page and ``env_keep`` in +the sudoers(5) man page. + +.. Tip:: + To debug what your borg process is actually seeing, find its PID + (``ps aux|grep borg``) and then look into ``/proc//environ``. .. 
backup_compression: Backup compression ------------------ -Default is no compression, but we support different methods with high speed -or high compression: +The default is lz4 (very fast, but low compression ratio), but other methods are +supported for different situations. -If you have a fast repo storage and you want some compression: :: +If you have a fast repo storage and you want minimum CPU usage, no compression:: - $ borg create --compression lz4 /path/to/repo::arch ~ + $ borg create --compression none /path/to/repo::arch ~ If you have a less fast repo storage and you want a bit more compression (N=0..9, 0 means no compression, 9 means high compression): :: @@ -207,8 +249,16 @@ For automated backups the passphrase can be specified using the the key in case it gets corrupted or lost. Also keep your passphrase at a safe place. - The backup that is encrypted with that key/passphrase won't help you - with that, of course. + You can make backups using :ref:`borg_key_export` subcommand. + + If you want to print a backup of your key to paper use the ``--paper`` + option of this command and print the result, or this print `template`_ + if you need a version with QR-Code. + + A backup inside of the backup that is encrypted with that key/passphrase + won't help you with that, of course. + +.. _template: paperkey.html .. _remote_repos: @@ -221,16 +271,14 @@ is installed on the remote host, in which case the following syntax is used:: $ borg init user@hostname:/path/to/repo -or:: - - $ borg init ssh://user@hostname:port//path/to/repo +Note: please see the usage chapter for a full documentation of repo URLs. Remote operations over SSH can be automated with SSH keys. You can restrict the use of the SSH keypair by prepending a forced command to the SSH public key in the remote server's `authorized_keys` file. This example will start |project_name| in server mode and limit it to a specific filesystem path:: - command="borg serve --restrict-to-path /path/to/repo",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...] + command="borg serve --restrict-to-path /path/to/repo",restrict ssh-rsa AAAAB3[...] If it is not possible to install |project_name| on the remote host, it is still possible to use the remote host to store a repository by @@ -239,3 +287,7 @@ mounting the remote filesystem, for example, using sshfs:: $ sshfs user@hostname:/path/to /path/to $ borg init /path/to/repo $ fusermount -u /path/to + +You can also use other remote filesystems in a similar way. Just be careful, +not all filesystems out there are really stable and working good enough to +be acceptable for backup usage. diff --git a/docs/quickstart_example.rst.inc b/docs/quickstart_example.rst.inc new file mode 100644 index 00000000..69ed9845 --- /dev/null +++ b/docs/quickstart_example.rst.inc @@ -0,0 +1,62 @@ +1. Before a backup can be made a repository has to be initialized:: + + $ borg init --encryption=repokey /path/to/repo + +2. Backup the ``~/src`` and ``~/Documents`` directories into an archive called + *Monday*:: + + $ borg create /path/to/repo::Monday ~/src ~/Documents + +3. The next day create a new archive called *Tuesday*:: + + $ borg create --stats /path/to/repo::Tuesday ~/src ~/Documents + + This backup will be a lot quicker and a lot smaller since only new never + before seen data is stored. 
The ``--stats`` option causes Borg to + output statistics about the newly created archive such as the amount of unique + data (not shared with other archives):: + + ------------------------------------------------------------------------------ + Archive name: Tuesday + Archive fingerprint: bd31004d58f51ea06ff735d2e5ac49376901b21d58035f8fb05dbf866566e3c2 + Time (start): Tue, 2016-02-16 18:15:11 + Time (end): Tue, 2016-02-16 18:15:11 + + Duration: 0.19 seconds + Number of files: 127 + ------------------------------------------------------------------------------ + Original size Compressed size Deduplicated size + This archive: 4.16 MB 4.17 MB 26.78 kB + All archives: 8.33 MB 8.34 MB 4.19 MB + + Unique chunks Total chunks + Chunk index: 132 261 + ------------------------------------------------------------------------------ + +4. List all archives in the repository:: + + $ borg list /path/to/repo + Monday Mon, 2016-02-15 19:14:44 + Tuesday Tue, 2016-02-16 19:15:11 + +5. List the contents of the *Monday* archive:: + + $ borg list /path/to/repo::Monday + drwxr-xr-x user group 0 Mon, 2016-02-15 18:22:30 home/user/Documents + -rw-r--r-- user group 7961 Mon, 2016-02-15 18:22:30 home/user/Documents/Important.doc + ... + +6. Restore the *Monday* archive by extracting the files relative to the current directory:: + + $ borg extract /path/to/repo::Monday + +7. Recover disk space by manually deleting the *Monday* archive:: + + $ borg delete /path/to/repo::Monday + +.. Note:: + Borg is quiet by default (it works on WARNING log level). + You can use options like ``--progress`` or ``--list`` to get specific + reports during command execution. You can also add the ``-v`` (or + ``--verbose`` or ``--info``) option to adjust the log level to INFO to + get other informational messages. diff --git a/docs/support.rst b/docs/support.rst index 9d64621f..8bd3fcbe 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -28,6 +28,7 @@ nickname you get by typing "/nick mydesirednickname"): http://webchat.freenode.net/?randomnick=1&channels=%23borgbackup&uio=MTY9dHJ1ZSY5PXRydWUa8 +.. _mailing_list: Mailing list ------------ @@ -37,6 +38,15 @@ unsubscribe and where you can find the archives of the list, see the `mailing list homepage `_. +Twitter +------- + +Follow @borgbackup for announcements. You can also add @borgbackup if you +would like to get retweeted for a borg related tweet. + +Please understand that Twitter is not suitable for longer / more complex +discussions - use one of the other channels for that. + Bounties and Fundraisers ------------------------ @@ -56,3 +66,21 @@ As a developer, you can become a Bounty Hunter and win bounties (earn money) by contributing to |project_name|, a free and open source software project. We might also use BountySource to fund raise for some bigger goals. + +.. _security-contact: + +Security +-------- + +In case you discover a security issue, please use this contact for reporting it privately +and please, if possible, use encrypted E-Mail: + +Thomas Waldmann + +GPG Key Fingerprint: 6D5B EF9A DD20 7580 5747 B70F 9F88 FB52 FAF7 B393 + +The public key can be fetched from any GPG keyserver, but be careful: you must +use the **full fingerprint** to check that you got the correct key. + +`Releases `_ are signed with this GPG key, +please use GPG to verify their authenticity. diff --git a/docs/usage.rst b/docs/usage.rst index 354b950c..de335c1c 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -5,884 +5,53 @@ Usage ===== -|project_name| consists of a number of commands. 
Each command accepts -a number of arguments and options. The following sections will describe each -command in detail. - -General -------- - -Type of log output -~~~~~~~~~~~~~~~~~~ - -The log level of the builtin logging configuration defaults to WARNING. -This is because we want |project_name| to be mostly silent and only output -warnings, errors and critical messages, unless output has been requested -by supplying an option that implies output (eg, --list or --progress). - -Log levels: DEBUG < INFO < WARNING < ERROR < CRITICAL - -Use ``--debug`` to set DEBUG log level - -to get debug, info, warning, error and critical level output. - -Use ``--info`` (or ``-v`` or ``--verbose``) to set INFO log level - -to get info, warning, error and critical level output. - -Use ``--warning`` (default) to set WARNING log level - -to get warning, error and critical level output. - -Use ``--error`` to set ERROR log level - -to get error and critical level output. - -Use ``--critical`` to set CRITICAL log level - -to get critical level output. - -While you can set misc. log levels, do not expect that every command will -give different output on different log levels - it's just a possibility. - -.. warning:: Options --critical and --error are provided for completeness, - their usage is not recommended as you might miss important information. - -Return codes -~~~~~~~~~~~~ - -|project_name| can exit with the following return codes (rc): - -:: - - 0 = success (logged as INFO) - 1 = warning (operation reached its normal end, but there were warnings - - you should check the log, logged as WARNING) - 2 = error (like a fatal error, a local or remote exception, the operation - did not reach its normal end, logged as ERROR) - 128+N = killed by signal N (e.g. 137 == kill -9) - -If you use ``--show-rc``, the return code is also logged at the indicated -level as the last log entry. - - -Environment Variables -~~~~~~~~~~~~~~~~~~~~~ - -|project_name| uses some environment variables for automation: - -General: - BORG_REPO - When set, use the value to give the default repository location. If a command needs an archive - parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you - can either leave it away or abbreviate as `::`, if a positional parameter is required. - BORG_PASSPHRASE - When set, use the value to answer the passphrase question for encrypted repositories. - BORG_DISPLAY_PASSPHRASE - When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories. - BORG_LOGGING_CONF - When set, use the given filename as INI_-style logging configuration. - BORG_RSH - When set, use this command instead of ``ssh``. This can be used to specify ssh options, such as - a custom identity file ``ssh -i /path/to/private/key``. See ``man ssh`` for other options. - BORG_REMOTE_PATH - When set, use the given path/filename as remote path (default is "borg"). - Using ``--remote-path PATH`` commandline option overrides the environment variable. - TMPDIR - where temporary files are stored (might need a lot of temporary space for some operations) - -Some automatic "answerers" (if set, they automatically answer confirmation questions): - BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no (or =yes) - For "Warning: Attempting to access a previously unknown unencrypted repository" - BORG_RELOCATED_REPO_ACCESS_IS_OK=no (or =yes) - For "Warning: The repository at location ... was previously located at ..." 
- BORG_CHECK_I_KNOW_WHAT_I_AM_DOING=NO (or =YES) - For "Warning: 'check --repair' is an experimental feature that might result in data loss." - BORG_DELETE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES) - For "You requested to completely DELETE the repository *including* all archives it contains:" - BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES) - For "recreate is an experimental feature." - - Note: answers are case sensitive. setting an invalid answer value might either give the default - answer or ask you interactively, depending on whether retries are allowed (they by default are - allowed). So please test your scripts interactively before making them a non-interactive script. - -Directories and files: - BORG_KEYS_DIR - Default to '~/.config/borg/keys'. This directory contains keys for encrypted repositories. - BORG_KEY_FILE - When set, use the given filename as repository key file. - BORG_CACHE_DIR - Default to '~/.cache/borg'. This directory contains the local cache and might need a lot - of space for dealing with big repositories). - -Building: - BORG_OPENSSL_PREFIX - Adds given OpenSSL header file directory to the default locations (setup.py). - BORG_LZ4_PREFIX - Adds given LZ4 header file directory to the default locations (setup.py). - - -Please note: - -- be very careful when using the "yes" sayers, the warnings with prompt exist for your / your data's security/safety -- also be very careful when putting your passphrase into a script, make sure it has appropriate file permissions - (e.g. mode 600, root:root). - - -.. _INI: https://docs.python.org/3.4/library/logging.config.html#configuration-file-format - -Resource Usage -~~~~~~~~~~~~~~ - -|project_name| might use a lot of resources depending on the size of the data set it is dealing with. - -CPU: - It won't go beyond 100% of 1 core as the code is currently single-threaded. - Especially higher zlib and lzma compression levels use significant amounts - of CPU cycles. - -Memory (RAM): - The chunks index and the files index are read into memory for performance - reasons. - Compression, esp. lzma compression with high levels might need substantial - amounts of memory. - -Temporary files: - Reading data and metadata from a FUSE mounted repository will consume about - the same space as the deduplicated chunks used to represent them in the - repository. - -Cache files: - Contains the chunks index and files index (plus a compressed collection of - single-archive chunk indexes). - -Chunks index: - Proportional to the amount of data chunks in your repo. Lots of chunks - in your repo imply a big chunks index. - It is possible to tweak the chunker params (see create options). - -Files index: - Proportional to the amount of files in your last backup. Can be switched - off (see create options), but next backup will be much slower if you do. - -Network: - If your repository is remote, all deduplicated (and optionally compressed/ - encrypted) data of course has to go over the connection (ssh: repo url). - If you use a locally mounted network filesystem, additionally some copy - operations used for transaction support also go over the connection. If - you backup multiple sources to one target repository, additional traffic - happens for cache resynchronization. - -In case you are interested in more details, please read the internals documentation. 
- -File systems -~~~~~~~~~~~~ - -We strongly recommend against using Borg (or any other database-like -software) on non-journaling file systems like FAT, since it is not -possible to assume any consistency in case of power failures (or a -sudden disconnect of an external drive or similar failures). - -While Borg uses a data store that is resilient against these failures -when used on journaling file systems, it is not possible to guarantee -this with some hardware -- independent of the software used. We don't -know a list of affected hardware. - -If you are suspicious whether your Borg repository is still consistent -and readable after one of the failures mentioned above occured, run -``borg check --verify-data`` to make sure it is consistent. - -Units -~~~~~ - -To display quantities, |project_name| takes care of respecting the -usual conventions of scale. Disk sizes are displayed in `decimal -`_, using powers of ten (so -``kB`` means 1000 bytes). For memory usage, `binary prefixes -`_ are used, and are -indicated using the `IEC binary prefixes -`_, -using powers of two (so ``KiB`` means 1024 bytes). - - -Date and Time -~~~~~~~~~~~~~ - -We format date and time conforming to ISO-8601, that is: YYYY-MM-DD and -HH:MM:SS (24h clock). - -For more information about that, see: https://xkcd.com/1179/ - -Unless otherwise noted, we display local date and time. -Internally, we store and process date and time as UTC. - -Common options -~~~~~~~~~~~~~~ - -All |project_name| commands share these options: - -.. include:: usage/common-options.rst.inc - -.. include:: usage/init.rst.inc - -Examples -~~~~~~~~ -:: - - # Local repository (default is to use encryption in repokey mode) - $ borg init /path/to/repo - - # Local repository (no encryption) - $ borg init --encryption=none /path/to/repo - - # Remote repository (accesses a remote borg via ssh) - $ borg init user@hostname:backup - - # Remote repository (store the key your home dir) - $ borg init --encryption=keyfile user@hostname:backup - -.. include:: usage/create.rst.inc - -Examples -~~~~~~~~ -:: - - # Backup ~/Documents into an archive named "my-documents" - $ borg create /path/to/repo::my-documents ~/Documents - - # same, but list all files as we process them - $ borg create --list /path/to/repo::my-documents ~/Documents - - # Backup ~/Documents and ~/src but exclude pyc files - $ borg create /path/to/repo::my-files \ - ~/Documents \ - ~/src \ - --exclude '*.pyc' - - # Backup home directories excluding image thumbnails (i.e. 
only - # /home/*/.thumbnails is excluded, not /home/*/*/.thumbnails) - $ borg create /path/to/repo::my-files /home \ - --exclude 're:^/home/[^/]+/\.thumbnails/' - - # Do the same using a shell-style pattern - $ borg create /path/to/repo::my-files /home \ - --exclude 'sh:/home/*/.thumbnails' - - # Backup the root filesystem into an archive named "root-YYYY-MM-DD" - # use zlib compression (good, but slow) - default is no compression - $ borg create -C zlib,6 /path/to/repo::root-{now:%Y-%m-%d} / --one-file-system - - # Make a big effort in fine granular deduplication (big chunk management - # overhead, needs a lot of RAM and disk space, see formula in internals - # docs - same parameters as borg < 1.0 or attic): - $ borg create --chunker-params 10,23,16,4095 /path/to/repo::small /smallstuff - - # Backup a raw device (must not be active/in use/mounted at that time) - $ dd if=/dev/sdx bs=10M | borg create /path/to/repo::my-sdx - - - # No compression (default) - $ borg create /path/to/repo::arch ~ - - # Super fast, low compression - $ borg create --compression lz4 /path/to/repo::arch ~ - - # Less fast, higher compression (N = 0..9) - $ borg create --compression zlib,N /path/to/repo::arch ~ - - # Even slower, even higher compression (N = 0..9) - $ borg create --compression lzma,N /path/to/repo::arch ~ - - # Use short hostname, user name and current time in archive name - $ borg create /path/to/repo::{hostname}-{user}-{now} ~ - $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~ - -.. include:: usage/extract.rst.inc - -Examples -~~~~~~~~ -:: - - # Extract entire archive - $ borg extract /path/to/repo::my-files - - # Extract entire archive and list files while processing - $ borg extract --list /path/to/repo::my-files - - # Verify whether an archive could be successfully extracted, but do not write files to disk - $ borg extract --dry-run /path/to/repo::my-files - - # Extract the "src" directory - $ borg extract /path/to/repo::my-files home/USERNAME/src - - # Extract the "src" directory but exclude object files - $ borg extract /path/to/repo::my-files home/USERNAME/src --exclude '*.o' - - # Restore a raw device (must not be active/in use/mounted at that time) - $ borg extract --stdout /path/to/repo::my-sdx | dd of=/dev/sdx bs=10M - - -.. Note:: - - Currently, extract always writes into the current working directory ("."), - so make sure you ``cd`` to the right place before calling ``borg extract``. - -.. include:: usage/check.rst.inc - -.. include:: usage/rename.rst.inc - -Examples -~~~~~~~~ -:: - - $ borg create /path/to/repo::archivename ~ - $ borg list /path/to/repo - archivename Mon, 2016-02-15 19:50:19 - - $ borg rename /path/to/repo::archivename newname - $ borg list /path/to/repo - newname Mon, 2016-02-15 19:50:19 - - -.. include:: usage/list.rst.inc - -Examples -~~~~~~~~ -:: - - $ borg list /path/to/repo - Monday Mon, 2016-02-15 19:15:11 - repo Mon, 2016-02-15 19:26:54 - root-2016-02-15 Mon, 2016-02-15 19:36:29 - newname Mon, 2016-02-15 19:50:19 - ... - - $ borg list /path/to/repo::root-2016-02-15 - drwxr-xr-x root root 0 Mon, 2016-02-15 17:44:27 . - drwxrwxr-x root root 0 Mon, 2016-02-15 19:04:49 bin - -rwxr-xr-x root root 1029624 Thu, 2014-11-13 00:08:51 bin/bash - lrwxrwxrwx root root 0 Fri, 2015-03-27 20:24:26 bin/bzcmp -> bzdiff - -rwxr-xr-x root root 2140 Fri, 2015-03-27 20:24:22 bin/bzdiff - ... 
- - $ borg list /path/to/repo::archiveA --list-format="{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}" - drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 . - drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 code - drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 code/myproject - -rw-rw-r-- user user 1416192 Sun, 2015-02-01 11:00:00 code/myproject/file.ext - ... - - # see what is changed between archives, based on file modification time, size and file path - $ borg list /path/to/repo::archiveA --list-format="{mtime:%s}{TAB}{size}{TAB}{path}{LF}" |sort -n > /tmp/list.archiveA - $ borg list /path/to/repo::archiveB --list-format="{mtime:%s}{TAB}{size}{TAB}{path}{LF}" |sort -n > /tmp/list.archiveB - $ diff -y /tmp/list.archiveA /tmp/list.archiveB - 1422781200 0 . 1422781200 0 . - 1422781200 0 code 1422781200 0 code - 1422781200 0 code/myproject 1422781200 0 code/myproject - 1422781200 1416192 code/myproject/file.ext | 1454664653 1416192 code/myproject/file.ext - ... - - - -.. include:: usage/diff.rst.inc - -Examples -~~~~~~~~ -:: - - $ borg init testrepo - $ mkdir testdir - $ cd testdir - $ echo asdf > file1 - $ dd if=/dev/urandom bs=1M count=4 > file2 - $ touch file3 - $ borg create ../testrepo::archive1 . - - $ chmod a+x file1 - $ echo "something" >> file2 - $ borg create ../testrepo::archive2 . - - $ rm file3 - $ touch file4 - $ borg create ../testrepo::archive3 . - - $ cd .. - $ borg diff testrepo::archive1 archive2 - [-rw-r--r-- -> -rwxr-xr-x] file1 - +135 B -252 B file2 - - $ borg diff testrepo::archive2 archive3 - added 0 B file4 - removed 0 B file3 - - $ borg diff testrepo::archive1 archive3 - [-rw-r--r-- -> -rwxr-xr-x] file1 - +135 B -252 B file2 - added 0 B file4 - removed 0 B file3 - -.. include:: usage/delete.rst.inc - -Examples -~~~~~~~~ -:: - - # delete a single backup archive: - $ borg delete /path/to/repo::Monday - - # delete the whole repository and the related local cache: - $ borg delete /path/to/repo - You requested to completely DELETE the repository *including* all archives it contains: - repo Mon, 2016-02-15 19:26:54 - root-2016-02-15 Mon, 2016-02-15 19:36:29 - newname Mon, 2016-02-15 19:50:19 - Type 'YES' if you understand this and want to continue: YES - - -.. include:: usage/prune.rst.inc - -Examples -~~~~~~~~ - -Be careful, prune is a potentially dangerous command, it will remove backup -archives. - -The default of prune is to apply to **all archives in the repository** unless -you restrict its operation to a subset of the archives using ``--prefix``. -When using ``--prefix``, be careful to choose a good prefix - e.g. do not use a -prefix "foo" if you do not also want to match "foobar". - -It is strongly recommended to always run ``prune --dry-run ...`` first so you -will see what it would do without it actually doing anything. - -There is also a visualized prune example in ``docs/misc/prune-example.txt``. - -:: - - # Keep 7 end of day and 4 additional end of week archives. - # Do a dry-run without actually deleting anything. 
- $ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo - - # Same as above but only apply to archive names starting with the hostname - # of the machine followed by a "-" character: - $ borg prune --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo - - # Keep 7 end of day, 4 additional end of week archives, - # and an end of month archive for every month: - $ borg prune --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo - - # Keep all backups in the last 10 days, 4 additional end of week archives, - # and an end of month archive for every month: - $ borg prune --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo - - -.. include:: usage/info.rst.inc - -Examples -~~~~~~~~ -:: - - $ borg info /path/to/repo::root-2016-02-15 - Name: root-2016-02-15 - Fingerprint: 57c827621f21b000a8d363c1e163cc55983822b3afff3a96df595077a660be50 - Hostname: myhostname - Username: root - Time (start): Mon, 2016-02-15 19:36:29 - Time (end): Mon, 2016-02-15 19:39:26 - Command line: /usr/local/bin/borg create --list -C zlib,6 /path/to/repo::root-2016-02-15 / --one-file-system - Number of files: 38100 - - Original size Compressed size Deduplicated size - This archive: 1.33 GB 613.25 MB 571.64 MB - All archives: 1.63 GB 853.66 MB 584.12 MB - - Unique chunks Total chunks - Chunk index: 36858 48844 - - -.. include:: usage/mount.rst.inc - -Examples -~~~~~~~~ -borg mount/borgfs -+++++++++++++++++ -:: - - $ borg mount /path/to/repo::root-2016-02-15 /tmp/mymountpoint - $ ls /tmp/mymountpoint - bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var - $ fusermount -u /tmp/mymountpoint - -borgfs -++++++ -:: - - $ echo '/mnt/backup /tmp/myrepo fuse.borgfs defaults,noauto 0 0' >> /etc/fstab - $ echo '/mnt/backup::root-2016-02-15 /tmp/myarchive fuse.borgfs defaults,noauto 0 0' >> /etc/fstab - $ mount /tmp/myrepo - $ mount /tmp/myarchive - $ ls /tmp/myrepo - root-2016-02-01 root-2016-02-2015 - $ ls /tmp/myarchive - bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var - -.. Note:: - - ``borgfs`` will be automatically provided if you used a distribution - package, ``pip`` or ``setup.py`` to install |project_name|. Users of the - standalone binary will have to manually create a symlink (see - :ref:`pyinstaller-binary`). - -.. include:: usage/change-passphrase.rst.inc - -Examples -~~~~~~~~ -:: - - # Create a key file protected repository - $ borg init --encryption=keyfile -v /path/to/repo - Initializing repository at "/path/to/repo" - Enter new passphrase: - Enter same passphrase again: - Remember your passphrase. Your data will be inaccessible without it. - Key in "/root/.config/borg/keys/mnt_backup" created. - Keep this key safe. Your data will be inaccessible without it. - Synchronizing chunks cache... - Archives: 0, w/ cached Idx: 0, w/ outdated Idx: 0, w/o cached Idx: 0. - Done. - - # Change key file passphrase - $ borg change-passphrase -v /path/to/repo - Enter passphrase for key /root/.config/borg/keys/mnt_backup: - Enter new passphrase: - Enter same passphrase again: - Remember your passphrase. Your data will be inaccessible without it. - Key updated - - -.. include:: usage/serve.rst.inc - -Examples -~~~~~~~~ - -borg serve has special support for ssh forced commands (see ``authorized_keys`` -example below): it will detect that you use such a forced command and extract -the value of the ``--restrict-to-path`` option(s). 
-It will then parse the original command that came from the client, makes sure -that it is also ``borg serve`` and enforce path restriction(s) as given by the -forced command. That way, other options given by the client (like ``--info`` or -``--umask``) are preserved (and are not fixed by the forced command). - -:: - - # Allow an SSH keypair to only run borg, and only have access to /path/to/repo. - # Use key options to disable unneeded and potentially dangerous SSH functionality. - # This will help to secure an automated remote backup system. - $ cat ~/.ssh/authorized_keys - command="borg serve --restrict-to-path /path/to/repo",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...] - - -.. include:: usage/upgrade.rst.inc - -Examples -~~~~~~~~ -:: - - # Upgrade the borg repository to the most recent version. - $ borg upgrade -v /path/to/repo - making a hardlink copy in /path/to/repo.upgrade-2016-02-15-20:51:55 - opening attic repository with borg and converting - no key file found for repository - converting repo index /path/to/repo/index.0 - converting 1 segments... - converting borg 0.xx to borg current - no key file found for repository - - -.. include:: usage/recreate.rst.inc - -Examples -~~~~~~~~ -:: - - # Make old (Attic / Borg 0.xx) archives deduplicate with Borg 1.x archives - # Archives created with Borg 1.1+ and the default chunker params are skipped (archive ID stays the same) - $ borg recreate /mnt/backup --chunker-params default --progress - - # Create a backup with little but fast compression - $ borg create /mnt/backup::archive /some/files --compression lz4 - # Then compress it - this might take longer, but the backup has already completed, so no inconsistencies - # from a long-running backup job. - $ borg recreate /mnt/backup::archive --compression zlib,9 - - # Remove unwanted files from all archives in a repository - $ borg recreate /mnt/backup -e /home/icke/Pictures/drunk_photos - - - # Change archive comment - $ borg create --comment "This is a comment" /mnt/backup::archivename ~ - $ borg info /mnt/backup::archivename - Name: archivename - Fingerprint: ... - Comment: This is a comment - ... - $ borg recreate --comment "This is a better comment" /mnt/backup::archivename - $ borg info /mnt/backup::archivename - Name: archivename - Fingerprint: ... - Comment: This is a better comment - ... - - -.. include:: usage/with-lock.rst.inc - - -.. include:: usage/break-lock.rst.inc - - -Miscellaneous Help ------------------- - -.. include:: usage/help.rst.inc - - -Debug Commands --------------- -There are some more commands (all starting with "debug-") which are all -**not intended for normal use** and **potentially very dangerous** if used incorrectly. - -They exist to improve debugging capabilities without direct system access, e.g. -in case you ever run into some severe malfunction. Use them only if you know -what you are doing or if a trusted |project_name| developer tells you what to do. - - -Additional Notes ----------------- - -Here are misc. notes about topics that are maybe not covered in enough detail in the usage section. - -Item flags -~~~~~~~~~~ - -``borg create --list`` outputs a list of all files, directories and other -file system items it considered (no matter whether they had content changes -or not). For each item, it prefixes a single-letter flag that indicates type -and/or status of the item. - -If you are interested only in a subset of that output, you can give e.g. 
-``--filter=AME`` and it will only show regular files with A, M or E status (see -below). - -A uppercase character represents the status of a regular file relative to the -"files" cache (not relative to the repo -- this is an issue if the files cache -is not used). Metadata is stored in any case and for 'A' and 'M' also new data -chunks are stored. For 'U' all data chunks refer to already existing chunks. - -- 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ) -- 'M' = regular file, modified -- 'U' = regular file, unchanged -- 'E' = regular file, an error happened while accessing/reading *this* file - -A lowercase character means a file type other than a regular file, -borg usually just stores their metadata: - -- 'd' = directory -- 'b' = block device -- 'c' = char device -- 'h' = regular file, hardlink (to already seen inodes) -- 's' = symlink -- 'f' = fifo - -Other flags used include: - -- 'i' = backup data was read from standard input (stdin) -- '-' = dry run, item was *not* backed up -- 'x' = excluded, item was *not* backed up -- '?' = missing status code (if you see this, please file a bug report!) - - ---chunker-params -~~~~~~~~~~~~~~~~ -The chunker params influence how input files are cut into pieces (chunks) -which are then considered for deduplication. They also have a big impact on -resource usage (RAM and disk space) as the amount of resources needed is -(also) determined by the total amount of chunks in the repository (see -`Indexes / Caches memory usage` for details). - -``--chunker-params=10,23,16,4095`` results in a fine-grained deduplication -and creates a big amount of chunks and thus uses a lot of resources to manage -them. This is good for relatively small data volumes and if the machine has a -good amount of free RAM and disk space. - -``--chunker-params=19,23,21,4095`` (default) results in a coarse-grained -deduplication and creates a much smaller amount of chunks and thus uses less -resources. This is good for relatively big data volumes and if the machine has -a relatively low amount of free RAM and disk space. - -If you already have made some archives in a repository and you then change -chunker params, this of course impacts deduplication as the chunks will be -cut differently. - -In the worst case (all files are big and were touched in between backups), this -will store all content into the repository again. - -Usually, it is not that bad though: - -- usually most files are not touched, so it will just re-use the old chunks - it already has in the repo -- files smaller than the (both old and new) minimum chunksize result in only - one chunk anyway, so the resulting chunks are same and deduplication will apply - -If you switch chunker params to save resources for an existing repo that -already has some backup archives, you will see an increasing effect over time, -when more and more files have been touched and stored again using the bigger -chunksize **and** all references to the smaller older chunks have been removed -(by deleting / pruning archives). - -If you want to see an immediate big effect on resource usage, you better start -a new repository when changing chunker params. - -For more details, see :ref:`chunker_details`. - ---read-special -~~~~~~~~~~~~~~ - -The --read-special option is special - you do not want to use it for normal -full-filesystem backups, but rather after carefully picking some targets for it. - -The option ``--read-special`` triggers special treatment for block and char -device files as well as FIFOs. 
Instead of storing them as such a device (or -FIFO), they will get opened, their content will be read and in the backup -archive they will show up like a regular file. - -Symlinks will also get special treatment if (and only if) they point to such -a special file: instead of storing them as a symlink, the target special file -will get processed as described above. - -One intended use case of this is backing up the contents of one or multiple -block devices, like e.g. LVM snapshots or inactive LVs or disk partitions. - -You need to be careful about what you include when using ``--read-special``, -e.g. if you include ``/dev/zero``, your backup will never terminate. - -Restoring such files' content is currently only supported one at a time via -``--stdout`` option (and you have to redirect stdout to where ever it shall go, -maybe directly into an existing device file of your choice or indirectly via -``dd``). - -To some extent, mounting a backup archive with the backups of special files -via ``borg mount`` and then loop-mounting the image files from inside the mount -point will work. If you plan to access a lot of data in there, it likely will -scale and perform better if you do not work via the FUSE mount. - -Example -+++++++ - -Imagine you have made some snapshots of logical volumes (LVs) you want to backup. - -.. note:: - - For some scenarios, this is a good method to get "crash-like" consistency - (I call it crash-like because it is the same as you would get if you just - hit the reset button or your machine would abrubtly and completely crash). - This is better than no consistency at all and a good method for some use - cases, but likely not good enough if you have databases running. - -Then you create a backup archive of all these snapshots. The backup process will -see a "frozen" state of the logical volumes, while the processes working in the -original volumes continue changing the data stored there. - -You also add the output of ``lvdisplay`` to your backup, so you can see the LV -sizes in case you ever need to recreate and restore them. - -After the backup has completed, you remove the snapshots again. :: - - $ # create snapshots here - $ lvdisplay > lvdisplay.txt - $ borg create --read-special /path/to/repo::arch lvdisplay.txt /dev/vg0/*-snapshot - $ # remove snapshots here - -Now, let's see how to restore some LVs from such a backup. :: - - $ borg extract /path/to/repo::arch lvdisplay.txt - $ # create empty LVs with correct sizes here (look into lvdisplay.txt). - $ # we assume that you created an empty root and home LV and overwrite it now: - $ borg extract --stdout /path/to/repo::arch dev/vg0/root-snapshot > /dev/vg0/root - $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home - - -.. _append_only_mode: - -Append-only mode -~~~~~~~~~~~~~~~~ - -A repository can be made "append-only", which means that Borg will never overwrite or -delete committed data (append-only refers to the segment files, but borg will also -reject to delete the repository completely). This is useful for scenarios where a -backup client machine backups remotely to a backup server using ``borg serve``, since -a hacked client machine cannot delete backups on the server permanently. - -To activate append-only mode, edit the repository ``config`` file and add a line -``append_only=1`` to the ``[repository]`` section (or edit the line if it exists). 
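For illustration, a minimal sketch of the relevant part of the edited ``config`` file
(all other, pre-existing settings in that file stay untouched and are not shown here)::

    [repository]
    append_only=1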
- -In append-only mode Borg will create a transaction log in the ``transactions`` file, -where each line is a transaction and a UTC timestamp. - -In addition, ``borg serve`` can act as if a repository is in append-only mode with -its option ``--append-only``. This can be very useful for fine-tuning access control -in ``.ssh/authorized_keys`` :: - - command="borg serve --append-only ..." ssh-rsa - command="borg serve ..." ssh-rsa - -Example -+++++++ - -Suppose an attacker remotely deleted all backups, but your repository was in append-only -mode. A transaction log in this situation might look like this: :: - - transaction 1, UTC time 2016-03-31T15:53:27.383532 - transaction 5, UTC time 2016-03-31T15:53:52.588922 - transaction 11, UTC time 2016-03-31T15:54:23.887256 - transaction 12, UTC time 2016-03-31T15:55:54.022540 - transaction 13, UTC time 2016-03-31T15:55:55.472564 - -From your security logs you conclude the attacker gained access at 15:54:00 and all -the backups where deleted or replaced by compromised backups. From the log you know -that transactions 11 and later are compromised. Note that the transaction ID is the -name of the *last* file in the transaction. For example, transaction 11 spans files 6 -to 11. - -In a real attack you'll likely want to keep the compromised repository -intact to analyze what the attacker tried to achieve. It's also a good idea to make this -copy just in case something goes wrong during the recovery. Since recovery is done by -deleting some files, a hard link copy (``cp -al``) is sufficient. - -The first step to reset the repository to transaction 5, the last uncompromised transaction, -is to remove the ``hints.N`` and ``index.N`` files in the repository (these two files are -always expendable). In this example N is 13. - -Then remove or move all segment files from the segment directories in ``data/`` starting -with file 6:: - - rm data/**/{6..13} - -That's all to it. - -Drawbacks -+++++++++ - -As data is only appended, and nothing deleted, commands like ``prune`` or ``delete`` -won't free disk space, they merely tag data as deleted in a new transaction. - -Note that you can go back-and-forth between normal and append-only operation by editing -the configuration file, it's not a "one way trip". - -Further considerations -++++++++++++++++++++++ - -Append-only mode is not respected by tools other than Borg. ``rm`` still works on the -repository. Make sure that backup client machines only get to access the repository via -``borg serve``. - -Ensure that no remote access is possible if the repository is temporarily set to normal mode -for e.g. regular pruning. - -Further protections can be implemented, but are outside of Borg's scope. For example, -file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on -new data files. +.. raw:: html + Redirecting... + + + +.. toctree:: + usage/general + + usage/init + usage/create + usage/extract + usage/check + usage/rename + usage/list + usage/diff + usage/delete + usage/prune + usage/info + usage/mount + usage/key + usage/upgrade + usage/recreate + usage/tar + usage/serve + usage/lock + usage/benchmark + + usage/help + usage/debug + usage/notes diff --git a/docs/usage/benchmark.rst b/docs/usage/benchmark.rst new file mode 100644 index 00000000..27436a9b --- /dev/null +++ b/docs/usage/benchmark.rst @@ -0,0 +1 @@ +.. 
include:: benchmark_crud.rst.inc diff --git a/docs/usage/benchmark_crud.rst.inc b/docs/usage/benchmark_crud.rst.inc new file mode 100644 index 00000000..b76c091d --- /dev/null +++ b/docs/usage/benchmark_crud.rst.inc @@ -0,0 +1,85 @@ +.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_benchmark_crud: + +borg benchmark crud +------------------- +.. code-block:: none + + borg [common options] benchmark crud [options] REPO PATH + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------+------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+----------+------------------------------------------+ + | | ``REPO`` | repo to use for benchmark (must exist) | + +-------------------------------------------------------+----------+------------------------------------------+ + | | ``PATH`` | path were to create benchmark input data | + +-------------------------------------------------------+----------+------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------+------------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + REPO + repo to use for benchmark (must exist) + PATH + path were to create benchmark input data + + + :ref:`common_options` + | + +Description +~~~~~~~~~~~ + +This command benchmarks borg CRUD (create, read, update, delete) operations. + +It creates input data below the given PATH and backups this data into the given REPO. +The REPO must already exist (it could be a fresh empty repo or an existing repo, the +command will create / read / update / delete some archives named borg-test-data\* there. + +Make sure you have free space there, you'll need about 1GB each (+ overhead). + +If your repository is encrypted and borg needs a passphrase to unlock the key, use: + +BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH + +Measurements are done with different input file sizes and counts. +The file contents are very artificial (either all zero or all random), +thus the measurement results do not necessarily reflect performance with real data. +Also, due to the kind of content used, no compression is used in these benchmarks. + +C- == borg create (1st archive creation, no compression, do not use files cache) + C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher. + C-R- == random files. no dedup, measuring throughput through all processing stages. + +R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk) + R-Z- == all zero files. Measuring heavily duplicated files. + R-R- == random files. No duplication here, measuring throughput through all processing + stages, except writing to disk. + +U- == borg create (2nd archive creation of unchanged input files, measure files cache speed) + The throughput value is kind of virtual here, it does not actually read the file. + U-Z- == needs to check the 2 all-zero chunks' existence in the repo. + U-R- == needs to check existence of a lot of different chunks in the repo. + +D- == borg delete archive (delete last remaining archive, measure deletion + compaction) + D-Z- == few chunks to delete / few segments to compact/remove. + D-R- == many chunks to delete / many segments to compact/remove. 
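A hedged invocation sketch (the paths are placeholders; as noted above, the repository
must already exist before the benchmark is run)::

    # scratch repository without encryption, plus a scratch directory for the generated input data
    borg init --encryption=none /tmp/borg-bench-repo
    mkdir /tmp/borg-bench-data
    borg benchmark crud /tmp/borg-bench-repo /tmp/borg-bench-data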
+ +Please note that there might be quite some variance in these measurements. +Try multiple measurements and having a otherwise idle machine (and network, if you use it). \ No newline at end of file diff --git a/docs/usage/break-lock.rst.inc b/docs/usage/break-lock.rst.inc index 5fa1cda5..17d5ce10 100644 --- a/docs/usage/break-lock.rst.inc +++ b/docs/usage/break-lock.rst.inc @@ -4,20 +4,44 @@ borg break-lock --------------- -:: +.. code-block:: none - borg break-lock REPOSITORY + borg [common options] break-lock [options] [REPOSITORY] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+-----------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+----------------+-----------------------------------------+ + | | ``REPOSITORY`` | repository for which to break the locks | + +-------------------------------------------------------+----------------+-----------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+-----------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY repository for which to break the locks -`Common options`_ - | + + :ref:`common_options` + | Description ~~~~~~~~~~~ This command breaks the repository and cache locks. Please use carefully and only while no borg process (on any machine) is -trying to access the Cache or the Repository. +trying to access the Cache or the Repository. \ No newline at end of file diff --git a/docs/usage/change-passphrase.rst.inc b/docs/usage/change-passphrase.rst.inc index 3bb827a4..1114d350 100644 --- a/docs/usage/change-passphrase.rst.inc +++ b/docs/usage/change-passphrase.rst.inc @@ -4,19 +4,43 @@ borg change-passphrase ---------------------- -:: +.. code-block:: none - borg change-passphrase REPOSITORY + borg [common options] change-passphrase [options] [REPOSITORY] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+--+ + | **positional arguments** | + +-------------------------------------------------------+----------------+--+ + | | ``REPOSITORY`` | | + +-------------------------------------------------------+----------------+--+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+--+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY -`Common options`_ - | + + :ref:`common_options` + | Description ~~~~~~~~~~~ The key files used for repository encryption are optionally passphrase -protected. This command can be used to change this passphrase. +protected. This command can be used to change this passphrase. \ No newline at end of file diff --git a/docs/usage/check.rst b/docs/usage/check.rst new file mode 100644 index 00000000..143b0b4c --- /dev/null +++ b/docs/usage/check.rst @@ -0,0 +1 @@ +.. include:: check.rst.inc diff --git a/docs/usage/check.rst.inc b/docs/usage/check.rst.inc index 17d6cf49..cc61a76e 100644 --- a/docs/usage/check.rst.inc +++ b/docs/usage/check.rst.inc @@ -4,34 +4,80 @@ borg check ---------- -:: +.. code-block:: none - borg check REPOSITORY_OR_ARCHIVE + borg [common options] check [options] [REPOSITORY_OR_ARCHIVE] + +.. only:: html + + .. 
class:: borg-options-table + + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY_OR_ARCHIVE`` | repository or archive to check consistency of | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--repository-only`` | only perform repository checks | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--archives-only`` | only perform archives checks | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--verify-data`` | perform cryptographic archive data integrity verification (conflicts with ``--repository-only``) | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--repair`` | attempt to repair any inconsistencies found | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--save-space`` | work slower, but using less space | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive filters** — Archive filters can be applied to repository targets. 
| + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-P PREFIX``, ``--prefix PREFIX`` | only consider archive names starting with this prefix. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-a GLOB``, ``--glob-archives GLOB`` | only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--sort-by KEYS`` | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--first N`` | consider first N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--last N`` | consider last N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY_OR_ARCHIVE repository or archive to check consistency of -optional arguments - ``--repository-only`` - | only perform repository checks - ``--archives-only`` - | only perform archives checks - ``--verify-data`` - | perform cryptographic archive data integrity verification (conflicts with --repository-only) - ``--repair`` - | attempt to repair any inconsistencies found - ``--save-space`` - | work slower, but using less space - ``--last N`` - | only check last N archives (Default: all) - ``-P``, ``--prefix`` - | only consider archive names starting with this prefix - ``-p``, ``--progress`` - | show progress display while checking -`Common options`_ - | + optional arguments + --repository-only only perform repository checks + --archives-only only perform archives checks + --verify-data perform cryptographic archive data integrity verification (conflicts with ``--repository-only``) + --repair attempt to repair any inconsistencies found + --save-space work slower, but using less space + + + :ref:`common_options` + | + + Archive filters + -P PREFIX, --prefix PREFIX only consider archive names starting with this prefix. + -a GLOB, --glob-archives GLOB only consider archive names matching the glob. 
sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. + --sort-by KEYS Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp + --first N consider first N archives after other filters were applied + --last N consider last N archives after other filters were applied + Description ~~~~~~~~~~~ @@ -50,7 +96,7 @@ First, the underlying repository data files are checked: stored in the segments. - If you use a remote repo server via ssh:, the repo check is executed on the repo server without causing significant network traffic. -- The repository check can be skipped using the --archives-only option. +- The repository check can be skipped using the ``--archives-only`` option. Second, the consistency and correctness of the archive metadata is verified: @@ -72,16 +118,12 @@ Second, the consistency and correctness of the archive metadata is verified: decryption and this is always done client-side, because key access will be required). - The archive checks can be time consuming, they can be skipped using the - --repository-only option. + ``--repository-only`` option. -The --verify-data option will perform a full integrity verification (as opposed to +The ``--verify-data`` option will perform a full integrity verification (as opposed to checking the CRC32 of the segment) of data, which means reading the data from the repository, decrypting and decompressing it. This is a cryptographic verification, which will detect (accidental) corruption. For encrypted repositories it is tamper-resistant as well, unless the attacker has access to the keys. -It is also very slow. - ---verify-data only verifies data used by the archives specified with --last, ---prefix or an explicitly named archive. If none of these are passed, -all data in the repository is verified. +It is also very slow. \ No newline at end of file diff --git a/docs/usage/common-options.rst.inc b/docs/usage/common-options.rst.inc index 0a3c3c3e..093041c4 100644 --- a/docs/usage/common-options.rst.inc +++ b/docs/usage/common-options.rst.inc @@ -1,24 +1,18 @@ - ``-h``, ``--help`` - | show this help message and exit - ``--critical`` - | work on log level CRITICAL - ``--error`` - | work on log level ERROR - ``--warning`` - | work on log level WARNING (default) - ``--info``, ``-v``, ``--verbose`` - | work on log level INFO - ``--debug`` - | enable debug output, work on log level DEBUG - ``--lock-wait N`` - | wait for the lock, but max. N seconds (default: 1). - ``--show-version`` - | show/log the borg version - ``--show-rc`` - | show/log the return code (rc) - ``--no-files-cache`` - | do not load/update the file metadata cache used to detect unchanged files - ``--umask M`` - | set umask to M (local and remote, default: 0077) - ``--remote-path PATH`` - | set remote path to executable (default: "borg") \ No newline at end of file +-h, --help show this help message and exit +--critical work on log level CRITICAL +--error work on log level ERROR +--warning work on log level WARNING (default) +--info, -v, --verbose work on log level INFO +--debug enable debug output, work on log level DEBUG +--debug-topic TOPIC enable TOPIC debugging (can be specified multiple times). The logger path is borg.debug. if TOPIC is not fully qualified. +-p, --progress show progress information +--log-json Output one JSON object per log line instead of formatted text. +--lock-wait SECONDS wait at most SECONDS for acquiring a repository/cache lock (default: 1). 
+--show-version show/log the borg version +--show-rc show/log the return code (rc) +--no-files-cache do not load/update the file metadata cache used to detect unchanged files +--umask M set umask to M (local and remote, default: 0077) +--remote-path PATH use PATH as borg executable on the remote (default: "borg") +--remote-ratelimit RATE set remote network upload rate limit in kiByte/s (default: 0=unlimited) +--consider-part-files treat part files like normal files (e.g. to list/extract them) +--debug-profile FILE Write execution profile in Borg format into FILE. For local use a Python-compatible file can be generated by suffixing FILE with ".pyprof". diff --git a/docs/usage/create.rst b/docs/usage/create.rst new file mode 100644 index 00000000..ec49f5fa --- /dev/null +++ b/docs/usage/create.rst @@ -0,0 +1,69 @@ +.. include:: create.rst.inc + +Examples +~~~~~~~~ +:: + + # Backup ~/Documents into an archive named "my-documents" + $ borg create /path/to/repo::my-documents ~/Documents + + # same, but list all files as we process them + $ borg create --list /path/to/repo::my-documents ~/Documents + + # Backup ~/Documents and ~/src but exclude pyc files + $ borg create /path/to/repo::my-files \ + ~/Documents \ + ~/src \ + --exclude '*.pyc' + + # Backup home directories excluding image thumbnails (i.e. only + # /home//.thumbnails is excluded, not /home/*/*/.thumbnails etc.) + $ borg create /path/to/repo::my-files /home \ + --exclude 'sh:/home/*/.thumbnails' + + # Backup the root filesystem into an archive named "root-YYYY-MM-DD" + # use zlib compression (good, but slow) - default is lz4 (fast, low compression ratio) + $ borg create -C zlib,6 /path/to/repo::root-{now:%Y-%m-%d} / --one-file-system + + # Backup a remote host locally ("pull" style) using sshfs + $ mkdir sshfs-mount + $ sshfs root@example.com:/ sshfs-mount + $ cd sshfs-mount + $ borg create /path/to/repo::example.com-root-{now:%Y-%m-%d} . + $ cd .. 
+ $ fusermount -u sshfs-mount + + # Make a big effort in fine granular deduplication (big chunk management + # overhead, needs a lot of RAM and disk space, see formula in internals + # docs - same parameters as borg < 1.0 or attic): + $ borg create --chunker-params 10,23,16,4095 /path/to/repo::small /smallstuff + + # Backup a raw device (must not be active/in use/mounted at that time) + $ dd if=/dev/sdx bs=10M | borg create /path/to/repo::my-sdx - + + # No compression (none) + $ borg create --compression none /path/to/repo::arch ~ + + # Super fast, low compression (lz4, default) + $ borg create /path/to/repo::arch ~ + + # Less fast, higher compression (zlib, N = 0..9) + $ borg create --compression zlib,N /path/to/repo::arch ~ + + # Even slower, even higher compression (lzma, N = 0..9) + $ borg create --compression lzma,N /path/to/repo::arch ~ + + # Only compress compressible data with lzma,N (N = 0..9) + $ borg create --compression auto,lzma,N /path/to/repo::arch ~ + + # Use short hostname, user name and current time in archive name + $ borg create /path/to/repo::{hostname}-{user}-{now} ~ + # Similar, use the same datetime format as borg 1.1 will have as default + $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S} ~ + # As above, but add nanoseconds + $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S.%f} ~ + + # Backing up relative paths by moving into the correct directory first + $ cd /home/user/Documents + # The root directory of the archive will be "projectA" + $ borg create /path/to/repo::daily-projectA-{now:%Y-%m-%d} projectA diff --git a/docs/usage/create.rst.inc b/docs/usage/create.rst.inc index 6e176a52..c70e2a8f 100644 --- a/docs/usage/create.rst.inc +++ b/docs/usage/create.rst.inc @@ -4,93 +4,230 @@ borg create ----------- -:: +.. code-block:: none - borg create ARCHIVE PATH + borg [common options] create [options] ARCHIVE [PATH...] + +.. only:: html + + .. 
class:: borg-options-table + + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``ARCHIVE`` | name of archive to create (must be also a valid directory name) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``PATH`` | paths to archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-n``, ``--dry-run`` | do not create a backup archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-s``, ``--stats`` | print statistics for the created archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--list`` | output verbose list of items (files, dirs, ...) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--filter STATUSCHARS`` | only display items with the given status characters (see description) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--json`` | output stats as JSON. Implies ``--stats``. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--no-cache-sync`` | experimental: do not synchronize the cache. Implies ``--no-files-cache``. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. 
class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Exclusion options** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-e PATTERN``, ``--exclude PATTERN`` | exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-from EXCLUDEFILE`` | read exclude patterns from EXCLUDEFILE, one per line | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--pattern PATTERN`` | experimental: include/exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--patterns-from PATTERNFILE`` | experimental: read include/exclude patterns from PATTERNFILE, one per line | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-caches`` | exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-if-present NAME`` | exclude directories that are tagged by containing a filesystem object with the given NAME | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-exclude-tags``, ``--keep-tag-files`` | if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Filesystem options** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-x``, 
``--one-file-system`` | stay in the same file system and do not store mount points of other file systems | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--numeric-owner`` | only store numeric user and group identifiers | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--noatime`` | do not store atime into archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--noctime`` | do not store ctime into archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--ignore-inode`` | ignore inode data in the file metadata cache used to detect unchanged files. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--read-special`` | open and read block and char device files as well as FIFOs as if they were regular files. Also follows symlinks pointing to these kinds of files. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive options** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--comment COMMENT`` | add a comment text to the archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--timestamp TIMESTAMP`` | manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). Alternatively, give a reference file/directory. 
| + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-c SECONDS``, ``--checkpoint-interval SECONDS`` | write checkpoint every SECONDS seconds (Default: 1800) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--chunker-params PARAMS`` | specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE). default: 19,23,21,4095 | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-C COMPRESSION``, ``--compression COMPRESSION`` | select compression algorithm, see the output of the "borg help compression" command for details. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments ARCHIVE name of archive to create (must be also a valid directory name) PATH paths to archive -optional arguments - ``-n``, ``--dry-run`` - | do not create a backup archive - ``-s``, ``--stats`` - | print statistics for the created archive - ``-p``, ``--progress`` - | show progress display while creating the archive, showing Original, Compressed and Deduplicated sizes, followed by the Number of files seen and the path being processed, default: False - ``--list`` - | output verbose list of items (files, dirs, ...) - ``--filter STATUSCHARS`` - | only display items with the given status characters -`Common options`_ - | + optional arguments + -n, --dry-run do not create a backup archive + -s, --stats print statistics for the created archive + --list output verbose list of items (files, dirs, ...) + --filter STATUSCHARS only display items with the given status characters (see description) + --json output stats as JSON. Implies ``--stats``. + --no-cache-sync experimental: do not synchronize the cache. Implies ``--no-files-cache``. -Exclusion options - ``-e PATTERN``, ``--exclude PATTERN`` - | exclude paths matching PATTERN - ``--exclude-from EXCLUDEFILE`` - | read exclude patterns from EXCLUDEFILE, one per line - ``--exclude-caches`` - | exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) - ``--exclude-if-present FILENAME`` - | exclude directories that contain the specified file - ``--keep-tag-files`` - | keep tag files of excluded caches/directories -Filesystem options - ``-x``, ``--one-file-system`` - | stay in same file system, do not cross mount points - ``--numeric-owner`` - | only store numeric user and group identifiers - ``--ignore-inode`` - | ignore inode data in the file metadata cache used to detect unchanged files. - ``--read-special`` - | open and read block and char device files as well as FIFOs as if they were regular files. Also follows symlinks pointing to these kinds of files. 
+ :ref:`common_options` + | + + Exclusion options + -e PATTERN, --exclude PATTERN exclude paths matching PATTERN + --exclude-from EXCLUDEFILE read exclude patterns from EXCLUDEFILE, one per line + --pattern PATTERN experimental: include/exclude paths matching PATTERN + --patterns-from PATTERNFILE experimental: read include/exclude patterns from PATTERNFILE, one per line + --exclude-caches exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) + --exclude-if-present NAME exclude directories that are tagged by containing a filesystem object with the given NAME + --keep-exclude-tags, --keep-tag-files if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive + + + Filesystem options + -x, --one-file-system stay in the same file system and do not store mount points of other file systems + --numeric-owner only store numeric user and group identifiers + --noatime do not store atime into archive + --noctime do not store ctime into archive + --ignore-inode ignore inode data in the file metadata cache used to detect unchanged files. + --read-special open and read block and char device files as well as FIFOs as if they were regular files. Also follows symlinks pointing to these kinds of files. + + + Archive options + --comment COMMENT add a comment text to the archive + --timestamp TIMESTAMP manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). Alternatively, give a reference file/directory. + -c SECONDS, --checkpoint-interval SECONDS write checkpoint every SECONDS seconds (Default: 1800) + --chunker-params PARAMS specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE). default: 19,23,21,4095 + -C COMPRESSION, --compression COMPRESSION select compression algorithm, see the output of the "borg help compression" command for details. -Archive options - ``--comment COMMENT`` - | add a comment text to the archive - ``--timestamp yyyy-mm-ddThh:mm:ss`` - | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory. - ``-c SECONDS``, ``--checkpoint-interval SECONDS`` - | write checkpoint every SECONDS seconds (Default: 1800) - ``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE`` - | specify the chunker parameters. default: 19,23,21,4095 - ``-C COMPRESSION``, ``--compression COMPRESSION`` - | select compression algorithm (and level): - | none == no compression (default), - | auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L] - | being any valid compression algorithm (and optional level), - | lz4 == lz4, - | zlib == zlib (default level 6), - | zlib,0 .. zlib,9 == zlib (with level 0..9), - | lzma == lzma (default level 6), - | lzma,0 .. lzma,9 == lzma (with level 0..9). - ``--compression-from COMPRESSIONCONFIG`` - | read compression patterns from COMPRESSIONCONFIG, one per line Description ~~~~~~~~~~~ This command creates a backup archive containing all files found while recursively -traversing all paths specified. The archive will consume almost no disk space for -files or parts of files that have already been stored in other archives. +traversing all paths specified. Paths are added to the archive as they are given, +that means if relative paths are desired, the command has to be run from the correct +directory. 
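+
+As a brief illustration (the repository path, archive name and directory below are
+example values, not defaults)::
+
+    # "Documents" becomes the root directory inside the archive
+    $ cd /home/user
+    $ borg create /path/to/repo::docs-{now:%Y-%m-%d} Documents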
+ +When giving '-' as path, borg will read data from standard input and create a +file 'stdin' in the created archive from that data. + +The archive will consume almost no disk space for files or parts of files that +have already been stored in other archives. The archive name needs to be unique. It must not end in '.checkpoint' or '.checkpoint.N' (with N being a number), because these names are used for checkpoints and treated in special ways. -In the archive name, you may use the following format tags: -{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4} +In the archive name, you may use the following placeholders: +{now}, {utcnow}, {fqdn}, {hostname}, {user} and some others. -To speed up pulling backups over sshfs and similar network file systems which do -not provide correct inode information the --ignore-inode flag can be used. This -potentially decreases reliability of change detection, while avoiding always reading -all files on these file systems. +Backup speed is increased by not reprocessing files that are already part of +existing archives and weren't modified. Normally, detecting file modifications +will take inode information into consideration. This is problematic for files +located on sshfs and similar network file systems which do not provide stable +inode numbers, such files will always be considered modified. The +``--ignore-inode`` flag can be used to prevent this and improve performance. +This flag will reduce reliability of change detection however, with files +considered unmodified as long as their size and modification time are unchanged. + +The mount points of filesystems or filesystem snapshots should be the same for every +creation of a new archive to ensure fast operation. This is because the file cache that +is used to determine changed files quickly uses absolute filenames. +If this is not possible, consider creating a bind mount to a stable location. + +The ``--progress`` option shows (from left to right) Original, Compressed and Deduplicated +(O, C and D, respectively), then the Number of files (N) processed so far, followed by +the currently processed path. See the output of the "borg help patterns" command for more help on exclude patterns. See the output of the "borg help placeholders" command for more help on placeholders. + +.. man NOTES + +The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will +exclude foo/.bundler/gems. In borg it will not, you need to use ``--exclude`` +'\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for +more information. + +In addition to using ``--exclude`` patterns, it is possible to use +``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file +or folder name) which, when contained within another folder, will prevent the +containing folder from being backed up. By default, the containing folder and +all of its contents will be omitted from the backup. If, however, you wish to +only include the objects specified by ``--exclude-if-present`` in your backup, +and not include any other contents of the containing folder, this can be enabled +through using the ``--keep-exclude-tags`` option. + +Item flags +++++++++++ + +``--list`` outputs a list of all files, directories and other +file system items it considered (no matter whether they had content changes +or not). For each item, it prefixes a single-letter flag that indicates type +and/or status of the item. + +If you are interested only in a subset of that output, you can give e.g. 
+``--filter=AME`` and it will only show regular files with A, M or E status (see +below). + +A uppercase character represents the status of a regular file relative to the +"files" cache (not relative to the repo -- this is an issue if the files cache +is not used). Metadata is stored in any case and for 'A' and 'M' also new data +chunks are stored. For 'U' all data chunks refer to already existing chunks. + +- 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ) +- 'M' = regular file, modified +- 'U' = regular file, unchanged +- 'E' = regular file, an error happened while accessing/reading *this* file + +A lowercase character means a file type other than a regular file, +borg usually just stores their metadata: + +- 'd' = directory +- 'b' = block device +- 'c' = char device +- 'h' = regular file, hardlink (to already seen inodes) +- 's' = symlink +- 'f' = fifo + +Other flags used include: + +- 'i' = backup data was read from standard input (stdin) +- '-' = dry run, item was *not* backed up +- 'x' = excluded, item was *not* backed up +- '?' = missing status code (if you see this, please file a bug report!) \ No newline at end of file diff --git a/docs/usage/debug-delete-obj.rst.inc b/docs/usage/debug-delete-obj.rst.inc deleted file mode 100644 index 4fcfb48f..00000000 --- a/docs/usage/debug-delete-obj.rst.inc +++ /dev/null @@ -1,23 +0,0 @@ -.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! - -.. _borg_debug-delete-obj: - -borg debug-delete-obj ---------------------- -:: - - borg debug-delete-obj REPOSITORY IDs - -positional arguments - REPOSITORY - repository to use - IDs - hex object ID(s) to delete from the repo - -`Common options`_ - | - -Description -~~~~~~~~~~~ - -This command deletes objects from the repository. diff --git a/docs/usage/debug-dump-archive-items.rst.inc b/docs/usage/debug-dump-archive-items.rst.inc deleted file mode 100644 index 63c39546..00000000 --- a/docs/usage/debug-dump-archive-items.rst.inc +++ /dev/null @@ -1,21 +0,0 @@ -.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! - -.. _borg_debug-dump-archive-items: - -borg debug-dump-archive-items ------------------------------ -:: - - borg debug-dump-archive-items ARCHIVE - -positional arguments - ARCHIVE - archive to dump - -`Common options`_ - | - -Description -~~~~~~~~~~~ - -This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files. diff --git a/docs/usage/debug-get-obj.rst.inc b/docs/usage/debug-get-obj.rst.inc deleted file mode 100644 index a0b3f457..00000000 --- a/docs/usage/debug-get-obj.rst.inc +++ /dev/null @@ -1,25 +0,0 @@ -.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! - -.. _borg_debug-get-obj: - -borg debug-get-obj ------------------- -:: - - borg debug-get-obj REPOSITORY ID PATH - -positional arguments - REPOSITORY - repository to use - ID - hex object ID to get from the repo - PATH - file to write object data into - -`Common options`_ - | - -Description -~~~~~~~~~~~ - -This command gets an object from the repository. diff --git a/docs/usage/debug-put-obj.rst.inc b/docs/usage/debug-put-obj.rst.inc deleted file mode 100644 index d03ace84..00000000 --- a/docs/usage/debug-put-obj.rst.inc +++ /dev/null @@ -1,23 +0,0 @@ -.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! - -.. 
_borg_debug-put-obj: - -borg debug-put-obj ------------------- -:: - - borg debug-put-obj REPOSITORY PATH - -positional arguments - REPOSITORY - repository to use - PATH - file(s) to read and create object(s) from - -`Common options`_ - | - -Description -~~~~~~~~~~~ - -This command puts objects into the repository. diff --git a/docs/usage/debug.rst b/docs/usage/debug.rst new file mode 100644 index 00000000..0dc54ae3 --- /dev/null +++ b/docs/usage/debug.rst @@ -0,0 +1,34 @@ +Debugging Facilities +-------------------- + +There is a ``borg debug`` command that has some subcommands which are all +**not intended for normal use** and **potentially very dangerous** if used incorrectly. + +For example, ``borg debug put-obj`` and ``borg debug delete-obj`` will only do +what their name suggests: put objects into repo / delete objects from repo. + +Please note: + +- they will not update the chunks cache (chunks index) about the object +- they will not update the manifest (so no automatic chunks index resync is triggered) +- they will not check whether the object is in use (e.g. before delete-obj) +- they will not update any metadata which may point to the object + +They exist to improve debugging capabilities without direct system access, e.g. +in case you ever run into some severe malfunction. Use them only if you know +what you are doing or if a trusted Borg developer tells you what to do. + +Borg has a ``--debug-topic TOPIC`` option to enable specific debugging messages. Topics +are generally not documented. + +A ``--debug-profile FILE`` option exists which writes a profile of the main program's +execution to a file. The format of these files is not directly compatible with the +Python profiling tools, since these use the "marshal" format, which is not intended +to be secure (quoting the Python docs: "Never unmarshal data received from an untrusted +or unauthenticated source."). + +The ``borg debug profile-convert`` command can be used to take a Borg profile and convert +it to a profile file that is compatible with the Python tools. + +Additionally, if the filename specified for ``--debug-profile`` ends with ".pyprof" a +Python compatible profile is generated. This is only intended for local use by developers. diff --git a/docs/usage/delete.rst b/docs/usage/delete.rst new file mode 100644 index 00000000..a5017681 --- /dev/null +++ b/docs/usage/delete.rst @@ -0,0 +1,16 @@ +.. include:: delete.rst.inc + +Examples +~~~~~~~~ +:: + + # delete a single backup archive: + $ borg delete /path/to/repo::Monday + + # delete the whole repository and the related local cache: + $ borg delete /path/to/repo + You requested to completely DELETE the repository *including* all archives it contains: + repo Mon, 2016-02-15 19:26:54 + root-2016-02-15 Mon, 2016-02-15 19:36:29 + newname Mon, 2016-02-15 19:50:19 + Type 'YES' if you understand this and want to continue: YES diff --git a/docs/usage/delete.rst.inc b/docs/usage/delete.rst.inc index 87451ec5..e965b830 100644 --- a/docs/usage/delete.rst.inc +++ b/docs/usage/delete.rst.inc @@ -4,32 +4,81 @@ borg delete ----------- -:: +.. code-block:: none - borg delete TARGET + borg [common options] delete [options] [TARGET] + +.. only:: html + + .. 
class:: borg-options-table + + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``TARGET`` | archive or repository to delete | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-s``, ``--stats`` | print statistics for the deleted archive | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--cache-only`` | delete only the local cache for the given repository | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--force`` | force deletion of corrupted archives, use ``--force --force`` in case ``--force`` does not work. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--save-space`` | work slower, but using less space | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive filters** — Archive filters can be applied to repository targets. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-P PREFIX``, ``--prefix PREFIX`` | only consider archive names starting with this prefix. 
| + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-a GLOB``, ``--glob-archives GLOB`` | only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--sort-by KEYS`` | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--first N`` | consider first N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--last N`` | consider last N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments TARGET archive or repository to delete -optional arguments - ``-p``, ``--progress`` - | show progress display while deleting a single archive - ``-s``, ``--stats`` - | print statistics for the deleted archive - ``-c``, ``--cache-only`` - | delete only the local cache for the given repository - ``--force`` - | force deletion of corrupted archives - ``--save-space`` - | work slower, but using less space -`Common options`_ - | + optional arguments + -s, --stats print statistics for the deleted archive + --cache-only delete only the local cache for the given repository + --force force deletion of corrupted archives, use ``--force --force`` in case ``--force`` does not work. + --save-space work slower, but using less space + + + :ref:`common_options` + | + + Archive filters + -P PREFIX, --prefix PREFIX only consider archive names starting with this prefix. + -a GLOB, --glob-archives GLOB only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. + --sort-by KEYS Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp + --first N consider first N archives after other filters were applied + --last N consider last N archives after other filters were applied + Description ~~~~~~~~~~~ This command deletes an archive from the repository or the complete repository. Disk space is reclaimed accordingly. If you delete the complete repository, the -local cache for it (if any) is also deleted. +local cache for it (if any) is also deleted. 
\ No newline at end of file diff --git a/docs/usage/diff.rst b/docs/usage/diff.rst new file mode 100644 index 00000000..e2972443 --- /dev/null +++ b/docs/usage/diff.rst @@ -0,0 +1,36 @@ +.. include:: diff.rst.inc + +Examples +~~~~~~~~ +:: + + $ borg init -e=none testrepo + $ mkdir testdir + $ cd testdir + $ echo asdf > file1 + $ dd if=/dev/urandom bs=1M count=4 > file2 + $ touch file3 + $ borg create ../testrepo::archive1 . + + $ chmod a+x file1 + $ echo "something" >> file2 + $ borg create ../testrepo::archive2 . + + $ rm file3 + $ touch file4 + $ borg create ../testrepo::archive3 . + + $ cd .. + $ borg diff testrepo::archive1 archive2 + [-rw-r--r-- -> -rwxr-xr-x] file1 + +135 B -252 B file2 + + $ borg diff testrepo::archive2 archive3 + added 0 B file4 + removed 0 B file3 + + $ borg diff testrepo::archive1 archive3 + [-rw-r--r-- -> -rwxr-xr-x] file1 + +135 B -252 B file2 + added 0 B file4 + removed 0 B file3 diff --git a/docs/usage/diff.rst.inc b/docs/usage/diff.rst.inc index 9836af57..ef5b831a 100644 --- a/docs/usage/diff.rst.inc +++ b/docs/usage/diff.rst.inc @@ -4,40 +4,97 @@ borg diff --------- -:: +.. code-block:: none - borg diff ARCHIVE1 ARCHIVE2 PATH + borg [common options] diff [options] REPO_ARCHIVE1 ARCHIVE2 [PATH...] -positional arguments - ARCHIVE1 - archive +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``REPO_ARCHIVE1`` | repository location and ARCHIVE1 name | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``ARCHIVE2`` | ARCHIVE2 name (no repository location allowed) | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``PATH`` | paths of items inside the archives to compare; patterns are supported | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--numeric-owner`` | only consider numeric user and group identifiers | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--same-chunker-params`` | Override check of chunker parameters. 
| + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--sort`` | Sort the output lines by file path. | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | **Exclusion options** | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``-e PATTERN``, ``--exclude PATTERN`` | exclude paths matching PATTERN | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-from EXCLUDEFILE`` | read exclude patterns from EXCLUDEFILE, one per line | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--pattern PATTERN`` | experimental: include/exclude paths matching PATTERN | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--patterns-from PATTERNFILE`` | experimental: read include/exclude patterns from PATTERNFILE, one per line | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-caches`` | exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-if-present NAME`` | exclude directories that are tagged by containing a filesystem object with the given NAME | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-exclude-tags``, ``--keep-tag-files`` | if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive | + +-------------------------------------------------------+-----------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. 
only:: latex + + REPO_ARCHIVE1 + repository location and ARCHIVE1 name ARCHIVE2 - archive to compare with ARCHIVE1 (no repository location) + ARCHIVE2 name (no repository location allowed) PATH - paths to compare; patterns are supported + paths of items inside the archives to compare; patterns are supported -optional arguments - ``-e PATTERN``, ``--exclude PATTERN`` - | exclude paths matching PATTERN - ``--exclude-from EXCLUDEFILE`` - | read exclude patterns from EXCLUDEFILE, one per line - ``--numeric-owner`` - | only consider numeric user and group identifiers - ``--same-chunker-params`` - | Override check of chunker parameters. - ``--sort`` - | Sort the output lines by file path. -`Common options`_ - | + optional arguments + --numeric-owner only consider numeric user and group identifiers + --same-chunker-params Override check of chunker parameters. + --sort Sort the output lines by file path. + + + :ref:`common_options` + | + + Exclusion options + -e PATTERN, --exclude PATTERN exclude paths matching PATTERN + --exclude-from EXCLUDEFILE read exclude patterns from EXCLUDEFILE, one per line + --pattern PATTERN experimental: include/exclude paths matching PATTERN + --patterns-from PATTERNFILE experimental: read include/exclude patterns from PATTERNFILE, one per line + --exclude-caches exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) + --exclude-if-present NAME exclude directories that are tagged by containing a filesystem object with the given NAME + --keep-exclude-tags, --keep-tag-files if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive + Description ~~~~~~~~~~~ -This command finds differences in files (contents, user, group, mode) between archives. +This command finds differences (file contents, user/group/mode) between archives. -Both archives need to be in the same repository, and a repository location may only -be specified for ARCHIVE1. +A repository location and an archive name must be specified for REPO_ARCHIVE1. +ARCHIVE2 is just another archive name in same repository (no repository location +allowed). For archives created with Borg 1.1 or newer diff automatically detects whether the archives are created with the same chunker params. If so, only chunk IDs @@ -45,7 +102,7 @@ are compared, which is very fast. For archives prior to Borg 1.1 chunk contents are compared by default. If you did not create the archives with different chunker params, -pass --same-chunker-params. +pass ``--same-chunker-params``. Note that the chunker params changed from Borg 0.xx to 1.0. -See the output of the "borg help patterns" command for more help on exclude patterns. +See the output of the "borg help patterns" command for more help on exclude patterns. \ No newline at end of file diff --git a/docs/usage/export-tar.rst.inc b/docs/usage/export-tar.rst.inc new file mode 100644 index 00000000..9fa16e40 --- /dev/null +++ b/docs/usage/export-tar.rst.inc @@ -0,0 +1,116 @@ +.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_export-tar: + +borg export-tar +--------------- +.. code-block:: none + + borg [common options] export-tar [options] ARCHIVE FILE [PATH...] + +.. only:: html + + .. 
class:: borg-options-table + + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``ARCHIVE`` | archive to export | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``FILE`` | output tar file. "-" to write to stdout instead. | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``PATH`` | paths to extract; patterns are supported | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--tar-filter`` | filter program to pipe data through | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--list`` | output verbose list of items (files, dirs, ...) | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | .. 
class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | **Exclusion options** | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``-e PATTERN``, ``--exclude PATTERN`` | exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--exclude-from EXCLUDEFILE`` | read exclude patterns from EXCLUDEFILE, one per line | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--pattern PATTERN`` | experimental: include/exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--patterns-from PATTERNFILE`` | experimental: read include/exclude patterns from PATTERNFILE, one per line | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--strip-components NUMBER`` | Remove the specified number of leading path elements. Paths with fewer elements will be silently skipped. | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + ARCHIVE + archive to export + FILE + output tar file. "-" to write to stdout instead. + PATH + paths to extract; patterns are supported + + + optional arguments + --tar-filter filter program to pipe data through + --list output verbose list of items (files, dirs, ...) + + + :ref:`common_options` + | + + Exclusion options + -e PATTERN, --exclude PATTERN exclude paths matching PATTERN + --exclude-from EXCLUDEFILE read exclude patterns from EXCLUDEFILE, one per line + --pattern PATTERN experimental: include/exclude paths matching PATTERN + --patterns-from PATTERNFILE experimental: read include/exclude patterns from PATTERNFILE, one per line + --strip-components NUMBER Remove the specified number of leading path elements. Paths with fewer elements will be silently skipped. + + +Description +~~~~~~~~~~~ + +This command creates a tarball from an archive. + +When giving '-' as the output FILE, Borg will write a tar stream to standard output. + +By default (``--tar-filter=auto``) Borg will detect whether the FILE should be compressed +based on its file extension and pipe the tarball through an appropriate filter +before writing it to FILE: + +- .tar.gz: gzip +- .tar.bz2: bzip2 +- .tar.xz: xz + +Alternatively a ``--tar-filter`` program may be explicitly specified. It should +read the uncompressed tar stream from stdin and write a compressed/filtered +tar stream to stdout. + +The generated tarball uses the GNU tar format. 
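
As a quick illustration of the filter selection described above (the repository
path and archive name here are placeholders, not part of borg's generated help)::

    # "auto" detection: the .tar.gz suffix selects the gzip filter
    $ borg export-tar /path/to/repo::my-files my-files.tar.gz

    # write an uncompressed tar stream to stdout and compress it yourself
    $ borg export-tar /path/to/repo::my-files - | gzip > my-files.tar.gz

    # name the filter program explicitly instead of relying on the file extension
    $ borg export-tar --tar-filter="gzip -9" /path/to/repo::my-files my-files.tar.gz

In each case the filter only affects the FILE being written; the archive itself is
not modified.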
+ +export-tar is a lossy conversion: +BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. +Timestamp resolution is limited to whole seconds, not the nanosecond resolution +otherwise supported by Borg. + +A ``--sparse`` option (as found in borg extract) is not supported. + +By default the entire archive is extracted but a subset of files and directories +can be selected by passing a list of ``PATHs`` as arguments. +The file selection can further be restricted by using the ``--exclude`` option. + +See the output of the "borg help patterns" command for more help on exclude patterns. + +``--progress`` can be slower than no progress display, since it makes one additional +pass over the archive metadata. \ No newline at end of file diff --git a/docs/usage/extract.rst b/docs/usage/extract.rst new file mode 100644 index 00000000..292f84bd --- /dev/null +++ b/docs/usage/extract.rst @@ -0,0 +1,23 @@ +.. include:: extract.rst.inc + +Examples +~~~~~~~~ +:: + + # Extract entire archive + $ borg extract /path/to/repo::my-files + + # Extract entire archive and list files while processing + $ borg extract --list /path/to/repo::my-files + + # Verify whether an archive could be successfully extracted, but do not write files to disk + $ borg extract --dry-run /path/to/repo::my-files + + # Extract the "src" directory + $ borg extract /path/to/repo::my-files home/USERNAME/src + + # Extract the "src" directory but exclude object files + $ borg extract /path/to/repo::my-files home/USERNAME/src --exclude '*.o' + + # Restore a raw device (must not be active/in use/mounted at that time) + $ borg extract --stdout /path/to/repo::my-sdx | dd of=/dev/sdx bs=10M diff --git a/docs/usage/extract.rst.inc b/docs/usage/extract.rst.inc index c68eaa76..ef9a0c75 100644 --- a/docs/usage/extract.rst.inc +++ b/docs/usage/extract.rst.inc @@ -4,36 +4,84 @@ borg extract ------------ -:: +.. code-block:: none - borg extract ARCHIVE PATH + borg [common options] extract [options] ARCHIVE [PATH...] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``ARCHIVE`` | archive to extract | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``PATH`` | paths to extract; patterns are supported | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--list`` | output verbose list of items (files, dirs, ...) 
| + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``-n``, ``--dry-run`` | do not actually change any files | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--numeric-owner`` | only obey numeric user and group identifiers | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--stdout`` | write all extracted data to stdout | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--sparse`` | create holes in output sparse file from all-zero chunks | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | **Exclusion options** | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``-e PATTERN``, ``--exclude PATTERN`` | exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--exclude-from EXCLUDEFILE`` | read exclude patterns from EXCLUDEFILE, one per line | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--pattern PATTERN`` | experimental: include/exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--patterns-from PATTERNFILE`` | experimental: read include/exclude patterns from PATTERNFILE, one per line | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + | | ``--strip-components NUMBER`` | Remove the specified number of leading path elements. Paths with fewer elements will be silently skipped. | + +-------------------------------------------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. 
only:: latex -positional arguments ARCHIVE archive to extract PATH paths to extract; patterns are supported -optional arguments - ``--list`` - | output verbose list of items (files, dirs, ...) - ``-n``, ``--dry-run`` - | do not actually change any files - ``-e PATTERN``, ``--exclude PATTERN`` - | exclude paths matching PATTERN - ``--exclude-from EXCLUDEFILE`` - | read exclude patterns from EXCLUDEFILE, one per line - ``--numeric-owner`` - | only obey numeric user and group identifiers - ``--strip-components NUMBER`` - | Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped. - ``--stdout`` - | write all extracted data to stdout - ``--sparse`` - | create holes in output sparse file from all-zero chunks -`Common options`_ - | + optional arguments + --list output verbose list of items (files, dirs, ...) + -n, --dry-run do not actually change any files + --numeric-owner only obey numeric user and group identifiers + --stdout write all extracted data to stdout + --sparse create holes in output sparse file from all-zero chunks + + + :ref:`common_options` + | + + Exclusion options + -e PATTERN, --exclude PATTERN exclude paths matching PATTERN + --exclude-from EXCLUDEFILE read exclude patterns from EXCLUDEFILE, one per line + --pattern PATTERN experimental: include/exclude paths matching PATTERN + --patterns-from PATTERNFILE experimental: read include/exclude patterns from PATTERNFILE, one per line + --strip-components NUMBER Remove the specified number of leading path elements. Paths with fewer elements will be silently skipped. + Description ~~~~~~~~~~~ @@ -48,3 +96,11 @@ See the output of the "borg help patterns" command for more help on exclude patt By using ``--dry-run``, you can do all extraction steps except actually writing the output data: reading metadata and data chunks from the repo, checking the hash/hmac, decrypting, decompressing. + +``--progress`` can be slower than no progress display, since it makes one additional +pass over the archive metadata. + +.. note:: + + Currently, extract always writes into the current working directory ("."), + so make sure you ``cd`` to the right place before calling ``borg extract``. \ No newline at end of file diff --git a/docs/usage/general.rst b/docs/usage/general.rst new file mode 100644 index 00000000..5629aa5b --- /dev/null +++ b/docs/usage/general.rst @@ -0,0 +1,40 @@ +General +------- + +Borg consists of a number of commands. Each command accepts +a number of arguments and options and interprets various environment variables. +The following sections will describe each command in detail. + +Commands, options, parameters, paths and such are ``set in fixed-width``. +Option values are `underlined`. Borg has few options accepting a fixed set +of values (e.g. ``--encryption`` of :ref:`borg_init`). + +.. container:: experimental + + Experimental features are marked with red stripes on the sides, like this paragraph. + + Experimental features are not stable, which means that they may be changed in incompatible + ways or even removed entirely without prior notice in following releases. + +.. include:: ../usage_general.rst.inc + +In case you are interested in more details (like formulas), please see +:ref:`internals`. For details on the available JSON output, refer to +:ref:`json_output`. + +.. _common_options: + +Common options +~~~~~~~~~~~~~~ + +All Borg commands share these options: + +.. 
include:: common-options.rst.inc + +Examples +~~~~~~~~ +:: + + # Create an archive and log: borg version, files list, return code + $ borg create --show-version --list --show-rc /path/to/repo::my-files files + diff --git a/docs/usage/help.rst b/docs/usage/help.rst new file mode 100644 index 00000000..a23f0420 --- /dev/null +++ b/docs/usage/help.rst @@ -0,0 +1,4 @@ +Miscellaneous Help +------------------ + +.. include:: help.rst.inc diff --git a/docs/usage/help.rst.inc b/docs/usage/help.rst.inc index 4d7c776a..152e01b5 100644 --- a/docs/usage/help.rst.inc +++ b/docs/usage/help.rst.inc @@ -1,81 +1,44 @@ .. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! -.. _borg_placeholders: - -borg help placeholders -~~~~~~~~~~~~~~~~~~~~~~ - - -Repository (or Archive) URLs and --prefix values support these placeholders: - -{hostname} - - The (short) hostname of the machine. - -{fqdn} - - The full name of the machine. - -{now} - - The current local date and time. - -{utcnow} - - The current UTC date and time. - -{user} - - The user name (or UID, if no name is available) of the user running borg. - -{pid} - - The current process ID. - -Examples:: - - borg create /path/to/repo::{hostname}-{user}-{utcnow} ... - borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... - borg prune --prefix '{hostname}-' ... - .. _borg_patterns: borg help patterns ~~~~~~~~~~~~~~~~~~ -Exclusion patterns support four separate styles, fnmatch, shell, regular -expressions and path prefixes. By default, fnmatch is used. If followed -by a colon (':') the first two characters of a pattern are used as a +File patterns support these styles: fnmatch, shell, regular expressions, +path prefixes and path full-matches. By default, fnmatch is used for +``--exclude`` patterns and shell-style is used for the experimental ``--pattern`` +option. + +If followed by a colon (':') the first two characters of a pattern are used as a style selector. Explicit style selection is necessary when a non-default style is desired or when the desired pattern starts with two alphanumeric characters followed by a colon (i.e. `aa:something/*`). `Fnmatch `_, selector `fm:` - - This is the default style. These patterns use a variant of shell - pattern syntax, with '*' matching any number of characters, '?' - matching any single character, '[...]' matching any single - character specified, including ranges, and '[!...]' matching any - character not specified. For the purpose of these patterns, the - path separator ('\' for Windows and '/' on other systems) is not + This is the default style for ``--exclude`` and ``--exclude-from``. + These patterns use a variant of shell pattern syntax, with '\*' matching + any number of characters, '?' matching any single character, '[...]' + matching any single character specified, including ranges, and '[!...]' + matching any character not specified. For the purpose of these patterns, + the path separator ('\' for Windows and '/' on other systems) is not treated specially. Wrap meta-characters in brackets for a literal match (i.e. `[?]` to match the literal character `?`). For a path to match a pattern, it must completely match from start to end, or must match from the start to just before a path separator. Except for the root path, paths will never end in the path separator when matching is attempted. Thus, if a given pattern ends in a path - separator, a '*' is appended before matching is attempted. + separator, a '\*' is appended before matching is attempted. 
Shell-style patterns, selector `sh:` - + This is the default style for ``--pattern`` and ``--patterns-from``. Like fnmatch patterns these are similar to shell patterns. The difference is that the pattern may include `**/` for matching zero or more directory levels, `*` for matching zero or more arbitrary characters with the exception of any path separator. Regular expressions, selector `re:` - Regular expressions similar to those found in Perl are supported. Unlike shell patterns regular expressions are not required to match the complete path and any substring match is sufficient. It is strongly recommended to @@ -85,16 +48,39 @@ Regular expressions, selector `re:` regular expression syntax is described in the `Python documentation for the re module `_. -Prefix path, selector `pp:` - +Path prefix, selector `pp:` This pattern style is useful to match whole sub-directories. The pattern `pp:/data/bar` matches `/data/bar` and everything therein. -Exclusions can be passed via the command line option `--exclude`. When used +Path full-match, selector `pf:` + This pattern style is useful to match whole paths. + This is kind of a pseudo pattern as it can not have any variable or + unspecified parts - the full, precise path must be given. + `pf:/data/foo.txt` matches `/data/foo.txt` only. + + Implementation note: this is implemented via very time-efficient O(1) + hashtable lookups (this means you can have huge amounts of such patterns + without impacting performance much). + Due to that, this kind of pattern does not respect any context or order. + If you use such a pattern to include a file, it will always be included + (if the directory recursion encounters it). + Other include/exclude patterns that would normally match will be ignored. + Same logic applies for exclude. + +.. note:: + + `re:`, `sh:` and `fm:` patterns are all implemented on top of the Python SRE + engine. It is very easy to formulate patterns for each of these types which + requires an inordinate amount of time to match paths. If untrusted users + are able to supply patterns, ensure they cannot supply `re:` patterns. + Further, ensure that `sh:` and `fm:` patterns only contain a handful of + wildcards at most. + +Exclusions can be passed via the command line option ``--exclude``. When used from within a shell the patterns should be quoted to protect them from expansion. -The `--exclude-from` option permits loading exclusion patterns from a text +The ``--exclude-from`` option permits loading exclusion patterns from a text file with one pattern per line. Lines empty or starting with the number sign ('#') after removing whitespace on both ends are ignored. The optional style selector prefix is also supported for patterns loaded from a file. Due to @@ -131,3 +117,154 @@ Examples:: EOF $ borg create --exclude-from exclude.txt backup / +.. container:: experimental + + A more general and easier to use way to define filename matching patterns exists + with the experimental ``--pattern`` and ``--patterns-from`` options. Using these, you + may specify the backup roots (starting points) and patterns for inclusion/exclusion. + A root path starts with the prefix `R`, followed by a path (a plain path, not a + file pattern). An include rule starts with the prefix +, an exclude rule starts + with the prefix -, both followed by a pattern. + Inclusion patterns are useful to include paths that are contained in an excluded + path. The first matching pattern is used so if an include pattern matches before + an exclude pattern, the file is backed up. 
+ + Note that the default pattern style for ``--pattern`` and ``--patterns-from`` is + shell style (`sh:`), so those patterns behave similar to rsync include/exclude + patterns. The pattern style can be set via the `P` prefix. + + Patterns (``--pattern``) and excludes (``--exclude``) from the command line are + considered first (in the order of appearance). Then patterns from ``--patterns-from`` + are added. Exclusion patterns from ``--exclude-from`` files are appended last. + + An example ``--patterns-from`` file could look like that:: + + # "sh:" pattern style is the default, so the following line is not needed: + P sh + R / + # can be rebuild + - /home/*/.cache + # they're downloads for a reason + - /home/*/Downloads + # susan is a nice person + # include susans home + + /home/susan + # don't backup the other home directories + - /home/* + +.. _borg_placeholders: + +borg help placeholders +~~~~~~~~~~~~~~~~~~~~~~ + + +Repository (or Archive) URLs, ``--prefix`` and ``--remote-path`` values support these +placeholders: + +{hostname} + The (short) hostname of the machine. + +{fqdn} + The full name of the machine. + +{now} + The current local date and time, by default in ISO-8601 format. + You can also supply your own `format string `_, e.g. {now:%Y-%m-%d_%H:%M:%S} + +{utcnow} + The current UTC date and time, by default in ISO-8601 format. + You can also supply your own `format string `_, e.g. {utcnow:%Y-%m-%d_%H:%M:%S} + +{user} + The user name (or UID, if no name is available) of the user running borg. + +{pid} + The current process ID. + +{borgversion} + The version of borg, e.g.: 1.0.8rc1 + +{borgmajor} + The version of borg, only the major version, e.g.: 1 + +{borgminor} + The version of borg, only major and minor version, e.g.: 1.0 + +{borgpatch} + The version of borg, only major, minor and patch version, e.g.: 1.0.8 + +If literal curly braces need to be used, double them for escaping:: + + borg create /path/to/repo::{{literal_text}} + +Examples:: + + borg create /path/to/repo::{hostname}-{user}-{utcnow} ... + borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... + borg prune --prefix '{hostname}-' ... + +.. note:: + systemd uses a difficult, non-standard syntax for command lines in unit files (refer to + the `systemd.unit(5)` manual page). + + When invoking borg from unit files, pay particular attention to escaping, + especially when using the now/utcnow placeholders, since systemd performs its own + %-based variable replacement even in quoted text. To avoid interference from systemd, + double all percent signs (``{hostname}-{now:%Y-%m-%d_%H:%M:%S}`` + becomes ``{hostname}-{now:%%Y-%%m-%%d_%%H:%%M:%%S}``). + +.. _borg_compression: + +borg help compression +~~~~~~~~~~~~~~~~~~~~~ + + +It is no problem to mix different compression methods in one repo, +deduplication is done on the source data chunks (not on the compressed +or encrypted data). + +If some specific chunk was once compressed and stored into the repo, creating +another backup that also uses this chunk will not change the stored chunk. +So if you use different compression specs for the backups, whichever stores a +chunk first determines its compression. See also borg recreate. + +Compression is lz4 by default. If you want something else, you have to specify what you want. + +Valid compression specifiers are: + +none + Do not compress. + +lz4 + Use lz4 compression. High speed, low compression. (default) + +zlib[,L] + Use zlib ("gz") compression. Medium speed, medium compression. 
+ If you do not explicitely give the compression level L (ranging from 0 + to 9), it will use level 6. + Giving level 0 (means "no compression", but still has zlib protocol + overhead) is usually pointless, you better use "none" compression. + +lzma[,L] + Use lzma ("xz") compression. Low speed, high compression. + If you do not explicitely give the compression level L (ranging from 0 + to 9), it will use level 6. + Giving levels above 6 is pointless and counterproductive because it does + not compress better due to the buffer size used by borg - but it wastes + lots of CPU cycles and RAM. + +auto,C[,L] + Use a built-in heuristic to decide per chunk whether to compress or not. + The heuristic tries with lz4 whether the data is compressible. + For incompressible data, it will not use compression (uses "none"). + For compressible data, it uses the given C[,L] compression - with C[,L] + being any valid compression specifier. + +Examples:: + + borg create --compression lz4 REPO::ARCHIVE data + borg create --compression zlib REPO::ARCHIVE data + borg create --compression zlib,1 REPO::ARCHIVE data + borg create --compression auto,lzma,6 REPO::ARCHIVE data + borg create --compression auto,lzma ... + diff --git a/docs/usage/info.rst b/docs/usage/info.rst new file mode 100644 index 00000000..542ff5ef --- /dev/null +++ b/docs/usage/info.rst @@ -0,0 +1,58 @@ +.. include:: info.rst.inc + +Examples +~~~~~~~~ +:: + + $ borg info /path/to/repo::2017-06-29T11:00-srv + Archive name: 2017-06-29T11:00-srv + Archive fingerprint: b2f1beac2bd553b34e06358afa45a3c1689320d39163890c5bbbd49125f00fe5 + Comment: + Hostname: myhostname + Username: root + Time (start): Thu, 2017-06-29 11:03:07 + Time (end): Thu, 2017-06-29 11:03:13 + Duration: 5.66 seconds + Number of files: 17037 + Command line: /usr/sbin/borg create /path/to/repo::2017-06-29T11:00-srv /srv + Utilization of max. archive size: 0% + ------------------------------------------------------------------------------ + Original size Compressed size Deduplicated size + This archive: 12.53 GB 12.49 GB 1.62 kB + All archives: 121.82 TB 112.41 TB 215.42 GB + + Unique chunks Total chunks + Chunk index: 1015213 626934122 + + $ borg info /path/to/repo --last 1 + Archive name: 2017-06-29T11:00-srv + Archive fingerprint: b2f1beac2bd553b34e06358afa45a3c1689320d39163890c5bbbd49125f00fe5 + Comment: + Hostname: myhostname + Username: root + Time (start): Thu, 2017-06-29 11:03:07 + Time (end): Thu, 2017-06-29 11:03:13 + Duration: 5.66 seconds + Number of files: 17037 + Command line: /usr/sbin/borg create /path/to/repo::2017-06-29T11:00-srv /srv + Utilization of max. 
archive size: 0% + ------------------------------------------------------------------------------ + Original size Compressed size Deduplicated size + This archive: 12.53 GB 12.49 GB 1.62 kB + All archives: 121.82 TB 112.41 TB 215.42 GB + + Unique chunks Total chunks + Chunk index: 1015213 626934122 + + $ borg info /path/to/repo + Repository ID: d857ce5788c51272c61535062e89eac4e8ef5a884ffbe976e0af9d8765dedfa5 + Location: /path/to/repo + Encrypted: Yes (repokey) + Cache: /root/.cache/borg/d857ce5788c51272c61535062e89eac4e8ef5a884ffbe976e0af9d8765dedfa5 + Security dir: /root/.config/borg/security/d857ce5788c51272c61535062e89eac4e8ef5a884ffbe976e0af9d8765dedfa5 + ------------------------------------------------------------------------------ + Original size Compressed size Deduplicated size + All archives: 121.82 TB 112.41 TB 215.42 GB + + Unique chunks Total chunks + Chunk index: 1015213 626934122 diff --git a/docs/usage/info.rst.inc b/docs/usage/info.rst.inc index e9e5f893..ec5d1509 100644 --- a/docs/usage/info.rst.inc +++ b/docs/usage/info.rst.inc @@ -4,22 +4,79 @@ borg info --------- -:: +.. code-block:: none - borg info ARCHIVE + borg [common options] info [options] [REPOSITORY_OR_ARCHIVE] -positional arguments - ARCHIVE - archive to display information about +.. only:: html + + .. class:: borg-options-table + + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY_OR_ARCHIVE`` | archive or repository to display information about | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--json`` | format output as JSON | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive filters** — Archive filters can be applied to repository targets. 
| + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-P PREFIX``, ``--prefix PREFIX`` | only consider archive names starting with this prefix. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-a GLOB``, ``--glob-archives GLOB`` | only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--sort-by KEYS`` | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--first N`` | consider first N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--last N`` | consider last N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + REPOSITORY_OR_ARCHIVE + archive or repository to display information about + + + optional arguments + --json format output as JSON + + + :ref:`common_options` + | + + Archive filters + -P PREFIX, --prefix PREFIX only consider archive names starting with this prefix. + -a GLOB, --glob-archives GLOB only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. + --sort-by KEYS Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp + --first N consider first N archives after other filters were applied + --last N consider last N archives after other filters were applied -`Common options`_ - | Description ~~~~~~~~~~~ -This command displays some detailed information about the specified archive. +This command displays detailed information about the specified archive or repository. -The "This archive" line refers exclusively to this archive: -"Deduplicated size" is the size of the unique chunks stored only for this -archive. Non-unique / common chunks show up under "All archives". 
+Please note that the deduplicated sizes of the individual archives do not add +up to the deduplicated size of the repository ("all archives"), because the two +are meaning different things: + +This archive / deduplicated size = amount of data stored ONLY for this archive + = unique chunks of this archive. +All archives / deduplicated size = amount of data stored in the repo + = all chunks in the repository. \ No newline at end of file diff --git a/docs/usage/init.rst b/docs/usage/init.rst new file mode 100644 index 00000000..97860a15 --- /dev/null +++ b/docs/usage/init.rst @@ -0,0 +1,17 @@ +.. include:: init.rst.inc + +Examples +~~~~~~~~ +:: + + # Local repository, repokey encryption, BLAKE2b (often faster, since Borg 1.1) + $ borg init --encryption=repokey-blake2 /path/to/repo + + # Local repository (no encryption) + $ borg init --encryption=none /path/to/repo + + # Remote repository (accesses a remote borg via ssh) + $ borg init --encryption=repokey-blake2 user@hostname:backup + + # Remote repository (store the key your home dir) + $ borg init --encryption=keyfile user@hostname:backup diff --git a/docs/usage/init.rst.inc b/docs/usage/init.rst.inc index b2c84131..68437e57 100644 --- a/docs/usage/init.rst.inc +++ b/docs/usage/init.rst.inc @@ -4,20 +4,54 @@ borg init --------- -:: +.. code-block:: none - borg init REPOSITORY + borg [common options] init [options] [REPOSITORY] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | | ``REPOSITORY`` | repository to create | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | | ``-e MODE``, ``--encryption MODE`` | select encryption key mode **(required)** | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | | ``--append-only`` | create an append-only mode repository | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | | ``--storage-quota QUOTA`` | Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota. | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+------------------------------------+-----------------------------------------------------------------------------+ + + .. raw:: html + + + +.. 
only:: latex -positional arguments REPOSITORY repository to create -optional arguments - ``-e``, ``--encryption`` - | select encryption key mode (default: "repokey") -`Common options`_ - | + optional arguments + -e MODE, --encryption MODE select encryption key mode **(required)** + --append-only create an append-only mode repository + --storage-quota QUOTA Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota. + + + :ref:`common_options` + | Description ~~~~~~~~~~~ @@ -25,21 +59,22 @@ Description This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. -Encryption can be enabled at repository init time (the default). +Encryption can be enabled at repository init time. It cannot be changed later. -It is not recommended to disable encryption. Repository encryption protects you -e.g. against the case that an attacker has access to your backup repository. +It is not recommended to work without encryption. Repository encryption protects +you e.g. against the case that an attacker has access to your backup repository. But be careful with the key / the passphrase: -If you want "passphrase-only" security, use the repokey mode. The key will -be stored inside the repository (in its "config" file). In above mentioned -attack scenario, the attacker will have the key (but not the passphrase). +If you want "passphrase-only" security, use one of the repokey modes. The +key will be stored inside the repository (in its "config" file). In above +mentioned attack scenario, the attacker will have the key (but not the +passphrase). -If you want "passphrase and having-the-key" security, use the keyfile mode. -The key will be stored in your home directory (in .config/borg/keys). In -the attack scenario, the attacker who has just access to your repo won't have -the key (and also not the passphrase). +If you want "passphrase and having-the-key" security, use one of the keyfile +modes. The key will be stored in your home directory (in .config/borg/keys). +In the attack scenario, the attacker who has just access to your repo won't +have the key (and also not the passphrase). Make a backup copy of the key file (keyfile mode) or repo config file (repokey mode) and keep it at a safe place, so you still have the key in @@ -64,5 +99,61 @@ a different keyboard layout. You can change your passphrase for existing repos at any time, it won't affect the encryption/decryption key or other secrets. -When encrypting, AES-CTR-256 is used for encryption, and HMAC-SHA256 for -authentication. Hardware acceleration will be used automatically. +Encryption modes +++++++++++++++++ + +.. nanorst: inline-fill + ++----------+---------------+------------------------+--------------------------+ +| Hash/MAC | Not encrypted | Not encrypted, | Encrypted (AEAD w/ AES) | +| | no auth | but authenticated | and authenticated | ++----------+---------------+------------------------+--------------------------+ +| SHA-256 | none | `authenticated` | repokey | +| | | | keyfile | ++----------+---------------+------------------------+--------------------------+ +| BLAKE2b | n/a | `authenticated-blake2` | `repokey-blake2` | +| | | | `keyfile-blake2` | ++----------+---------------+------------------------+--------------------------+ + +.. nanorst: inline-replace + +`Marked modes` are new in Borg 1.1 and are not backwards-compatible with Borg 1.0.x. + +On modern Intel/AMD CPUs (except very cheap ones), AES is usually +hardware-accelerated. 
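
Before the per-mode notes below, a short sketch of how the table cells map onto
``borg init`` invocations (repository paths are placeholders)::

    # encrypted and authenticated, BLAKE2b-based (Borg 1.1+ only)
    $ borg init --encryption=repokey-blake2 /path/to/repo

    # not encrypted, but authenticated via HMAC-SHA256
    $ borg init --encryption=authenticated /path/to/other-repo

    # no encryption, no authentication (1.0.x-compatible, not recommended)
    $ borg init --encryption=none /path/to/plain-repo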
+BLAKE2b is faster than SHA256 on Intel/AMD 64-bit CPUs +(except AMD Ryzen and future CPUs with SHA extensions), +which makes `authenticated-blake2` faster than `none` and `authenticated`. + +On modern ARM CPUs, NEON provides hardware acceleration for SHA256 making it faster +than BLAKE2b-256 there. NEON accelerates AES as well. + +Hardware acceleration is always used automatically when available. + +`repokey` and `keyfile` use AES-CTR-256 for encryption and HMAC-SHA256 for +authentication in an encrypt-then-MAC (EtM) construction. The chunk ID hash +is HMAC-SHA256 as well (with a separate key). +These modes are compatible with Borg 1.0.x. + +`repokey-blake2` and `keyfile-blake2` are also authenticated encryption modes, +but use BLAKE2b-256 instead of HMAC-SHA256 for authentication. The chunk ID +hash is a keyed BLAKE2b-256 hash. +These modes are new and *not* compatible with Borg 1.0.x. + +`authenticated` mode uses no encryption, but authenticates repository contents +through the same HMAC-SHA256 hash as the `repokey` and `keyfile` modes (it uses it +as the chunk ID hash). The key is stored like `repokey`. +This mode is new and *not* compatible with Borg 1.0.x. + +`authenticated-blake2` is like `authenticated`, but uses the keyed BLAKE2b-256 hash +from the other blake2 modes. +This mode is new and *not* compatible with Borg 1.0.x. + +`none` mode uses no encryption and no authentication. It uses SHA256 as chunk +ID hash. Not recommended, rather consider using an authenticated or +authenticated/encrypted mode. This mode has possible denial-of-service issues +when running ``borg create`` on contents controlled by an attacker. +Use it only for new repositories where no encryption is wanted **and** when compatibility +with 1.0.x is important. If compatibility with 1.0.x is not important, use +`authenticated-blake2` or `authenticated` instead. +This mode is compatible with Borg 1.0.x. \ No newline at end of file diff --git a/docs/usage/key.rst b/docs/usage/key.rst new file mode 100644 index 00000000..ac6ba86b --- /dev/null +++ b/docs/usage/key.rst @@ -0,0 +1,41 @@ +.. _borg-change-passphrase: + +.. include:: key_change-passphrase.rst.inc + +Examples +~~~~~~~~ +:: + + # Create a key file protected repository + $ borg init --encryption=keyfile -v /path/to/repo + Initializing repository at "/path/to/repo" + Enter new passphrase: + Enter same passphrase again: + Remember your passphrase. Your data will be inaccessible without it. + Key in "/root/.config/borg/keys/mnt_backup" created. + Keep this key safe. Your data will be inaccessible without it. + Synchronizing chunks cache... + Archives: 0, w/ cached Idx: 0, w/ outdated Idx: 0, w/o cached Idx: 0. + Done. + + # Change key file passphrase + $ borg key change-passphrase -v /path/to/repo + Enter passphrase for key /root/.config/borg/keys/mnt_backup: + Enter new passphrase: + Enter same passphrase again: + Remember your passphrase. Your data will be inaccessible without it. + Key updated + +Fully automated using environment variables: + +:: + + $ BORG_NEW_PASSPHRASE=old borg init -e=repokey repo + # now "old" is the current passphrase. + $ BORG_PASSPHRASE=old BORG_NEW_PASSPHRASE=new borg key change-passphrase repo + # now "new" is the current passphrase. + + +.. include:: key_export.rst.inc + +.. include:: key_import.rst.inc diff --git a/docs/usage/key_change-passphrase.rst.inc b/docs/usage/key_change-passphrase.rst.inc new file mode 100644 index 00000000..64bc409c --- /dev/null +++ b/docs/usage/key_change-passphrase.rst.inc @@ -0,0 +1,46 @@ +.. 
IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_key_change-passphrase: + +borg key change-passphrase +-------------------------- +.. code-block:: none + + borg [common options] key change-passphrase [options] [REPOSITORY] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+--+ + | **positional arguments** | + +-------------------------------------------------------+----------------+--+ + | | ``REPOSITORY`` | | + +-------------------------------------------------------+----------------+--+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+--+ + + .. raw:: html + + + +.. only:: latex + + REPOSITORY + + + + :ref:`common_options` + | + +Description +~~~~~~~~~~~ + +The key files used for repository encryption are optionally passphrase +protected. This command can be used to change this passphrase. \ No newline at end of file diff --git a/docs/usage/key_export.rst.inc b/docs/usage/key_export.rst.inc new file mode 100644 index 00000000..466ac07d --- /dev/null +++ b/docs/usage/key_export.rst.inc @@ -0,0 +1,76 @@ +.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_key_export: + +borg key export +--------------- +.. code-block:: none + + borg [common options] key export [options] [REPOSITORY] [PATH] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | | ``REPOSITORY`` | | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | | ``PATH`` | where to store the backup | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | | ``--paper`` | Create an export suitable for printing and later type-in | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | | ``--qr-html`` | Create an html file suitable for printing and later type-in or qr scan | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + REPOSITORY + + PATH + where to store the backup + + + optional arguments + --paper Create an export suitable for printing and later type-in + --qr-html Create an html file suitable for printing and later type-in or qr scan + + + :ref:`common_options` + | + +Description +~~~~~~~~~~~ + +If repository encryption is used, the repository is inaccessible +without the key. 
This command allows to backup this essential key. + +There are two backup formats. The normal backup format is suitable for +digital storage as a file. The ``--paper`` backup format is optimized +for printing and typing in while importing, with per line checks to +reduce problems with manual input. + +For repositories using keyfile encryption the key is saved locally +on the system that is capable of doing backups. To guard against loss +of this key, the key needs to be backed up independently of the main +data backup. + +For repositories using the repokey encryption the key is saved in the +repository in the config file. A backup is thus not strictly needed, +but guards against the repository becoming inaccessible if the file +is damaged for some reason. \ No newline at end of file diff --git a/docs/usage/key_import.rst.inc b/docs/usage/key_import.rst.inc new file mode 100644 index 00000000..4f3961ab --- /dev/null +++ b/docs/usage/key_import.rst.inc @@ -0,0 +1,62 @@ +.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_key_import: + +borg key import +--------------- +.. code-block:: none + + borg [common options] key import [options] [REPOSITORY] [PATH] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + | | ``REPOSITORY`` | | + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + | | ``PATH`` | path to the backup ('-' to read from stdin) | + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + | | ``--paper`` | interactively import from a backup done with ``--paper`` | + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+----------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + REPOSITORY + + PATH + path to the backup ('-' to read from stdin) + + + optional arguments + --paper interactively import from a backup done with ``--paper`` + + + :ref:`common_options` + | + +Description +~~~~~~~~~~~ + +This command allows to restore a key previously backed up with the +export command. + +If the ``--paper`` option is given, the import will be an interactive +process in which each line is checked for plausibility before +proceeding to the next line. For this format PATH must not be given. \ No newline at end of file diff --git a/docs/usage/key_migrate-to-repokey.rst.inc b/docs/usage/key_migrate-to-repokey.rst.inc new file mode 100644 index 00000000..33cff720 --- /dev/null +++ b/docs/usage/key_migrate-to-repokey.rst.inc @@ -0,0 +1,60 @@ +.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_key_migrate-to-repokey: + +borg key migrate-to-repokey +--------------------------- +.. 
code-block:: none + + borg [common options] key migrate-to-repokey [options] [REPOSITORY] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+--+ + | **positional arguments** | + +-------------------------------------------------------+----------------+--+ + | | ``REPOSITORY`` | | + +-------------------------------------------------------+----------------+--+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+--+ + + .. raw:: html + + + +.. only:: latex + + REPOSITORY + + + + :ref:`common_options` + | + +Description +~~~~~~~~~~~ + +This command migrates a repository from passphrase mode (removed in Borg 1.0) +to repokey mode. + +You will be first asked for the repository passphrase (to open it in passphrase +mode). This is the same passphrase as you used to use for this repo before 1.0. + +It will then derive the different secrets from this passphrase. + +Then you will be asked for a new passphrase (twice, for safety). This +passphrase will be used to protect the repokey (which contains these same +secrets in encrypted form). You may use the same passphrase as you used to +use, but you may also use a different one. + +After migrating to repokey mode, you can change the passphrase at any time. +But please note: the secrets will always stay the same and they could always +be derived from your (old) passphrase-mode passphrase. \ No newline at end of file diff --git a/docs/usage/list.rst b/docs/usage/list.rst new file mode 100644 index 00000000..9b84eb01 --- /dev/null +++ b/docs/usage/list.rst @@ -0,0 +1,27 @@ +.. include:: list.rst.inc + +Examples +~~~~~~~~ +:: + + $ borg list /path/to/repo + Monday Mon, 2016-02-15 19:15:11 + repo Mon, 2016-02-15 19:26:54 + root-2016-02-15 Mon, 2016-02-15 19:36:29 + newname Mon, 2016-02-15 19:50:19 + ... + + $ borg list /path/to/repo::root-2016-02-15 + drwxr-xr-x root root 0 Mon, 2016-02-15 17:44:27 . + drwxrwxr-x root root 0 Mon, 2016-02-15 19:04:49 bin + -rwxr-xr-x root root 1029624 Thu, 2014-11-13 00:08:51 bin/bash + lrwxrwxrwx root root 0 Fri, 2015-03-27 20:24:26 bin/bzcmp -> bzdiff + -rwxr-xr-x root root 2140 Fri, 2015-03-27 20:24:22 bin/bzdiff + ... + + $ borg list /path/to/repo::archiveA --format="{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}" + drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 . + drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 code + drwxrwxr-x user user 0 Sun, 2015-02-01 11:00:00 code/myproject + -rw-rw-r-- user user 1416192 Sun, 2015-02-01 11:00:00 code/myproject/file.ext + ... diff --git a/docs/usage/list.rst.inc b/docs/usage/list.rst.inc index 8e32df6a..3358a74b 100644 --- a/docs/usage/list.rst.inc +++ b/docs/usage/list.rst.inc @@ -4,31 +4,107 @@ borg list --------- -:: +.. code-block:: none - borg list REPOSITORY_OR_ARCHIVE PATH + borg [common options] list [options] [REPOSITORY_OR_ARCHIVE] [PATH...] + +.. only:: html + + .. 
class:: borg-options-table + + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY_OR_ARCHIVE`` | repository/archive to list contents of | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``PATH`` | paths to list; patterns are supported | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--short`` | only print file/directory names, nothing else | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--format FORMAT``, ``--list-format FORMAT`` | specify format for file listing (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}") | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--json`` | Only valid for listing repository contents. Format output as JSON. The form of ``--format`` is ignored, but keys used in it are added to the JSON output. Some keys are always present. Note: JSON can only represent text. A "barchive" key is therefore not available. 
| + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--json-lines`` | Only valid for listing archive contents. Format output as JSON Lines. The form of ``--format`` is ignored, but keys used in it are added to the JSON output. Some keys are always present. Note: JSON can only represent text. A "bpath" key is therefore not available. | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive filters** — Archive filters can be applied to repository targets. | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-P PREFIX``, ``--prefix PREFIX`` | only consider archive names starting with this prefix. | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-a GLOB``, ``--glob-archives GLOB`` | only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. 
| + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--sort-by KEYS`` | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--first N`` | consider first N archives after other filters were applied | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--last N`` | consider last N archives after other filters were applied | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Exclusion options** | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-e PATTERN``, ``--exclude PATTERN`` | exclude paths matching PATTERN | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-from EXCLUDEFILE`` | read exclude patterns from EXCLUDEFILE, one per line | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--pattern PATTERN`` | experimental: include/exclude paths matching PATTERN | + 
+-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--patterns-from PATTERNFILE`` | experimental: read include/exclude patterns from PATTERNFILE, one per line | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-caches`` | exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-if-present NAME`` | exclude directories that are tagged by containing a filesystem object with the given NAME | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-exclude-tags``, ``--keep-tag-files`` | if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive | + +-----------------------------------------------------------------------------+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY_OR_ARCHIVE repository/archive to list contents of PATH paths to list; patterns are supported -optional arguments - ``--short`` - | only print file/directory names, nothing else - ``--format``, ``--list-format`` - | specify format for file listing - | (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}") - ``-P``, ``--prefix`` - | only consider archive names starting with this prefix - ``-e PATTERN``, ``--exclude PATTERN`` - | exclude paths matching PATTERN - ``--exclude-from EXCLUDEFILE`` - | read exclude patterns from EXCLUDEFILE, one per line -`Common options`_ - | + optional arguments + --short only print file/directory names, nothing else + --format FORMAT, --list-format FORMAT specify format for file listing (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}") + --json Only valid for listing repository contents. Format output as JSON. 
The form of ``--format`` is ignored, but keys used in it are added to the JSON output. Some keys are always present. Note: JSON can only represent text. A "barchive" key is therefore not available. + --json-lines Only valid for listing archive contents. Format output as JSON Lines. The form of ``--format`` is ignored, but keys used in it are added to the JSON output. Some keys are always present. Note: JSON can only represent text. A "bpath" key is therefore not available. + + + :ref:`common_options` + | + + Archive filters + -P PREFIX, --prefix PREFIX only consider archive names starting with this prefix. + -a GLOB, --glob-archives GLOB only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. + --sort-by KEYS Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp + --first N consider first N archives after other filters were applied + --last N consider last N archives after other filters were applied + + + Exclusion options + -e PATTERN, --exclude PATTERN exclude paths matching PATTERN + --exclude-from EXCLUDEFILE read exclude patterns from EXCLUDEFILE, one per line + --pattern PATTERN experimental: include/exclude paths matching PATTERN + --patterns-from PATTERNFILE experimental: read include/exclude patterns from PATTERNFILE, one per line + --exclude-caches exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) + --exclude-if-present NAME exclude directories that are tagged by containing a filesystem object with the given NAME + --keep-exclude-tags, --keep-tag-files if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive + Description ~~~~~~~~~~~ @@ -37,53 +113,69 @@ This command lists the contents of a repository or an archive. See the "borg help patterns" command for more help on exclude patterns. -The following keys are available for --format: - - NEWLINE: OS dependent line separator - - NL: alias of NEWLINE - - NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath - - SPACE - - TAB - - CR - - LF +.. 
man NOTES --- Keys for listing repository archives: - - archive: archive name interpreted as text (might be missing non-text characters, see barchive) - - barchive: verbatim archive name, can contain any character except NUL - - time: time of creation of the archive - - id: internal ID of the archive +The following keys are available for ``--format``: --- Keys for listing archive files: - - type - - mode - - uid - - gid - - user - - group - - path: path interpreted as text (might be missing non-text characters, see bpath) - - bpath: verbatim POSIX path, can contain any character except NUL - - source: link target for links (identical to linktarget) - - linktarget - - flags +- NEWLINE: OS dependent line separator +- NL: alias of NEWLINE +- NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath +- SPACE +- TAB +- CR +- LF - - size - - csize: compressed size - - num_chunks: number of chunks in this file - - unique_chunks: number of unique chunks in this file +Keys for listing repository archives: - - mtime - - ctime - - atime - - isomtime - - isoctime - - isoatime +- name: archive name interpreted as text (might be missing non-text characters, see barchive) +- archive: archive name interpreted as text (might be missing non-text characters, see barchive) +- barchive: verbatim archive name, can contain any character except NUL +- comment: archive comment interpreted as text (might be missing non-text characters, see bcomment) +- bcomment: verbatim archive comment, can contain any character except NUL +- id: internal ID of the archive - - md5 - - sha1 - - sha224 - - sha256 - - sha384 - - sha512 +- time: time (start) of creation of the archive +- start: time (start) of creation of the archive +- end: time (end) of creation of the archive - - archiveid - - archivename - - extra: prepends {source} with " -> " for soft links and " link to " for hard links + +Keys for listing archive files: + +- type +- mode +- uid +- gid +- user +- group +- path: path interpreted as text (might be missing non-text characters, see bpath) +- bpath: verbatim POSIX path, can contain any character except NUL +- source: link target for links (identical to linktarget) +- linktarget +- flags + +- size +- csize: compressed size +- dsize: deduplicated size +- dcsize: deduplicated compressed size +- num_chunks: number of chunks in this file +- unique_chunks: number of unique chunks in this file + +- mtime +- ctime +- atime +- isomtime +- isoctime +- isoatime + +- md5 +- sha1 +- sha224 +- sha256 +- sha384 +- sha512 + +- archiveid +- archivename +- extra: prepends {source} with " -> " for soft links and " link to " for hard links + +- health: either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks) diff --git a/docs/usage/lock.rst b/docs/usage/lock.rst new file mode 100644 index 00000000..b4573dcc --- /dev/null +++ b/docs/usage/lock.rst @@ -0,0 +1,3 @@ +.. include:: with-lock.rst.inc + +.. include:: break-lock.rst.inc diff --git a/docs/usage/migrate-to-repokey.rst.inc b/docs/usage/migrate-to-repokey.rst.inc deleted file mode 100644 index ec5f1a52..00000000 --- a/docs/usage/migrate-to-repokey.rst.inc +++ /dev/null @@ -1,36 +0,0 @@ -.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! - -.. 
_borg_migrate-to-repokey: - -borg migrate-to-repokey ------------------------ -:: - - borg migrate-to-repokey REPOSITORY - -positional arguments - REPOSITORY - - -`Common options`_ - | - -Description -~~~~~~~~~~~ - -This command migrates a repository from passphrase mode (not supported any -more) to repokey mode. - -You will be first asked for the repository passphrase (to open it in passphrase -mode). This is the same passphrase as you used to use for this repo before 1.0. - -It will then derive the different secrets from this passphrase. - -Then you will be asked for a new passphrase (twice, for safety). This -passphrase will be used to protect the repokey (which contains these same -secrets in encrypted form). You may use the same passphrase as you used to -use, but you may also use a different one. - -After migrating to repokey mode, you can change the passphrase at any time. -But please note: the secrets will always stay the same and they could always -be derived from your (old) passphrase-mode passphrase. diff --git a/docs/usage/mount.rst b/docs/usage/mount.rst new file mode 100644 index 00000000..46c7d554 --- /dev/null +++ b/docs/usage/mount.rst @@ -0,0 +1,58 @@ +.. include:: mount.rst.inc + +.. include:: umount.rst.inc + +Examples +~~~~~~~~ + +:: + + # Mounting the repository shows all archives. + # Archives are loaded lazily, expect some delay when navigating to an archive + # for the first time. + $ borg mount /path/to/repo /tmp/mymountpoint + $ ls /tmp/mymountpoint + root-2016-02-14 root-2016-02-15 + $ borg umount /tmp/mymountpoint + + # Mounting a specific archive is possible as well. + $ borg mount /path/to/repo::root-2016-02-15 /tmp/mymountpoint + $ ls /tmp/mymountpoint + bin boot etc home lib lib64 lost+found media mnt opt + root sbin srv tmp usr var + $ borg umount /tmp/mymountpoint + + # The experimental "versions view" merges all archives in the repository + # and provides a versioned view on files. + $ borg mount -o versions /path/to/repo /tmp/mymountpoint + $ ls -l /tmp/mymountpoint/home/user/doc.txt/ + total 24 + -rw-rw-r-- 1 user group 12357 Aug 26 21:19 doc.cda00bc9.txt + -rw-rw-r-- 1 user group 12204 Aug 26 21:04 doc.fa760f28.txt + $ borg umount /tmp/mymountpoint + + # Archive filters are supported. + # These are especially handy for the "versions view", + # which does not support lazy processing of archives. + $ borg mount -o versions --glob-archives '*-my-home' --last 10 /path/to/repo /tmp/mymountpoint + +borgfs +++++++ + +:: + + $ echo '/mnt/backup /tmp/myrepo fuse.borgfs defaults,noauto 0 0' >> /etc/fstab + $ echo '/mnt/backup::root-2016-02-15 /tmp/myarchive fuse.borgfs defaults,noauto 0 0' >> /etc/fstab + $ mount /tmp/myrepo + $ mount /tmp/myarchive + $ ls /tmp/myrepo + root-2016-02-01 root-2016-02-2015 + $ ls /tmp/myarchive + bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var + +.. Note:: + + ``borgfs`` will be automatically provided if you used a distribution + package, ``pip`` or ``setup.py`` to install Borg. Users of the + standalone binary will have to manually create a symlink (see + :ref:`pyinstaller-binary`). diff --git a/docs/usage/mount.rst.inc b/docs/usage/mount.rst.inc index cac84a43..deff3665 100644 --- a/docs/usage/mount.rst.inc +++ b/docs/usage/mount.rst.inc @@ -4,24 +4,75 @@ borg mount ---------- -:: +.. code-block:: none - borg mount REPOSITORY_OR_ARCHIVE MOUNTPOINT + borg [common options] mount [options] REPOSITORY_OR_ARCHIVE MOUNTPOINT + +.. only:: html + + .. 
class:: borg-options-table + + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY_OR_ARCHIVE`` | repository/archive to mount | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``MOUNTPOINT`` | where to mount filesystem | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-f``, ``--foreground`` | stay in foreground, do not daemonize | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-o`` | Extra mount options | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive filters** — Archive filters can be applied to repository targets. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-P PREFIX``, ``--prefix PREFIX`` | only consider archive names starting with this prefix. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-a GLOB``, ``--glob-archives GLOB`` | only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. 
| + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--sort-by KEYS`` | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--first N`` | consider first N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--last N`` | consider last N archives after other filters were applied | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY_OR_ARCHIVE repository/archive to mount MOUNTPOINT where to mount filesystem -optional arguments - ``-f``, ``--foreground`` - | stay in foreground, do not daemonize - ``-o`` - | Extra mount options -`Common options`_ - | + optional arguments + -f, --foreground stay in foreground, do not daemonize + -o Extra mount options + + + :ref:`common_options` + | + + Archive filters + -P PREFIX, --prefix PREFIX only consider archive names starting with this prefix. + -a GLOB, --glob-archives GLOB only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. + --sort-by KEYS Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp + --first N consider first N archives after other filters were applied + --last N consider last N archives after other filters were applied + Description ~~~~~~~~~~~ @@ -41,11 +92,20 @@ To allow a regular user to use fstab entries, add the ``user`` option: For mount options, see the fuse(8) manual page. Additional mount options supported by borg: +- versions: when used with a repository mount, this gives a merged, versioned + view of the files in the archives. EXPERIMENTAL, layout may change in future. - allow_damaged_files: by default damaged files (where missing chunks were - replaced with runs of zeros by borg check --repair) are not readable and + replaced with runs of zeros by borg check ``--repair``) are not readable and return EIO (I/O error). Set this option to read such files. The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users to tweak the performance. It sets the number of cached data chunks; additional memory usage can be up to ~8 MiB times this number. The default is the number of CPU cores. + +When the daemonized process receives a signal or crashes, it does not unmount. +Unmounting in these cases could cause an active rsync or similar process +to unintentionally delete data. 
+
+When running in the foreground ^C/SIGINT unmounts cleanly, but other
+signals or crashes do not.
\ No newline at end of file
diff --git a/docs/usage/notes.rst b/docs/usage/notes.rst
new file mode 100644
index 00000000..c45ef3f8
--- /dev/null
+++ b/docs/usage/notes.rst
@@ -0,0 +1,237 @@
+Additional Notes
+----------------
+
+Here are miscellaneous notes about topics that may not be covered in enough detail in the usage section.
+
+.. _chunker-params:
+
+``--chunker-params``
+~~~~~~~~~~~~~~~~~~~~
+
+The chunker params influence how input files are cut into pieces (chunks)
+which are then considered for deduplication. They also have a big impact on
+resource usage (RAM and disk space) as the amount of resources needed is
+(also) determined by the total amount of chunks in the repository (see
+:ref:`cache-memory-usage` for details).
+
+``--chunker-params=10,23,16,4095`` results in a fine-grained deduplication
+and creates a large number of chunks and thus uses a lot of resources to manage
+them. This is good for relatively small data volumes and if the machine has a
+good amount of free RAM and disk space.
+
+``--chunker-params=19,23,21,4095`` (default) results in a coarse-grained
+deduplication and creates a much smaller number of chunks and thus uses fewer
+resources. This is good for relatively big data volumes and if the machine has
+a relatively low amount of free RAM and disk space.
+
+If you have already made some archives in a repository and you then change
+chunker params, this of course impacts deduplication as the chunks will be
+cut differently.
+
+In the worst case (all files are big and were touched in between backups), this
+will store all content into the repository again.
+
+Usually, it is not that bad though:
+
+- usually most files are not touched, so it will just re-use the old chunks
+  it already has in the repo
+- files smaller than the (both old and new) minimum chunksize result in only
+  one chunk anyway, so the resulting chunks are the same and deduplication will apply
+
+If you switch chunker params to save resources for an existing repo that
+already has some backup archives, you will see an increasing effect over time,
+when more and more files have been touched and stored again using the bigger
+chunksize **and** all references to the smaller older chunks have been removed
+(by deleting / pruning archives).
+
+If you want to see an immediate big effect on resource usage, it is better to
+start a new repository when changing chunker params.
+
+For more details, see :ref:`chunker_details`.
+
+
+``--noatime / --noctime``
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can use these ``borg create`` options to avoid storing the respective timestamp
+in the archive, in case you do not really need it.
+
+Besides saving a little space for the omitted timestamp, it might also
+affect metadata stream deduplication: if only this timestamp changes between
+backups and is stored into the metadata stream, the metadata stream chunks
+won't deduplicate just because of that.
+
+``--umask``
+~~~~~~~~~~~
+
+If you use ``--umask``, make sure that all repository-modifying borg commands
+(create, delete, prune) that access the repository in question use the same
+``--umask`` value.
+
+If multiple machines access the same repository, this should hold true for all
+of them.
+
+``--read-special``
+~~~~~~~~~~~~~~~~~~
+
+The ``--read-special`` option is special - you do not want to use it for normal
+full-filesystem backups, but rather after carefully picking some targets for it.
+
+The option ``--read-special`` triggers special treatment for block and char
+device files as well as FIFOs. Instead of storing them as such a device (or
+FIFO), they will get opened, their content will be read, and in the backup
+archive they will show up like a regular file.
+
+Symlinks will also get special treatment if (and only if) they point to such
+a special file: instead of storing them as a symlink, the target special file
+will get processed as described above.
+
+One intended use case of this is backing up the contents of one or multiple
+block devices, e.g. LVM snapshots, inactive LVs or disk partitions.
+
+You need to be careful about what you include when using ``--read-special``,
+e.g. if you include ``/dev/zero``, your backup will never terminate.
+
+Restoring such files' content is currently only supported one at a time via
+the ``--stdout`` option (and you have to redirect stdout to wherever it shall go,
+maybe directly into an existing device file of your choice or indirectly via
+``dd``).
+
+To some extent, mounting a backup archive with the backups of special files
+via ``borg mount`` and then loop-mounting the image files from inside the mount
+point will work. If you plan to access a lot of data in there, it likely will
+scale and perform better if you do not work via the FUSE mount.
+
+Example
++++++++
+
+Imagine you have made some snapshots of logical volumes (LVs) you want to back up.
+
+.. note::
+
+    For some scenarios, this is a good method to get "crash-like" consistency
+    (I call it crash-like because it is the same as you would get if you just
+    hit the reset button or your machine would abruptly and completely crash).
+    This is better than no consistency at all and a good method for some use
+    cases, but likely not good enough if you have databases running.
+
+Then you create a backup archive of all these snapshots. The backup process will
+see a "frozen" state of the logical volumes, while the processes working in the
+original volumes continue changing the data stored there.
+
+You also add the output of ``lvdisplay`` to your backup, so you can see the LV
+sizes in case you ever need to recreate and restore them.
+
+After the backup has completed, you remove the snapshots again. ::
+
+    $ # create snapshots here
+    $ lvdisplay > lvdisplay.txt
+    $ borg create --read-special /path/to/repo::arch lvdisplay.txt /dev/vg0/*-snapshot
+    $ # remove snapshots here
+
+Now, let's see how to restore some LVs from such a backup. ::
+
+    $ borg extract /path/to/repo::arch lvdisplay.txt
+    $ # create empty LVs with correct sizes here (look into lvdisplay.txt).
+    $ # we assume that you created an empty root and home LV and overwrite them now:
+    $ borg extract --stdout /path/to/repo::arch dev/vg0/root-snapshot > /dev/vg0/root
+    $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
+
+
+.. _append_only_mode:
+
+Append-only mode
+~~~~~~~~~~~~~~~~
+
+A repository can be made "append-only", which means that Borg will never overwrite or
+delete committed data (append-only refers to the segment files, but borg will also
+refuse to delete the repository completely). This is useful for scenarios where a
+backup client machine backs up remotely to a backup server using ``borg serve``, since
+a hacked client machine cannot delete backups on the server permanently.
+
+To activate append-only mode, edit the repository ``config`` file and add a line
+``append_only=1`` to the ``[repository]`` section (or edit the line if it exists).
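+
+As a minimal sketch, the ``[repository]`` section could then contain the following
+(only the relevant line is shown; the section's other existing keys stay unchanged)::
+
+    [repository]
+    append_only = 1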
+
+In append-only mode Borg will create a transaction log in the ``transactions`` file,
+where each line is a transaction and a UTC timestamp.
+
+In addition, ``borg serve`` can act as if a repository is in append-only mode with
+its option ``--append-only``. This can be very useful for fine-tuning access control
+in ``.ssh/authorized_keys`` ::
+
+    command="borg serve --append-only ..." ssh-rsa
+    command="borg serve ..." ssh-rsa
+
+Running ``borg init`` via a ``borg serve --append-only`` server will *not* create
+an append-only repository. Running ``borg init --append-only`` creates an append-only
+repository regardless of server settings.
+
+Example
++++++++
+
+Suppose an attacker remotely deleted all backups, but your repository was in append-only
+mode. A transaction log in this situation might look like this: ::
+
+    transaction 1, UTC time 2016-03-31T15:53:27.383532
+    transaction 5, UTC time 2016-03-31T15:53:52.588922
+    transaction 11, UTC time 2016-03-31T15:54:23.887256
+    transaction 12, UTC time 2016-03-31T15:55:54.022540
+    transaction 13, UTC time 2016-03-31T15:55:55.472564
+
+From your security logs you conclude the attacker gained access at 15:54:00 and all
+the backups were deleted or replaced by compromised backups. From the log you know
+that transactions 11 and later are compromised. Note that the transaction ID is the
+name of the *last* file in the transaction. For example, transaction 11 spans files 6
+to 11.
+
+In a real attack you'll likely want to keep the compromised repository
+intact to analyze what the attacker tried to achieve. It's also a good idea to make a
+copy just in case something goes wrong during the recovery. Since recovery is done by
+deleting some files, a hard link copy (``cp -al``) is sufficient.
+
+The first step to reset the repository to transaction 5, the last uncompromised transaction,
+is to remove the ``hints.N`` and ``index.N`` files in the repository (these two files are
+always expendable). In this example N is 13.
+
+Then remove or move all segment files from the segment directories in ``data/`` starting
+with file 6::
+
+    rm data/**/{6..13}
+
+That's all there is to it.
+
+Drawbacks
++++++++++
+
+As data is only appended, and nothing removed, commands like ``prune`` or ``delete``
+won't free disk space; they merely tag data as deleted in a new transaction.
+
+Be aware that as soon as you write to the repo in non-append-only mode (e.g. prune,
+delete or create archives from an admin machine), it will remove the deleted objects
+permanently (including the ones that were already marked as deleted, but not removed,
+in append-only mode).
+
+Note that you can go back-and-forth between normal and append-only operation by editing
+the configuration file; it's not a "one way trip".
+
+Further considerations
+++++++++++++++++++++++
+
+Append-only mode is not respected by tools other than Borg. ``rm`` still works on the
+repository. Make sure that backup client machines only get to access the repository via
+``borg serve``.
+
+Ensure that no remote access is possible if the repository is temporarily set to normal mode
+for e.g. regular pruning.
+
+Further protections can be implemented, but are outside of Borg's scope. For example,
+file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on
+new data files.
+
+SSH batch mode
+~~~~~~~~~~~~~~
+
+When running Borg using an automated script, ``ssh`` might still ask for a password,
+even if there is an SSH key for the target server.
Use this to make scripts more robust:: + + export BORG_RSH='ssh -oBatchMode=yes' diff --git a/docs/usage/prune.rst b/docs/usage/prune.rst new file mode 100644 index 00000000..028f8300 --- /dev/null +++ b/docs/usage/prune.rst @@ -0,0 +1,39 @@ +.. include:: prune.rst.inc + +Examples +~~~~~~~~ + +Be careful, prune is a potentially dangerous command, it will remove backup +archives. + +The default of prune is to apply to **all archives in the repository** unless +you restrict its operation to a subset of the archives using ``--prefix``. +When using ``--prefix``, be careful to choose a good prefix - e.g. do not use a +prefix "foo" if you do not also want to match "foobar". + +It is strongly recommended to always run ``prune -v --list --dry-run ...`` +first so you will see what it would do without it actually doing anything. + +:: + + # Keep 7 end of day and 4 additional end of week archives. + # Do a dry-run without actually deleting anything. + $ borg prune -v --list --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo + + # Same as above but only apply to archive names starting with the hostname + # of the machine followed by a "-" character: + $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo + + # Keep 7 end of day, 4 additional end of week archives, + # and an end of month archive for every month: + $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo + + # Keep all backups in the last 10 days, 4 additional end of week archives, + # and an end of month archive for every month: + $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo + +There is also a visualized prune example in ``docs/misc/prune-example.txt``: + +.. highlight:: none +.. include:: ../misc/prune-example.txt + :literal: diff --git a/docs/usage/prune.rst.inc b/docs/usage/prune.rst.inc index 5c63d44f..f98fbab8 100644 --- a/docs/usage/prune.rst.inc +++ b/docs/usage/prune.rst.inc @@ -4,46 +4,95 @@ borg prune ---------- -:: +.. code-block:: none - borg prune REPOSITORY + borg [common options] prune [options] [REPOSITORY] + +.. only:: html + + .. 
class:: borg-options-table + + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY`` | repository to prune | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-n``, ``--dry-run`` | do not change repository | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--force`` | force pruning of corrupted archives | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-s``, ``--stats`` | print statistics for the deleted archive | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--list`` | output verbose list of archives it keeps/prunes | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-within INTERVAL`` | keep all archives within this time interval | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-last``, ``--keep-secondly`` | number of secondly archives to keep | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-minutely`` | number of minutely archives to keep | + 
+-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-H``, ``--keep-hourly`` | number of hourly archives to keep | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-d``, ``--keep-daily`` | number of daily archives to keep | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-w``, ``--keep-weekly`` | number of weekly archives to keep | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-m``, ``--keep-monthly`` | number of monthly archives to keep | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-y``, ``--keep-yearly`` | number of yearly archives to keep | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--save-space`` | work slower, but using less space | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive filters** — Archive filters can be applied to repository targets. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-P PREFIX``, ``--prefix PREFIX`` | only consider archive names starting with this prefix. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-a GLOB``, ``--glob-archives GLOB`` | only consider archive names matching the glob. sh: rules apply, see "borg help patterns". 
``--prefix`` and ``--glob-archives`` are mutually exclusive. | + +-----------------------------------------------------------------------------+---------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY repository to prune -optional arguments - ``-n``, ``--dry-run`` - | do not change repository - ``--force`` - | force pruning of corrupted archives - ``-s``, ``--stats`` - | print statistics for the deleted archive - ``--list`` - | output verbose list of archives it keeps/prunes - ``--keep-within WITHIN`` - | keep all archives within this time interval - ``--keep-last``, ``--keep-secondly`` - | number of secondly archives to keep - ``--keep-minutely`` - | number of minutely archives to keep - ``-H``, ``--keep-hourly`` - | number of hourly archives to keep - ``-d``, ``--keep-daily`` - | number of daily archives to keep - ``-w``, ``--keep-weekly`` - | number of weekly archives to keep - ``-m``, ``--keep-monthly`` - | number of monthly archives to keep - ``-y``, ``--keep-yearly`` - | number of yearly archives to keep - ``-P``, ``--prefix`` - | only consider archive names starting with this prefix - ``--save-space`` - | work slower, but using less space -`Common options`_ - | + optional arguments + -n, --dry-run do not change repository + --force force pruning of corrupted archives + -s, --stats print statistics for the deleted archive + --list output verbose list of archives it keeps/prunes + --keep-within INTERVAL keep all archives within this time interval + --keep-last, --keep-secondly number of secondly archives to keep + --keep-minutely number of minutely archives to keep + -H, --keep-hourly number of hourly archives to keep + -d, --keep-daily number of daily archives to keep + -w, --keep-weekly number of weekly archives to keep + -m, --keep-monthly number of monthly archives to keep + -y, --keep-yearly number of yearly archives to keep + --save-space work slower, but using less space + + + :ref:`common_options` + | + + Archive filters + -P PREFIX, --prefix PREFIX only consider archive names starting with this prefix. + -a GLOB, --glob-archives GLOB only consider archive names matching the glob. sh: rules apply, see "borg help patterns". ``--prefix`` and ``--glob-archives`` are mutually exclusive. + Description ~~~~~~~~~~~ @@ -55,25 +104,27 @@ automated backup scripts wanting to keep a certain number of historic backups. Also, prune automatically removes checkpoint archives (incomplete archives left behind by interrupted backup runs) except if the checkpoint is the latest archive (and thus still needed). Checkpoint archives are not considered when -comparing archive counts against the retention limits (--keep-*). +comparing archive counts against the retention limits (``--keep-X``). If a prefix is set with -P, then only archives that start with the prefix are considered for deletion and only those archives count towards the totals specified by the rules. Otherwise, *all* archives in the repository are candidates for deletion! +There is no automatic distinction between archives representing different +contents. These need to be distinguished by specifying matching prefixes. If you have multiple sequences of archives with different data sets (e.g. 
from different machines) in one shared repository, use one prune call per data set that matches only the respective archives using the -P option. -The "--keep-within" option takes an argument of the form "<int><char>", -where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means +The ``--keep-within`` option takes an argument of the form "<int><char>", +where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means to keep all archives that were created within the past 48 hours. "1m" is taken to mean "31d". The archives kept with this option do not count towards the totals specified by any other options. A good procedure is to thin out more and more the older your backups get. -As an example, "--keep-daily 7" means to keep the latest backup on each day, +As an example, ``--keep-daily 7`` means to keep the latest backup on each day, up to 7 most recent days with backups (days without backups do not count). The rules are applied from secondly to yearly, and backups selected by previous rules do not count towards those of later rules. The time that each backup @@ -81,6 +132,6 @@ starts is used for pruning purposes. Dates and times are interpreted in the local timezone, and weeks go from Monday to Sunday. Specifying a negative number of archives to keep means that there is no limit. -The "--keep-last N" option is doing the same as "--keep-secondly N" (and it will +The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will keep the last N archives under the assumption that you do not create more than one -backup archive in the same second). +backup archive in the same second). \ No newline at end of file diff --git a/docs/usage/recreate.rst b/docs/usage/recreate.rst new file mode 100644 index 00000000..5ff2917f --- /dev/null +++ b/docs/usage/recreate.rst @@ -0,0 +1,34 @@ +.. include:: recreate.rst.inc + +Examples +~~~~~~~~ +:: + + # Make old (Attic / Borg 0.xx) archives deduplicate with Borg 1.x archives. + # Archives created with Borg 1.1+ and the default chunker params are skipped + # (archive ID stays the same). + $ borg recreate /mnt/backup --chunker-params default --progress + + # Create a backup with little but fast compression + $ borg create /mnt/backup::archive /some/files --compression lz4 + # Then compress it - this might take longer, but the backup has already completed, + # so no inconsistencies from a long-running backup job. + $ borg recreate /mnt/backup::archive --recompress --compression zlib,9 + + # Remove unwanted files from all archives in a repository. + # Note the relative path for the --exclude option - archives only contain relative paths. + $ borg recreate /mnt/backup --exclude home/icke/Pictures/drunk_photos + + # Change archive comment + $ borg create --comment "This is a comment" /mnt/backup::archivename ~ + $ borg info /mnt/backup::archivename + Name: archivename + Fingerprint: ... + Comment: This is a comment + ... + $ borg recreate --comment "This is a better comment" /mnt/backup::archivename + $ borg info /mnt/backup::archivename + Name: archivename + Fingerprint: ... + Comment: This is a better comment + ... diff --git a/docs/usage/recreate.rst.inc b/docs/usage/recreate.rst.inc index f4134b0a..ef258e69 100644 --- a/docs/usage/recreate.rst.inc +++ b/docs/usage/recreate.rst.inc @@ -4,101 +4,150 @@ borg recreate ------------- -:: +.. code-block:: none - borg recreate REPOSITORY_OR_ARCHIVE PATH + borg [common options] recreate [options] [REPOSITORY_OR_ARCHIVE] [PATH...] + +.. only:: html + + ..
class:: borg-options-table + + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY_OR_ARCHIVE`` | repository/archive to recreate | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``PATH`` | paths to recreate; patterns are supported | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--list`` | output verbose list of items (files, dirs, ...) 
| + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--filter STATUSCHARS`` | only display items with the given status characters (listed in borg create --help) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-n``, ``--dry-run`` | do not change anything | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-s``, ``--stats`` | print statistics at end | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Exclusion options** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-e PATTERN``, ``--exclude PATTERN`` | exclude paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-from EXCLUDEFILE`` | read exclude patterns from EXCLUDEFILE, one per line | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--pattern PATTERN`` | experimental: include/exclude 
paths matching PATTERN | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--patterns-from PATTERNFILE`` | experimental: read include/exclude patterns from PATTERNFILE, one per line | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-caches`` | exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--exclude-if-present NAME`` | exclude directories that are tagged by containing a filesystem object with the given NAME | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--keep-exclude-tags``, ``--keep-tag-files`` | if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **Archive options** | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--target TARGET`` | create a new archive with the name ARCHIVE, do not replace existing archive (only applies for a single archive) | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-c SECONDS``, ``--checkpoint-interval SECONDS`` | write checkpoint every SECONDS seconds (Default: 1800) | + 
+-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--comment COMMENT`` | add a comment text to the archive | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--timestamp TIMESTAMP`` | manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). alternatively, give a reference file/directory. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``-C COMPRESSION``, ``--compression COMPRESSION`` | select compression algorithm, see the output of the "borg help compression" command for details. | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--recompress`` | recompress data chunks according to ``--compression`` if `if-different`. When `always`, chunks that are already compressed that way are not skipped, but compressed again. Only the algorithm is considered for `if-different`, not the compression level (if any). | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--chunker-params PARAMS`` | specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) or `default` to use the current defaults. default: 19,23,21,4095 | + +-------------------------------------------------------+---------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY_OR_ARCHIVE repository/archive to recreate PATH paths to recreate; patterns are supported -optional arguments - ``--list`` - | output verbose list of items (files, dirs, ...) 
- ``--filter STATUSCHARS`` - | only display items with the given status characters - ``-p``, ``--progress`` - | show progress display while recreating archives - ``-n``, ``--dry-run`` - | do not change anything - ``-s``, ``--stats`` - | print statistics at end -`Common options`_ - | + optional arguments + --list output verbose list of items (files, dirs, ...) + --filter STATUSCHARS only display items with the given status characters (listed in borg create --help) + -n, --dry-run do not change anything + -s, --stats print statistics at end -Exclusion options - ``-e PATTERN``, ``--exclude PATTERN`` - | exclude paths matching PATTERN - ``--exclude-from EXCLUDEFILE`` - | read exclude patterns from EXCLUDEFILE, one per line - ``--exclude-caches`` - | exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) - ``--exclude-if-present FILENAME`` - | exclude directories that contain the specified file - ``--keep-tag-files`` - | keep tag files of excluded caches/directories -Archive options - ``--comment COMMENT`` - | add a comment text to the archive - ``--timestamp yyyy-mm-ddThh:mm:ss`` - | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory. - ``-C COMPRESSION``, ``--compression COMPRESSION`` - | select compression algorithm (and level): - | none == no compression (default), - | auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L] - | being any valid compression algorithm (and optional level), - | lz4 == lz4, - | zlib == zlib (default level 6), - | zlib,0 .. zlib,9 == zlib (with level 0..9), - | lzma == lzma (default level 6), - | lzma,0 .. lzma,9 == lzma (with level 0..9). - ``--compression-from COMPRESSIONCONFIG`` - | read compression patterns from COMPRESSIONCONFIG, one per line - ``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE`` - | specify the chunker parameters (or "default"). + :ref:`common_options` + | + + Exclusion options + -e PATTERN, --exclude PATTERN exclude paths matching PATTERN + --exclude-from EXCLUDEFILE read exclude patterns from EXCLUDEFILE, one per line + --pattern PATTERN experimental: include/exclude paths matching PATTERN + --patterns-from PATTERNFILE experimental: read include/exclude patterns from PATTERNFILE, one per line + --exclude-caches exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html) + --exclude-if-present NAME exclude directories that are tagged by containing a filesystem object with the given NAME + --keep-exclude-tags, --keep-tag-files if tag objects are specified with ``--exclude-if-present``, don't omit the tag objects themselves from the backup archive + + + Archive options + --target TARGET create a new archive with the name ARCHIVE, do not replace existing archive (only applies for a single archive) + -c SECONDS, --checkpoint-interval SECONDS write checkpoint every SECONDS seconds (Default: 1800) + --comment COMMENT add a comment text to the archive + --timestamp TIMESTAMP manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). alternatively, give a reference file/directory. + -C COMPRESSION, --compression COMPRESSION select compression algorithm, see the output of the "borg help compression" command for details. + --recompress recompress data chunks according to ``--compression`` if `if-different`. When `always`, chunks that are already compressed that way are not skipped, but compressed again. 
Only the algorithm is considered for `if-different`, not the compression level (if any). + --chunker-params PARAMS specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) or `default` to use the current defaults. default: 19,23,21,4095 + Description ~~~~~~~~~~~ Recreate the contents of existing archives. ---exclude, --exclude-from and PATH have the exact same semantics -as in "borg create". If PATHs are specified the resulting archive -will only contain files from these PATHs. +This is an *experimental* feature. Do *not* use this on your only backup. ---compression: all chunks seen will be stored using the given method. +``--exclude``, ``--exclude-from``, ``--exclude-if-present``, ``--keep-exclude-tags``, and PATH +have the exact same semantics as in "borg create". If PATHs are specified the +resulting archive will only contain files from these PATHs. + +Note that all paths in an archive are relative, therefore absolute patterns/paths +will *not* match (``--exclude``, ``--exclude-from``, PATHs). + +``--recompress`` allows to change the compression of existing data in archives. Due to how Borg stores compressed size information this might display incorrect information for archives that were not recreated at the same time. There is no risk of data loss by this. ---chunker-params will re-chunk all files in the archive, this can be +``--chunker-params`` will re-chunk all files in the archive, this can be used to have upgraded Borg 0.xx or Attic archives deduplicate with Borg 1.x archives. -borg recreate is signal safe. Send either SIGINT (Ctrl-C on most terminals) or -SIGTERM to request termination. - -Use the *exact same* command line to resume the operation later - changing excludes -or paths will lead to inconsistencies (changed excludes will only apply to newly -processed files/dirs). Changing compression leads to incorrect size information -(which does not cause any data loss, but can be misleading). -Changing chunker params between invocations might lead to data loss. - -USE WITH CAUTION. +**USE WITH CAUTION.** Depending on the PATHs and patterns given, recreate can be used to permanently delete files from archives. -When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are +When in doubt, use ``--dry-run --verbose --list`` to see how patterns/PATHS are interpreted. The archive being recreated is only removed after the operation completes. The archive that is built during the operation exists at the same time at ".recreate". The new archive will have a different archive ID. +With ``--target`` the original archive is not replaced, instead a new archive is created. + When rechunking space usage can be substantial, expect at least the entire deduplicated size of the archives using the previous chunker params. -When recompressing approximately 1 % of the repository size or 512 MB -(whichever is greater) of additional space is used. +When recompressing expect approx. (throughput / checkpoint-interval) in space usage, +assuming all chunks are recompressed. \ No newline at end of file diff --git a/docs/usage/rename.rst b/docs/usage/rename.rst new file mode 100644 index 00000000..456e8fca --- /dev/null +++ b/docs/usage/rename.rst @@ -0,0 +1,13 @@ +.. 
include:: rename.rst.inc + +Examples +~~~~~~~~ +:: + + $ borg create /path/to/repo::archivename ~ + $ borg list /path/to/repo + archivename Mon, 2016-02-15 19:50:19 + + $ borg rename /path/to/repo::archivename newname + $ borg list /path/to/repo + newname Mon, 2016-02-15 19:50:19 diff --git a/docs/usage/rename.rst.inc b/docs/usage/rename.rst.inc index 3cff5a8a..e68c7f78 100644 --- a/docs/usage/rename.rst.inc +++ b/docs/usage/rename.rst.inc @@ -4,22 +4,48 @@ borg rename ----------- -:: +.. code-block:: none - borg rename ARCHIVE NEWNAME + borg [common options] rename [options] ARCHIVE NEWNAME + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+-------------+-----------------------------+ + | **positional arguments** | + +-------------------------------------------------------+-------------+-----------------------------+ + | | ``ARCHIVE`` | archive to rename | + +-------------------------------------------------------+-------------+-----------------------------+ + | | ``NEWNAME`` | the new archive name to use | + +-------------------------------------------------------+-------------+-----------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+-------------+-----------------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments ARCHIVE archive to rename NEWNAME the new archive name to use -`Common options`_ - | + + :ref:`common_options` + | Description ~~~~~~~~~~~ This command renames an archive in the repository. -This results in a different archive ID. +This results in a different archive ID. \ No newline at end of file diff --git a/docs/usage/serve.rst b/docs/usage/serve.rst new file mode 100644 index 00000000..7adc8f66 --- /dev/null +++ b/docs/usage/serve.rst @@ -0,0 +1,41 @@ +.. include:: serve.rst.inc + +Examples +~~~~~~~~ + +borg serve has special support for ssh forced commands (see ``authorized_keys`` +example below): it will detect that you use such a forced command and extract +the value of the ``--restrict-to-path`` option(s). + +It will then parse the original command that came from the client, makes sure +that it is also ``borg serve`` and enforce path restriction(s) as given by the +forced command. That way, other options given by the client (like ``--info`` or +``--umask``) are preserved (and are not fixed by the forced command). + +Environment variables (such as BORG_HOSTNAME_IS_UNIQUE) contained in the original +command sent by the client are *not* interpreted, but ignored. If BORG_XXX environment +variables should be set on the ``borg serve`` side, then these must be set in system-specific +locations like ``/etc/environment`` or in the forced command itself (example below). + +:: + + # Allow an SSH keypair to only run borg, and only have access to /path/to/repo. + # Use key options to disable unneeded and potentially dangerous SSH functionality. + # This will help to secure an automated remote backup system. + $ cat ~/.ssh/authorized_keys + command="borg serve --restrict-to-path /path/to/repo",restrict ssh-rsa AAAAB3[...] + + # Set a BORG_XXX environment variable on the "borg serve" side + $ cat ~/.ssh/authorized_keys + command="export BORG_XXX=value; borg serve [...]",restrict ssh-rsa [...] + +.. note:: + The examples above use the ``restrict`` directive. This does automatically + block potential dangerous ssh features, even when they are added in a future + update. 
Thus, this option should be preferred. + + If you're using openssh-server < 7.2, however, you have to explicitly specify + the ssh features to restrict and cannot simply use the restrict option as it + has been introduced in v7.2. We recommend to use + ``no-port-forwarding,no-X11-forwarding,no-pty,no-agent-forwarding,no-user-rc`` + in this case. diff --git a/docs/usage/serve.rst.inc b/docs/usage/serve.rst.inc index 933f72b9..50bf68b7 100644 --- a/docs/usage/serve.rst.inc +++ b/docs/usage/serve.rst.inc @@ -4,20 +4,53 @@ borg serve ---------- -:: +.. code-block:: none - borg serve + borg [common options] serve [options] -optional arguments - ``--restrict-to-path PATH`` - | restrict repository access to PATH - ``--append-only`` - | only allow appending to repository segment files +.. only:: html -`Common options`_ - | + .. class:: borg-options-table + + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--restrict-to-path PATH`` | restrict repository access to PATH. Can be specified multiple times to allow the client access to several directories. Access to all sub-directories is granted implicitly; PATH doesn't need to directly point to a repository. | + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--restrict-to-repository PATH`` | restrict repository access. Only the repository located at PATH (no sub-directories are considered) is accessible. Can be specified multiple times to allow the client access to several repositories. Unlike ``--restrict-to-path`` sub-directories are not accessible; PATH needs to directly point at a repository location. PATH may be an empty directory or the last element of PATH may not exist, in which case the client may initialize a repository there. 
| + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--append-only`` | only allow appending to repository segment files | + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | ``--storage-quota QUOTA`` | Override storage quota of the repository (e.g. 5G, 1.5T). When a new repository is initialized, sets the storage quota on the new repository as well. Default: no quota. | + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + + + optional arguments + --restrict-to-path PATH restrict repository access to PATH. Can be specified multiple times to allow the client access to several directories. Access to all sub-directories is granted implicitly; PATH doesn't need to directly point to a repository. + --restrict-to-repository PATH restrict repository access. Only the repository located at PATH (no sub-directories are considered) is accessible. Can be specified multiple times to allow the client access to several repositories. Unlike ``--restrict-to-path`` sub-directories are not accessible; PATH needs to directly point at a repository location. PATH may be an empty directory or the last element of PATH may not exist, in which case the client may initialize a repository there. + --append-only only allow appending to repository segment files + --storage-quota QUOTA Override storage quota of the repository (e.g. 5G, 1.5T). 
When a new repository is initialized, sets the storage quota on the new repository as well. Default: no quota. + + + :ref:`common_options` + | Description ~~~~~~~~~~~ -This command starts a repository server process. This command is usually not used manually. +This command starts a repository server process. This command is usually not used manually. \ No newline at end of file diff --git a/docs/usage/tar.rst b/docs/usage/tar.rst new file mode 100644 index 00000000..3b63afb4 --- /dev/null +++ b/docs/usage/tar.rst @@ -0,0 +1,21 @@ +.. include:: export-tar.rst.inc + +Examples +~~~~~~~~ +:: + + # export as uncompressed tar + $ borg export-tar /path/to/repo::Monday Monday.tar + + # exclude some types, compress using gzip + $ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so' + + # use higher compression level with gzip + $ borg export-tar testrepo::linux --tar-filter="gzip -9" Monday.tar.gz + + # export a gzipped tar, but instead of storing it on disk, + # upload it to a remote site using curl. + $ borg export-tar ... --tar-filter="gzip" - | curl --data-binary @- https://somewhere/to/POST + + # remote extraction via "tarpipe" + $ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x" diff --git a/docs/usage/umount.rst.inc b/docs/usage/umount.rst.inc new file mode 100644 index 00000000..151d76a8 --- /dev/null +++ b/docs/usage/umount.rst.inc @@ -0,0 +1,48 @@ +.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit! + +.. _borg_umount: + +borg umount +----------- +.. code-block:: none + + borg [common options] umount [options] MOUNTPOINT + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+----------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+----------------+----------------------------------------+ + | | ``MOUNTPOINT`` | mountpoint of the filesystem to umount | + +-------------------------------------------------------+----------------+----------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+----------------------------------------+ + + .. raw:: html + + + +.. only:: latex + + MOUNTPOINT + mountpoint of the filesystem to umount + + + :ref:`common_options` + | + +Description +~~~~~~~~~~~ + +This command un-mounts a FUSE filesystem that was mounted with ``borg mount``. + +This is a convenience wrapper that just calls the platform-specific shell +command - usually this is either umount or fusermount -u. \ No newline at end of file diff --git a/docs/usage/upgrade.rst b/docs/usage/upgrade.rst new file mode 100644 index 00000000..044c81cf --- /dev/null +++ b/docs/usage/upgrade.rst @@ -0,0 +1,30 @@ +.. include:: upgrade.rst.inc + +Examples +~~~~~~~~ +:: + + # Upgrade the borg repository to the most recent version. + $ borg upgrade -v /path/to/repo + making a hardlink copy in /path/to/repo.before-upgrade-2016-02-15-20:51:55 + opening attic repository with borg and converting + no key file found for repository + converting repo index /path/to/repo/index.0 + converting 1 segments... + converting borg 0.xx to borg current + no key file found for repository + +.. 
_borg_key_migrate-to-repokey: + +Upgrading a passphrase encrypted attic repo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +attic offered a "passphrase" encryption mode, but this was removed in borg 1.0 +and replaced by the "repokey" mode (which stores the passphrase-protected +encryption key into the repository config). + +Thus, to upgrade a "passphrase" attic repo to a "repokey" borg repo, 2 steps +are needed, in this order: + +- borg upgrade repo +- borg key migrate-to-repokey repo diff --git a/docs/usage/upgrade.rst.inc b/docs/usage/upgrade.rst.inc index 525c5ebd..eafa4362 100644 --- a/docs/usage/upgrade.rst.inc +++ b/docs/usage/upgrade.rst.inc @@ -4,37 +4,116 @@ borg upgrade ------------ -:: +.. code-block:: none - borg upgrade REPOSITORY + borg [common options] upgrade [options] [REPOSITORY] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | **positional arguments** | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | | ``REPOSITORY`` | path to the repository to be upgraded | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | **optional arguments** | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | | ``-n``, ``--dry-run`` | do not change repository | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | | ``--inplace`` | rewrite repository in place, with no chance of going back to older versions of the repository. | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | | ``--force`` | Force upgrade | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | | ``--tam`` | Enable manifest authentication (in key and cache) (Borg 1.0.9 and later). | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | | ``--disable-tam`` | Disable manifest authentication (in key and cache). | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+-----------------------+------------------------------------------------------------------------------------------------+ + + .. raw:: html + + + +.. 
only:: latex -positional arguments REPOSITORY path to the repository to be upgraded -optional arguments - ``-p``, ``--progress`` - | show progress display while upgrading the repository - ``-n``, ``--dry-run`` - | do not change repository - ``-i``, ``--inplace`` - | rewrite repository in place, with no chance of going back to older - | versions of the repository. -`Common options`_ - | + optional arguments + -n, --dry-run do not change repository + --inplace rewrite repository in place, with no chance of going back to older versions of the repository. + --force Force upgrade + --tam Enable manifest authentication (in key and cache) (Borg 1.0.9 and later). + --disable-tam Disable manifest authentication (in key and cache). + + + :ref:`common_options` + | Description ~~~~~~~~~~~ -Upgrade an existing Borg repository. +Upgrade an existing, local Borg repository. + +When you do not need borg upgrade ++++++++++++++++++++++++++++++++++ + +Not every change requires that you run ``borg upgrade``. + +You do **not** need to run it when: + +- moving your repository to a different place +- upgrading to another point release (like 1.0.x to 1.0.y), + except when noted otherwise in the changelog +- upgrading from 1.0.x to 1.1.x, + except when noted otherwise in the changelog + +Borg 1.x.y upgrades ++++++++++++++++++++ + +Use ``borg upgrade --tam REPO`` to require manifest authentication +introduced with Borg 1.0.9 to address security issues. This means +that modifying the repository after doing this with a version prior +to 1.0.9 will raise a validation error, so only perform this upgrade +after updating all clients using the repository to 1.0.9 or newer. + +This upgrade should be done on each client for safety reasons. + +If a repository is accidentally modified with a pre-1.0.9 client after +this upgrade, use ``borg upgrade --tam --force REPO`` to remedy it. + +If you routinely do this you might not want to enable this upgrade +(which will leave you exposed to the security issue). You can +reverse the upgrade by issuing ``borg upgrade --disable-tam REPO``. + +See +https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability +for details. + +Attic and Borg 0.xx to Borg 1.x ++++++++++++++++++++++++++++++++ + This currently supports converting an Attic repository to Borg and also helps with converting Borg 0.xx to 1.0. Currently, only LOCAL repositories can be upgraded (issue #465). -It will change the magic strings in the repository's segments -to match the new Borg magic strings. The keyfiles found in +Please note that ``borg create`` (since 1.0.0) uses bigger chunks by +default than old borg or attic did, so the new chunks won't deduplicate +with the old chunks in the upgraded repository. +See ``--chunker-params`` option of ``borg create`` and ``borg recreate``. + +``borg upgrade`` will change the magic strings in the repository's +segments to match the new Borg magic strings. The keyfiles found in $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.config/borg/keys. @@ -53,7 +132,7 @@ make sure the cache files are also removed: Unless ``--inplace`` is specified, the upgrade process first creates a backup copy of the repository, in -REPOSITORY.upgrade-DATETIME, using hardlinks. This takes +REPOSITORY.before-upgrade-DATETIME, using hardlinks. This takes longer than in place upgrades, but is much safer and gives progress information (as opposed to ``cp -al``). 
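+
+For illustration, the two upgrade modes side by side (``/path/to/repo`` is a placeholder)::
+
+    # default: keep a hardlink backup copy in REPOSITORY.before-upgrade-DATETIME
+    $ borg upgrade -v /path/to/repo
+
+    # in place: faster and needs no extra space, but no way back to the previous repository version
+    $ borg upgrade --inplace /path/to/repo
+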
Once you are satisfied with the conversion, you can safely destroy the diff --git a/docs/usage/with-lock.rst.inc b/docs/usage/with-lock.rst.inc index c77eb2f6..fc454116 100644 --- a/docs/usage/with-lock.rst.inc +++ b/docs/usage/with-lock.rst.inc @@ -4,11 +4,38 @@ borg with-lock -------------- -:: +.. code-block:: none - borg with-lock REPOSITORY COMMAND ARGS + borg [common options] with-lock [options] REPOSITORY COMMAND [ARGS...] + +.. only:: html + + .. class:: borg-options-table + + +-------------------------------------------------------+----------------+--------------------+ + | **positional arguments** | + +-------------------------------------------------------+----------------+--------------------+ + | | ``REPOSITORY`` | repository to lock | + +-------------------------------------------------------+----------------+--------------------+ + | | ``COMMAND`` | command to run | + +-------------------------------------------------------+----------------+--------------------+ + | | ``ARGS`` | command arguments | + +-------------------------------------------------------+----------------+--------------------+ + | .. class:: borg-common-opt-ref | + | | + | :ref:`common_options` | + +-------------------------------------------------------+----------------+--------------------+ + + .. raw:: html + + + +.. only:: latex -positional arguments REPOSITORY repository to lock COMMAND @@ -16,8 +43,9 @@ positional arguments ARGS command arguments -`Common options`_ - | + + :ref:`common_options` + | Description ~~~~~~~~~~~ @@ -29,6 +57,8 @@ running in the repo), then execute the given command as a subprocess and wait for its termination, release the lock and return the user command's return code as borg's return code. -Note: if you copy a repository with the lock held, the lock will be present in - the copy, obviously. Thus, before using borg on the copy, you need to - use "borg break-lock" on it. +.. note:: + + If you copy a repository with the lock held, the lock will be present in + the copy, obviously. Thus, before using borg on the copy, you need to + use "borg break-lock" on it. \ No newline at end of file diff --git a/docs/usage_general.rst.inc b/docs/usage_general.rst.inc new file mode 100644 index 00000000..59b0d0eb --- /dev/null +++ b/docs/usage_general.rst.inc @@ -0,0 +1,423 @@ +Repository URLs +~~~~~~~~~~~~~~~ + +**Local filesystem** (or locally mounted network filesystem): + +``/path/to/repo`` - filesystem path to repo directory, absolute path + +``path/to/repo`` - filesystem path to repo directory, relative path + +Also, stuff like ``~/path/to/repo`` or ``~other/path/to/repo`` works (this is +expanded by your shell). + +Note: you may also prepend a ``file://`` to a filesystem path to get URL style. + +**Remote repositories** accessed via ssh user@host: + +``user@host:/path/to/repo`` - remote repo, absolute path + +``ssh://user@host:port/path/to/repo`` - same, alternative syntax, port can be given + + +**Remote repositories with relative paths** can be given using this syntax: + +``user@host:path/to/repo`` - path relative to current directory + +``user@host:~/path/to/repo`` - path relative to user's home directory + +``user@host:~other/path/to/repo`` - path relative to other's home directory + +Note: giving ``user@host:/./path/to/repo`` or ``user@host:/~/path/to/repo`` or +``user@host:/~other/path/to/repo`` is also supported, but not required here. 
+ + +**Remote repositories with relative paths, alternative syntax with port**: + +``ssh://user@host:port/./path/to/repo`` - path relative to current directory + +``ssh://user@host:port/~/path/to/repo`` - path relative to user's home directory + +``ssh://user@host:port/~other/path/to/repo`` - path relative to other's home directory + + +If you frequently need the same repo URL, it is a good idea to set the +``BORG_REPO`` environment variable to set a default for the repo URL: + +:: + + export BORG_REPO='ssh://user@host:port/path/to/repo' + +Then just leave away the repo URL if only a repo URL is needed and you want +to use the default - it will be read from BORG_REPO then. + +Use ``::`` syntax to give the repo URL when syntax requires giving a positional +argument for the repo (e.g. ``borg mount :: /mnt``). + + +Repository / Archive Locations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many commands want either a repository (just give the repo URL, see above) or +an archive location, which is a repo URL followed by ``::archive_name``. + +Archive names must not contain the ``/`` (slash) character. For simplicity, +maybe also avoid blanks or other characters that have special meaning on the +shell or in a filesystem (borg mount will use the archive name as directory +name). + +If you have set BORG_REPO (see above) and an archive location is needed, use +``::archive_name`` - the repo URL part is then read from BORG_REPO. + + +Type of log output +~~~~~~~~~~~~~~~~~~ + +The log level of the builtin logging configuration defaults to WARNING. +This is because we want Borg to be mostly silent and only output +warnings, errors and critical messages, unless output has been requested +by supplying an option that implies output (e.g. ``--list`` or ``--progress``). + +Log levels: DEBUG < INFO < WARNING < ERROR < CRITICAL + +Use ``--debug`` to set DEBUG log level - +to get debug, info, warning, error and critical level output. + +Use ``--info`` (or ``-v`` or ``--verbose``) to set INFO log level - +to get info, warning, error and critical level output. + +Use ``--warning`` (default) to set WARNING log level - +to get warning, error and critical level output. + +Use ``--error`` to set ERROR log level - +to get error and critical level output. + +Use ``--critical`` to set CRITICAL log level - +to get critical level output. + +While you can set misc. log levels, do not expect that every command will +give different output on different log levels - it's just a possibility. + +.. warning:: Options ``--critical`` and ``--error`` are provided for completeness, + their usage is not recommended as you might miss important information. + +Return codes +~~~~~~~~~~~~ + +Borg can exit with the following return codes (rc): + +=========== ======= +Return code Meaning +=========== ======= +0 success (logged as INFO) +1 warning (operation reached its normal end, but there were warnings -- + you should check the log, logged as WARNING) +2 error (like a fatal error, a local or remote exception, the operation + did not reach its normal end, logged as ERROR) +128+N killed by signal N (e.g. 137 == kill -9) +=========== ======= + +If you use ``--show-rc``, the return code is also logged at the indicated +level as the last log entry. + +.. _env_vars: + +Environment Variables +~~~~~~~~~~~~~~~~~~~~~ + +Borg uses some environment variables for automation: + +General: + BORG_REPO + When set, use the value to give the default repository location. If a command needs an archive + parameter, you can abbreviate as ``::archive``. 
If a command needs a repository parameter, you
+ can either leave it out or abbreviate as ``::``, if a positional parameter is required.
+ BORG_PASSPHRASE
+ When set, use the value to answer the passphrase question for encrypted repositories.
+ It is used when a passphrase is needed to access an encrypted repo as well as when a new
+ passphrase should be initially set when initializing an encrypted repo.
+ See also BORG_NEW_PASSPHRASE.
+ BORG_PASSCOMMAND
+ When set, use the standard output of the command (trailing newlines are stripped) to answer the
+ passphrase question for encrypted repositories.
+ It is used when a passphrase is needed to access an encrypted repo as well as when a new
+ passphrase should be initially set when initializing an encrypted repo.
+ If BORG_PASSPHRASE is also set, it takes precedence.
+ See also BORG_NEW_PASSPHRASE.
+ BORG_NEW_PASSPHRASE
+ When set, use the value to answer the passphrase question when a **new** passphrase is asked for.
+ This variable is checked first. If it is not set, BORG_PASSPHRASE and BORG_PASSCOMMAND will also
+ be checked.
+ The main use case for this is to fully automate ``borg change-passphrase``.
+ BORG_DISPLAY_PASSPHRASE
+ When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories.
+ BORG_HOSTNAME_IS_UNIQUE=no
+ Borg assumes that it can derive a unique hostname / identity (see ``borg debug info``).
+ If this is not the case or you do not want Borg to automatically remove stale locks,
+ set this to *no*.
+ BORG_LOGGING_CONF
+ When set, use the given filename as INI_-style logging configuration.
+ BORG_RSH
+ When set, use this command instead of ``ssh``. This can be used to specify ssh options, such as
+ a custom identity file ``ssh -i /path/to/private/key``. See ``man ssh`` for other options.
+ BORG_REMOTE_PATH
+ When set, use the given path as the borg executable on the remote (defaults to "borg" if unset).
+ Using the ``--remote-path PATH`` commandline option overrides the environment variable.
+ BORG_FILES_CACHE_TTL
+ When set to a numeric value, this determines the maximum "time to live" for the files cache
+ entries (default: 20). The files cache is used to quickly determine whether a file is unchanged.
+ The FAQ explains this in more detail: :ref:`always_chunking`
+ TMPDIR
+ Where temporary files are stored (might need a lot of temporary space for some operations)
+
+Some automatic "answerers" (if set, they automatically answer confirmation questions):
+ BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no (or =yes)
+ For "Warning: Attempting to access a previously unknown unencrypted repository"
+ BORG_RELOCATED_REPO_ACCESS_IS_OK=no (or =yes)
+ For "Warning: The repository at location ... was previously located at ..."
+ BORG_CHECK_I_KNOW_WHAT_I_AM_DOING=NO (or =YES)
+ For "Warning: 'check --repair' is an experimental feature that might result in data loss."
+ BORG_DELETE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES)
+ For "You requested to completely DELETE the repository *including* all archives it contains:"
+ BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES)
+ For "recreate is an experimental feature."
+
+ Note: answers are case sensitive. Setting an invalid answer value might either give the default
+ answer or ask you interactively, depending on whether retries are allowed (they are allowed
+ by default). So please test your scripts interactively before making them a non-interactive script.
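+
+For illustration only, here is a minimal sketch of a non-interactive setup built from the
+variables described above; the repository URL, the passphrase file and the archive name are
+just placeholders (also mind the passphrase / file permission notes below)::
+
+    export BORG_REPO='ssh://user@host:port/path/to/repo'
+    # the command's standard output (trailing newlines stripped) is used as the passphrase
+    export BORG_PASSCOMMAND='cat /path/to/passphrase-file'
+    # automatically answer one of the confirmation questions (answers are case sensitive)
+    export BORG_RELOCATED_REPO_ACCESS_IS_OK=yes
+    # BORG_REPO provides the repository, so "::" plus an archive name is enough
+    borg create ::myarchive ~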
+
+Directories and files:
+ BORG_KEYS_DIR
+ Defaults to '~/.config/borg/keys'. This directory contains keys for encrypted repositories.
+ BORG_KEY_FILE
+ When set, use the given filename as repository key file.
+ BORG_SECURITY_DIR
+ Defaults to '~/.config/borg/security'. This directory contains information borg uses to
+ track its usage of NONCES ("numbers used once" - usually in encryption context) and other
+ security-relevant data.
+ BORG_CACHE_DIR
+ Defaults to '~/.cache/borg'. This directory contains the local cache and might need a lot
+ of space for dealing with big repositories.
+ BORG_CONFIG_DIR
+ Defaults to '~/.config/borg'. This directory contains all Borg configuration directories.
+
+Building:
+ BORG_OPENSSL_PREFIX
+ Adds the given OpenSSL header file directory to the default locations (setup.py).
+ BORG_LZ4_PREFIX
+ Adds the given LZ4 header file directory to the default locations (setup.py).
+ BORG_LIBB2_PREFIX
+ Adds the given prefix directory to the default locations. If an 'include/blake2.h' is found, Borg
+ will be linked against the system libb2 instead of a bundled implementation. (setup.py)
+
+
+Please note:
+
+- be very careful when using the "yes" answerers; the warning prompts exist for the safety of you and your data
+- also be very careful when putting your passphrase into a script; make sure it has appropriate file permissions
+ (e.g. mode 600, root:root).
+
+
+.. _INI: https://docs.python.org/3.5/library/logging.config.html#configuration-file-format
+
+.. _file-systems:
+
+File systems
+~~~~~~~~~~~~
+
+We strongly recommend against using Borg (or any other database-like
+software) on non-journaling file systems like FAT, since it is not
+possible to assume any consistency in case of power failures (or a
+sudden disconnect of an external drive or similar failures).
+
+While Borg uses a data store that is resilient against these failures
+when used on journaling file systems, it is not possible to guarantee
+this with some hardware -- independent of the software used. We are not
+aware of a list of affected hardware.
+
+If you are unsure whether your Borg repository is still consistent
+and readable after one of the failures mentioned above has occurred, run
+``borg check --verify-data`` to make sure it is consistent.
+
+.. rubric:: Requirements for Borg repository file systems
+
+- Long file names
+- At least three directory levels with short names
+- Typically, file sizes up to a few hundred MB.
+ Large repositories may require large files (>2 GB).
+- Up to 1000 files per directory (10000 for repositories initialized with Borg 1.0)
+- mkdir(2) should be atomic, since it is used for locking
+- Hardlinks are needed for :ref:`borg_upgrade` ``--inplace``
+
+Units
+~~~~~
+
+When displaying quantities, Borg respects the
+usual conventions of scale. Disk sizes are displayed in `decimal
+`_, using powers of ten (so
+``kB`` means 1000 bytes). For memory usage, `binary prefixes
+`_ are used, and are
+indicated using the `IEC binary prefixes
+`_,
+using powers of two (so ``KiB`` means 1024 bytes).
+
+Date and Time
+~~~~~~~~~~~~~
+
+We format date and time according to ISO 8601, that is: YYYY-MM-DD and
+HH:MM:SS (24h clock).
+
+For more information about that, see: https://xkcd.com/1179/
+
+Unless otherwise noted, we display local date and time.
+Internally, we store and process date and time as UTC.
+
+Resource Usage
+~~~~~~~~~~~~~~
+
+Borg might use a lot of resources depending on the size of the data set it is dealing with.
+ +If one uses Borg in a client/server way (with a ssh: repository), +the resource usage occurs in part on the client and in another part on the +server. + +If one uses Borg as a single process (with a filesystem repo), +all the resource usage occurs in that one process, so just add up client + +server to get the approximate resource usage. + +CPU client: + borg create: does chunking, hashing, compression, crypto (high CPU usage) + chunks cache sync: quite heavy on CPU, doing lots of hashtable operations. + borg extract: crypto, decompression (medium to high CPU usage) + borg check: similar to extract, but depends on options given. + borg prune / borg delete archive: low to medium CPU usage + borg delete repo: done on the server + It won't go beyond 100% of 1 core as the code is currently single-threaded. + Especially higher zlib and lzma compression levels use significant amounts + of CPU cycles. Crypto might be cheap on the CPU (if hardware accelerated) or + expensive (if not). + +CPU server: + It usually doesn't need much CPU, it just deals with the key/value store + (repository) and uses the repository index for that. + + borg check: the repository check computes the checksums of all chunks + (medium CPU usage) + borg delete repo: low CPU usage + +CPU (only for client/server operation): + When using borg in a client/server way with a ssh:-type repo, the ssh + processes used for the transport layer will need some CPU on the client and + on the server due to the crypto they are doing - esp. if you are pumping + big amounts of data. + +Memory (RAM) client: + The chunks index and the files index are read into memory for performance + reasons. Might need big amounts of memory (see below). + Compression, esp. lzma compression with high levels might need substantial + amounts of memory. + +Memory (RAM) server: + The server process will load the repository index into memory. Might need + considerable amounts of memory, but less than on the client (see below). + +Chunks index (client only): + Proportional to the amount of data chunks in your repo. Lots of chunks + in your repo imply a big chunks index. + It is possible to tweak the chunker params (see create options). + +Files index (client only): + Proportional to the amount of files in your last backups. Can be switched + off (see create options), but next backup might be much slower if you do. + The speed benefit of using the files cache is proportional to file size. + +Repository index (server only): + Proportional to the amount of data chunks in your repo. Lots of chunks + in your repo imply a big repository index. + It is possible to tweak the chunker params (see create options) to + influence the amount of chunks being created. + +Temporary files (client): + Reading data and metadata from a FUSE mounted repository will consume up to + the size of all deduplicated, small chunks in the repository. Big chunks + won't be locally cached. + +Temporary files (server): + None. + +Cache files (client only): + Contains the chunks index and files index (plus a collection of single- + archive chunk indexes which might need huge amounts of disk space, + depending on archive count and size - see FAQ about how to reduce). + +Network (only for client/server operation): + If your repository is remote, all deduplicated (and optionally compressed/ + encrypted) data of course has to go over the connection (``ssh://`` repo url). 
+ If you use a locally mounted network filesystem, additionally some copy + operations used for transaction support also go over the connection. If + you backup multiple sources to one target repository, additional traffic + happens for cache resynchronization. + +.. _platforms: + +Support for file metadata +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Besides regular file and directory structures, Borg can preserve + +* symlinks (stored as symlink, the symlink is not followed) +* special files: + + * character and block device files (restored via mknod) + * FIFOs ("named pipes") + * special file *contents* can be backed up in ``--read-special`` mode. + By default the metadata to create them with mknod(2), mkfifo(2) etc. is stored. +* hardlinked regular files, devices, FIFOs (considering all items in the same archive) +* timestamps in nanosecond precision: mtime, atime, ctime +* permissions: + + * IDs of owning user and owning group + * names of owning user and owning group (if the IDs can be resolved) + * Unix Mode/Permissions (u/g/o permissions, suid, sgid, sticky) + +On some platforms additional features are supported: + +.. Yes/No's are grouped by reason/mechanism/reference. + ++------------------+----------+-----------+------------+ +| Platform | ACLs | xattr | Flags | +| | [#acls]_ | [#xattr]_ | [#flags]_ | ++==================+==========+===========+============+ +| Linux | Yes | Yes | Yes [1]_ | ++------------------+----------+-----------+------------+ +| Mac OS X | Yes | Yes | Yes (all) | ++------------------+----------+-----------+ | +| FreeBSD | Yes | Yes | | ++------------------+----------+-----------+ | +| OpenBSD | n/a | n/a | | ++------------------+----------+-----------+ | +| NetBSD | n/a | No [2]_ | | ++------------------+----------+-----------+------------+ +| Solaris 11 | No [3]_ | n/a | ++------------------+ | | +| OpenIndiana | | | ++------------------+----------+-----------+------------+ +| Windows (cygwin) | No [4]_ | No | No | ++------------------+----------+-----------+------------+ + +Other Unix-like operating systems may work as well, but have not been tested at all. + +Note that most of the platform-dependent features also depend on the file system. +For example, ntfs-3g on Linux isn't able to convey NTFS ACLs. + +.. [1] Only "nodump", "immutable", "compressed" and "append" are supported. + Feature request :issue:`618` for more flags. +.. [2] Feature request :issue:`1332` +.. [3] Feature request :issue:`1337` +.. [4] Cygwin tries to map NTFS ACLs to permissions with varying degress of success. + +.. [#acls] The native access control list mechanism of the OS. This normally limits access to + non-native ACLs. For example, NTFS ACLs aren't completely accessible on Linux with ntfs-3g. +.. [#xattr] extended attributes; key-value pairs attached to a file, mainly used by the OS. + This includes resource forks on Mac OS X. +.. [#flags] aka *BSD flags*. The Linux set of flags [1]_ is portable across platforms. + The BSDs define additional flags. diff --git a/requirements.d/attic.txt b/requirements.d/attic.txt deleted file mode 100644 index b5068ffd..00000000 --- a/requirements.d/attic.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Please note: -# attic only builds using OpenSSL 1.0.x, it can not be installed using OpenSSL >= 1.1.0. -# If attic is not installed, our unit tests will just skip the tests that require attic. 
-attic - diff --git a/requirements.d/coala.txt b/requirements.d/coala.txt new file mode 100644 index 00000000..86ebea3c --- /dev/null +++ b/requirements.d/coala.txt @@ -0,0 +1,5 @@ +# style and other checks for many languages. +# some bears (checkers) have additional requirements. +coala +coala-bears + diff --git a/requirements.d/development.txt b/requirements.d/development.txt index a0cb3c2a..88535435 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -1,6 +1,10 @@ -virtualenv<14.0 +setuptools +setuptools_scm +pip +virtualenv tox pytest +pytest-xdist pytest-cov pytest-benchmark -Cython +Cython!=0.27 diff --git a/requirements.d/docs.txt b/requirements.d/docs.txt new file mode 100644 index 00000000..b63e8185 --- /dev/null +++ b/requirements.d/docs.txt @@ -0,0 +1,2 @@ +sphinx +guzzle_sphinx_theme diff --git a/requirements.d/fuse.txt b/requirements.d/fuse.txt index be35d2ae..0df0f338 100644 --- a/requirements.d/fuse.txt +++ b/requirements.d/fuse.txt @@ -1,4 +1,4 @@ # low-level FUSE support library for "borg mount" -# see comments setup.py about this version requirement. +# please see the comments in setup.py about llfuse. llfuse<2.0 diff --git a/scripts/borg.exe.spec b/scripts/borg.exe.spec new file mode 100644 index 00000000..ea86a91d --- /dev/null +++ b/scripts/borg.exe.spec @@ -0,0 +1,55 @@ +# -*- mode: python -*- +# this pyinstaller spec file is used to build borg binaries on posix platforms + +import os, sys + +basepath = '/vagrant/borg/borg' + +block_cipher = None + +a = Analysis([os.path.join(basepath, 'src/borg/__main__.py'), ], + pathex=[basepath, ], + binaries=[], + datas=[ + ('../src/borg/paperkey.html', 'borg'), + ], + hiddenimports=['borg.platform.posix'], + hookspath=[], + runtime_hooks=[], + excludes=[ + '_ssl', 'ssl', + ], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher) + +if sys.platform == 'darwin': + # do not bundle the osxfuse libraries, so we do not get a version + # mismatch to the installed kernel driver of osxfuse. + a.binaries = [b for b in a.binaries if 'libosxfuse' not in b[0]] + +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + name='borg.exe', + debug=False, + strip=False, + upx=True, + console=True ) + +if False: + # Enable this block to build a directory-based binary instead of + # a packed single file. This allows to easily look at all included + # files (e.g. without having to strace or halt the built binary + # and introspect /tmp). + coll = COLLECT(exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=True, + name='borg-dir') diff --git a/scripts/errorlist.py b/scripts/errorlist.py new file mode 100755 index 00000000..bd33faf4 --- /dev/null +++ b/scripts/errorlist.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +from textwrap import indent + +import borg.archiver +from borg.helpers import Error, ErrorWithTraceback + +classes = Error.__subclasses__() + ErrorWithTraceback.__subclasses__() + +for cls in sorted(classes, key=lambda cls: (cls.__module__, cls.__qualname__)): + if cls is ErrorWithTraceback: + continue + print(' ', cls.__qualname__) + print(indent(cls.__doc__, ' ' * 8)) diff --git a/scripts/fuzz-cache-sync/HOWTO b/scripts/fuzz-cache-sync/HOWTO new file mode 100644 index 00000000..ae144b28 --- /dev/null +++ b/scripts/fuzz-cache-sync/HOWTO @@ -0,0 +1,10 @@ +- Install AFL and the requirements for LLVM mode (see docs) +- Compile the fuzzing target, e.g. 
+ + AFL_HARDEN=1 afl-clang-fast main.c -o fuzz-target -O3 + + (other options, like using ASan or MSan are possible as well) +- Add additional test cases to testcase_dir +- Run afl, easiest (but inefficient) way; + + afl-fuzz -i testcase_dir -o findings_dir ./fuzz-target diff --git a/scripts/fuzz-cache-sync/main.c b/scripts/fuzz-cache-sync/main.c new file mode 100644 index 00000000..c65dd272 --- /dev/null +++ b/scripts/fuzz-cache-sync/main.c @@ -0,0 +1,33 @@ + +#define BORG_NO_PYTHON + +#include "../../src/borg/_hashindex.c" +#include "../../src/borg/cache_sync/cache_sync.c" + +#define BUFSZ 32768 + +int main() { + char buf[BUFSZ]; + int len, ret; + CacheSyncCtx *ctx; + HashIndex *idx; + + /* capacity, key size, value size */ + idx = hashindex_init(0, 32, 12); + ctx = cache_sync_init(idx); + + while (1) { + len = read(0, buf, BUFSZ); + if (!len) { + break; + } + ret = cache_sync_feed(ctx, buf, len); + if(!ret && cache_sync_error(ctx)) { + fprintf(stderr, "error: %s\n", cache_sync_error(ctx)); + return 1; + } + } + hashindex_free(idx); + cache_sync_free(ctx); + return 0; +} diff --git a/scripts/fuzz-cache-sync/testcase_dir/test_simple b/scripts/fuzz-cache-sync/testcase_dir/test_simple new file mode 100644 index 00000000..0bf5a0ea Binary files /dev/null and b/scripts/fuzz-cache-sync/testcase_dir/test_simple differ diff --git a/scripts/glibc_check.py b/scripts/glibc_check.py old mode 100644 new mode 100755 index a400bbd1..02be4aac --- a/scripts/glibc_check.py +++ b/scripts/glibc_check.py @@ -2,7 +2,9 @@ """ Check if all given binaries work with the given glibc version. -check_glibc.py 2.11 bin [bin ...] +glibc_check.py 2.11 bin [bin ...] + +rc = 0 means "yes", rc = 1 means "no". """ import re diff --git a/scripts/py36-blake2.py b/scripts/py36-blake2.py new file mode 100644 index 00000000..758a4f34 --- /dev/null +++ b/scripts/py36-blake2.py @@ -0,0 +1,36 @@ + +""" +This script checks compatibility of crypto.blake2b_256 against hashlib.blake2b in CPython 3.6. +""" + +import hashlib +import sys + + +def test_b2(b2_input, b2_output): + digest = hashlib.blake2b(b2_input, digest_size=32).digest() + identical = b2_output == digest + + print('Input: ', b2_input.hex()) + print('Expected: ', b2_output.hex()) + print('Calculated:', digest.hex()) + print('Identical: ', identical) + print() + if not identical: + sys.exit(1) + + +test_b2( + bytes.fromhex('037fb9b75b20d623f1d5a568050fccde4a1b7c5f5047432925e941a17c7a2d0d7061796c6f6164'), + bytes.fromhex('a22d4fc81bb61c3846c334a09eaf28d22dd7df08c9a7a41e713ef28d80eebd45') +) + +test_b2( + b'abc', + bytes.fromhex('bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319') +) + +test_b2( + bytes.fromhex('e944973af2256d4d670c12dd75304c319f58f4e40df6fb18ef996cb47e063676') + b'1234567890' * 100, + bytes.fromhex('97ede832378531dd0f4c668685d166e797da27b47d8cd441e885b60abd5e0cb2'), +) diff --git a/setup.cfg b/setup.cfg index 6f408a95..8e9414ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,11 +1,11 @@ -[pytest] +[tool:pytest] python_files = testsuite/*.py [flake8] # please note that the values are adjusted so that they do not cause failures # with existing code. if you want to change them, you should first fix all # flake8 failures that appear with your change. 
-ignore = E122,E123,E125,E126,E127,E128,E226,E402,F401,F405,F811 +ignore = E122,E123,E125,E126,E127,E128,E226,E402,E722,E741,F401,F405,F811 # line length long term target: 120 max-line-length = 255 exclude = build,dist,.git,.idea,.cache,.tox,docs/conf.py diff --git a/setup.py b/setup.py index 1b578459..81d80fe2 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,11 @@ # -*- encoding: utf-8 *-* import os +import io import re import sys import subprocess +from collections import OrderedDict +from datetime import datetime from glob import glob from distutils.command.build import build @@ -10,7 +13,7 @@ from distutils.core import Command import textwrap -min_python = (3, 4) +min_python = (3, 5) my_python = sys.version_info if my_python < min_python: @@ -20,16 +23,29 @@ if my_python < min_python: # Are we building on ReadTheDocs? on_rtd = os.environ.get('READTHEDOCS') -# msgpack pure python data corruption was fixed in 0.4.6. -# Also, we might use some rather recent API features. -install_requires = ['msgpack-python>=0.4.6', ] +if sys.platform != 'win32': + install_requires = [ + # msgpack pure python data corruption was fixed in 0.4.6. + # Also, we might use some rather recent API features. + 'msgpack-python>=0.4.6', + 'pyzmq', + ] +else: + install_requires = ['msgpack-python>=0.4.6'] +# note for package maintainers: if you package borgbackup for distribution, +# please add llfuse as a *requirement* on all platforms that have a working +# llfuse package. "borg mount" needs llfuse to work. +# if you do not have llfuse, do not require it, most of borgbackup will work. extras_require = { # llfuse 0.40 (tested, proven, ok), needs FUSE version >= 2.8.0 # llfuse 0.41 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 0.41.1 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 0.42 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 1.0 (tested shortly, looks ok), needs FUSE version >= 2.8.0 + # llfuse 1.1.1 (tested shortly, looks ok), needs FUSE version >= 2.8.0 + # llfuse 1.2 (tested shortly, looks ok), needs FUSE version >= 2.8.0 + # llfuse 1.3 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 2.0 will break API 'fuse': ['llfuse<2.0', ], } @@ -41,11 +57,15 @@ if sys.platform.startswith('freebsd'): from setuptools import setup, find_packages, Extension from setuptools.command.sdist import sdist +from distutils.command.clean import clean compress_source = 'src/borg/compress.pyx' -crypto_source = 'src/borg/crypto.pyx' +crypto_ll_source = 'src/borg/crypto/low_level.pyx' +crypto_helpers = 'src/borg/crypto/_crypto_helpers.c' chunker_source = 'src/borg/chunker.pyx' hashindex_source = 'src/borg/hashindex.pyx' +item_source = 'src/borg/item.pyx' +checksums_source = 'src/borg/algorithms/checksums.pyx' platform_posix_source = 'src/borg/platform/posix.pyx' platform_linux_source = 'src/borg/platform/linux.pyx' platform_darwin_source = 'src/borg/platform/darwin.pyx' @@ -54,9 +74,11 @@ platform_windows_source = 'src/borg/platform/windows.pyx' cython_sources = [ compress_source, - crypto_source, + crypto_ll_source, chunker_source, hashindex_source, + item_source, + checksums_source, platform_posix_source, platform_linux_source, @@ -78,10 +100,15 @@ try: def make_distribution(self): self.filelist.extend([ 'src/borg/compress.c', - 'src/borg/crypto.c', + 'src/borg/crypto/low_level.c', 'src/borg/chunker.c', 'src/borg/_chunker.c', 'src/borg/hashindex.c', 'src/borg/_hashindex.c', - + 'src/borg/cache_sync/cache_sync.c', 'src/borg/cache_sync/sysdep.h', 
'src/borg/cache_sync/unpack.h', + 'src/borg/cache_sync/unpack_define.h', 'src/borg/cache_sync/unpack_template.h', + 'src/borg/item.c', + 'src/borg/algorithms/checksums.c', + 'src/borg/algorithms/crc32_dispatch.c', 'src/borg/algorithms/crc32_clmul.c', 'src/borg/algorithms/crc32_slice_by_8.c', + 'src/borg/algorithms/xxh64/xxhash.h', 'src/borg/algorithms/xxh64/xxhash.c', 'src/borg/platform/posix.c', 'src/borg/platform/linux.c', 'src/borg/platform/freebsd.c', @@ -96,9 +123,11 @@ except ImportError: raise Exception('Cython is required to run sdist') compress_source = compress_source.replace('.pyx', '.c') - crypto_source = crypto_source.replace('.pyx', '.c') + crypto_ll_source = crypto_ll_source.replace('.pyx', '.c') chunker_source = chunker_source.replace('.pyx', '.c') hashindex_source = hashindex_source.replace('.pyx', '.c') + item_source = item_source.replace('.pyx', '.c') + checksums_source = checksums_source.replace('.pyx', '.c') platform_posix_source = platform_posix_source.replace('.pyx', '.c') platform_linux_source = platform_linux_source.replace('.pyx', '.c') platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c') @@ -106,8 +135,9 @@ except ImportError: platform_windows_source = platform_windows_source.replace('.pyx', '.c') from distutils.command.build_ext import build_ext if not on_rtd and not all(os.path.exists(path) for path in [ - compress_source, crypto_source, chunker_source, hashindex_source, - platform_linux_source, platform_freebsd_source, platform_darwin_source, platform_windows_source]): + compress_source, crypto_ll_source, chunker_source, hashindex_source, item_source, checksums_source, + platform_posix_source, platform_linux_source, platform_freebsd_source, platform_darwin_source, + platform_windows_source]): raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version.') @@ -129,8 +159,19 @@ def detect_lz4(prefixes): return prefix +def detect_libb2(prefixes): + for prefix in prefixes: + filename = os.path.join(prefix, 'include', 'blake2.h') + if os.path.exists(filename): + with open(filename, 'r') as fd: + if 'blake2b_init' in fd.read(): + return prefix + + include_dirs = [] library_dirs = [] +define_macros = [] +crypto_libraries = ['crypto'] windowsIncludeDirs = [] if sys.platform == 'win32': @@ -173,9 +214,38 @@ if lz4_prefix: elif not on_rtd: raise Exception('Unable to find LZ4 headers. (Looked here: {})'.format(', '.join(possible_lz4_prefixes))) +possible_libb2_prefixes = ['/usr', '/usr/local', '/usr/local/opt/libb2', '/usr/local/libb2', + '/usr/local/borg', '/opt/local', '/opt/pkg', ] +if os.environ.get('BORG_LIBB2_PREFIX'): + possible_libb2_prefixes.insert(0, os.environ.get('BORG_LIBB2_PREFIX')) +libb2_prefix = detect_libb2(possible_libb2_prefixes) +if libb2_prefix: + print('Detected and preferring libb2 over bundled BLAKE2') + include_dirs.append(os.path.join(libb2_prefix, 'include')) + library_dirs.append(os.path.join(libb2_prefix, 'lib')) + crypto_libraries.append('b2') + define_macros.append(('BORG_USE_LIBB2', 'YES')) + with open('README.rst', 'r') as fd: long_description = fd.read() + # remove badges + long_description = re.compile(r'^\.\. start-badges.*^\.\. end-badges', re.M | re.S).sub('', long_description) + # remove |substitutions| + long_description = re.compile(r'\|screencast\|').sub('', long_description) + # remove unknown directives + long_description = re.compile(r'^\.\. 
highlight:: \w+$', re.M).sub('', long_description) + + +def format_metavar(option): + if option.nargs in ('*', '...'): + return '[%s...]' % option.metavar + elif option.nargs == '?': + return '[%s]' % option.metavar + elif option.nargs is None: + return option.metavar + else: + raise ValueError('Can\'t format metavar %s, unknown nargs %s!' % (option.metavar, option.nargs)) class build_usage(Command): @@ -193,19 +263,38 @@ class build_usage(Command): def run(self): print('generating usage docs') - # allows us to build docs without the C modules fully loaded during help generation - from borg.archiver import Archiver - parser = Archiver(prog='borg').parser - choices = {} - for action in parser._actions: - if action.choices is not None: - choices.update(action.choices) - print('found commands: %s' % list(choices.keys())) + import borg + borg.doc_mode = 'build_man' if not os.path.exists('docs/usage'): os.mkdir('docs/usage') - for command, parser in choices.items(): + # allows us to build docs without the C modules fully loaded during help generation + from borg.archiver import Archiver + parser = Archiver(prog='borg').build_parser() + + self.generate_level("", parser, Archiver) + + def generate_level(self, prefix, parser, Archiver): + is_subcommand = False + choices = {} + for action in parser._actions: + if action.choices is not None and 'SubParsersAction' in str(action.__class__): + is_subcommand = True + for cmd, parser in action.choices.items(): + choices[prefix + cmd] = parser + if prefix and not choices: + return + print('found commands: %s' % list(choices.keys())) + + for command, parser in sorted(choices.items()): + if command.startswith('debug'): + print('skipping', command) + continue print('generating help for %s' % command) - with open('docs/usage/%s.rst.inc' % command, 'w') as doc: + + if self.generate_level(command + " ", parser, Archiver): + continue + + with open('docs/usage/%s.rst.inc' % command.replace(" ", "_"), 'w') as doc: doc.write(".. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!\n\n") if command == 'help': for topic in Archiver.helptext: @@ -216,45 +305,153 @@ class build_usage(Command): doc.write(Archiver.helptext[topic]) else: params = {"command": command, + "command_": command.replace(' ', '_'), "underline": '-' * len('borg ' + command)} - doc.write(".. _borg_{command}:\n\n".format(**params)) - doc.write("borg {command}\n{underline}\n::\n\n borg {command}".format(**params)) + doc.write(".. _borg_{command_}:\n\n".format(**params)) + doc.write("borg {command}\n{underline}\n.. 
code-block:: none\n\n borg [common options] {command}".format(**params)) self.write_usage(parser, doc) epilog = parser.epilog parser.epilog = None self.write_options(parser, doc) doc.write("\n\nDescription\n~~~~~~~~~~~\n") doc.write(epilog) - common_options = [group for group in choices['create']._action_groups if group.title == 'Common options'][0] - with open('docs/usage/common-options.rst.inc', 'w') as doc: - self.write_options_group(common_options, doc, False) + + if 'create' in choices: + common_options = [group for group in choices['create']._action_groups if group.title == 'Common options'][0] + with open('docs/usage/common-options.rst.inc', 'w') as doc: + self.write_options_group(common_options, doc, False, base_indent=0) + + return is_subcommand def write_usage(self, parser, fp): if any(len(o.option_strings) for o in parser._actions): - fp.write(' ') + fp.write(' [options]') for option in parser._actions: if option.option_strings: continue - fp.write(' ' + option.metavar) + fp.write(' ' + format_metavar(option)) + fp.write('\n\n') def write_options(self, parser, fp): - for group in parser._action_groups: - if group.title == 'Common options': - fp.write('\n\n`Common options`_\n') - fp.write(' |') - else: - self.write_options_group(group, fp) - - def write_options_group(self, group, fp, with_title=True): def is_positional_group(group): return any(not o.option_strings for o in group._group_actions) - def get_help(option): - text = textwrap.dedent((option.help or '') % option.__dict__) - return '\n'.join('| ' + line for line in text.splitlines()) + # HTML output: + # A table using some column-spans - def shipout(text): - fp.write(textwrap.indent('\n'.join(text), ' ' * 4)) + def html_write(s): + for line in s.splitlines(): + fp.write(' ' + line + '\n') + + rows = [] + for group in parser._action_groups: + if group.title == 'Common options': + # (no of columns used, columns, ...) + rows.append((1, '.. class:: borg-common-opt-ref\n\n:ref:`common_options`')) + else: + if not group._group_actions: + continue + group_header = '**%s**' % group.title + if group.description: + group_header += ' — ' + group.description + rows.append((1, group_header)) + if is_positional_group(group): + for option in group._group_actions: + rows.append((3, '', '``%s``' % option.metavar, option.help or '')) + else: + for option in group._group_actions: + if option.metavar: + option_fmt = '``%s ' + option.metavar + '``' + else: + option_fmt = '``%s``' + option_str = ', '.join(option_fmt % s for s in option.option_strings) + option_desc = textwrap.dedent((option.help or '') % option.__dict__) + rows.append((3, '', option_str, option_desc)) + + fp.write('.. only:: html\n\n') + table = io.StringIO() + table.write('.. class:: borg-options-table\n\n') + self.rows_to_table(rows, table.write) + fp.write(textwrap.indent(table.getvalue(), ' ' * 4)) + + # LaTeX output: + # Regular rST option lists (irregular column widths) + latex_options = io.StringIO() + for group in parser._action_groups: + if group.title == 'Common options': + latex_options.write('\n\n:ref:`common_options`\n') + latex_options.write(' |') + else: + self.write_options_group(group, latex_options) + fp.write('\n.. 
only:: latex\n\n') + fp.write(textwrap.indent(latex_options.getvalue(), ' ' * 4)) + + def rows_to_table(self, rows, write): + def write_row_separator(): + write('+') + for column_width in column_widths: + write('-' * (column_width + 1)) + write('+') + write('\n') + + # Find column count and width + column_count = max(columns for columns, *_ in rows) + column_widths = [0] * column_count + for columns, *cells in rows: + for i in range(columns): + # "+ 1" because we want a space between the cell contents and the delimiting "|" in the output + column_widths[i] = max(column_widths[i], len(cells[i]) + 1) + + for columns, *original_cells in rows: + write_row_separator() + # If a cell contains newlines, then the row must be split up in individual rows + # where each cell contains no newline. + rowspanning_cells = [] + original_cells = list(original_cells) + while any('\n' in cell for cell in original_cells): + cell_bloc = [] + for i, cell in enumerate(original_cells): + pre, _, original_cells[i] = cell.partition('\n') + cell_bloc.append(pre) + rowspanning_cells.append(cell_bloc) + rowspanning_cells.append(original_cells) + for cells in rowspanning_cells: + for i, column_width in enumerate(column_widths): + if i < columns: + write('| ') + write(cells[i].ljust(column_width)) + else: + write(' ') + write(''.ljust(column_width)) + write('|\n') + + write_row_separator() + # This bit of JavaScript kills the that is invariably inserted by docutils, + # but does absolutely no good here. It sets bogus column widths which cannot be overridden + # with CSS alone. + # Since this is HTML-only output, it would be possible to just generate a directly, + # but then we'd lose rST formatting. + write(textwrap.dedent(""" + .. raw:: html + + + """)) + + def write_options_group(self, group, fp, with_title=True, base_indent=4): + def is_positional_group(group): + return any(not o.option_strings for o in group._group_actions) + + indent = ' ' * base_indent + + if is_positional_group(group): + for option in group._group_actions: + fp.write(option.metavar + '\n') + fp.write(textwrap.indent(option.help or '', ' ' * base_indent) + '\n') + return if not group._group_actions: return @@ -262,36 +459,65 @@ class build_usage(Command): if with_title: fp.write('\n\n') fp.write(group.title + '\n') - text = [] - if is_positional_group(group): - for option in group._group_actions: - text.append(option.metavar) - text.append(textwrap.indent(option.help or '', ' ' * 4)) - shipout(text) - return + opts = OrderedDict() - options = [] for option in group._group_actions: if option.metavar: - option_fmt = '``%%s %s``' % option.metavar + option_fmt = '%s ' + option.metavar else: - option_fmt = '``%s``' + option_fmt = '%s' option_str = ', '.join(option_fmt % s for s in option.option_strings) - options.append((option_str, option)) - for option_str, option in options: - help = textwrap.indent(get_help(option), ' ' * 4) - text.append(option_str) - text.append(help) - shipout(text) + option_desc = textwrap.dedent((option.help or '') % option.__dict__) + opts[option_str] = textwrap.indent(option_desc, ' ' * 4) + + padding = len(max(opts)) + 1 + + for option, desc in opts.items(): + fp.write(indent + option.ljust(padding) + desc + '\n') -class build_api(Command): - description = "generate a basic api.rst file based on the modules available" +class build_man(Command): + description = 'build man pages' - user_options = [ - ('output=', 'O', 'output directory'), - ] + user_options = [] + + see_also = { + 'create': ('delete', 'prune', 'check', 
'patterns', 'placeholders', 'compression'), + 'recreate': ('patterns', 'placeholders', 'compression'), + 'list': ('info', 'diff', 'prune', 'patterns'), + 'info': ('list', 'diff'), + 'init': ('create', 'delete', 'check', 'list', 'key-import', 'key-export', 'key-change-passphrase'), + 'key-import': ('key-export', ), + 'key-export': ('key-import', ), + 'mount': ('umount', 'extract'), # Would be cooler if these two were on the same page + 'umount': ('mount', ), + 'extract': ('mount', ), + } + + rst_prelude = textwrap.dedent(""" + .. role:: ref(title) + + .. |project_name| replace:: Borg + + """) + + usage_group = { + 'break-lock': 'lock', + 'with-lock': 'lock', + + 'change-passphrase': 'key', + 'key_change-passphrase': 'key', + 'key_export': 'key', + 'key_import': 'key', + 'key_migrate-to-repokey': 'key', + + 'export-tar': 'tar', + + 'benchmark_crud': 'benchmark', + + 'umount': 'mount', + } def initialize_options(self): pass @@ -300,39 +526,260 @@ class build_api(Command): pass def run(self): - print("auto-generating API documentation") - with open("docs/api.rst", "w") as doc: - doc.write(""" -API Documentation -================= -""") - for mod in glob('src/borg/*.py') + glob('src/borg/*.pyx'): - print("examining module %s" % mod) - mod = mod.replace('.pyx', '').replace('.py', '').replace('/', '.') - if "._" not in mod: - doc.write(""" -.. automodule:: %s - :members: - :undoc-members: -""" % mod) + print('building man pages (in docs/man)', file=sys.stderr) + import borg + borg.doc_mode = 'build_man' + os.makedirs('docs/man', exist_ok=True) + # allows us to build docs without the C modules fully loaded during help generation + from borg.archiver import Archiver + parser = Archiver(prog='borg').build_parser() + + self.generate_level('', parser, Archiver) + self.build_topic_pages(Archiver) + self.build_intro_page() + + def generate_level(self, prefix, parser, Archiver): + is_subcommand = False + choices = {} + for action in parser._actions: + if action.choices is not None and 'SubParsersAction' in str(action.__class__): + is_subcommand = True + for cmd, parser in action.choices.items(): + choices[prefix + cmd] = parser + if prefix and not choices: + return + + for command, parser in sorted(choices.items()): + if command.startswith('debug') or command == 'help': + continue + + man_title = 'borg-' + command.replace(' ', '-') + print('building man page', man_title + '(1)', file=sys.stderr) + + is_intermediary = self.generate_level(command + ' ', parser, Archiver) + + doc, write = self.new_doc() + self.write_man_header(write, man_title, parser.description) + + self.write_heading(write, 'SYNOPSIS') + if is_intermediary: + subparsers = [action for action in parser._actions if 'SubParsersAction' in str(action.__class__)][0] + for subcommand in subparsers.choices: + write('| borg', '[common options]', command, subcommand, '...') + self.see_also.setdefault(command, []).append('%s-%s' % (command, subcommand)) + else: + write('borg', '[common options]', command, end='') + self.write_usage(write, parser) + write('\n') + + description, _, notes = parser.epilog.partition('\n.. 
man NOTES') + + if description: + self.write_heading(write, 'DESCRIPTION') + write(description) + + if not is_intermediary: + self.write_heading(write, 'OPTIONS') + write('See `borg-common(1)` for common options of Borg commands.') + write() + self.write_options(write, parser) + + self.write_examples(write, command) + + if notes: + self.write_heading(write, 'NOTES') + write(notes) + + self.write_see_also(write, man_title) + + self.gen_man_page(man_title, doc.getvalue()) + + # Generate the borg-common(1) man page with the common options. + if 'create' in choices: + doc, write = self.new_doc() + man_title = 'borg-common' + self.write_man_header(write, man_title, 'Common options of Borg commands') + + common_options = [group for group in choices['create']._action_groups if group.title == 'Common options'][0] + + self.write_heading(write, 'SYNOPSIS') + self.write_options_group(write, common_options) + self.write_see_also(write, man_title) + self.gen_man_page(man_title, doc.getvalue()) + + return is_subcommand + + def build_topic_pages(self, Archiver): + for topic, text in Archiver.helptext.items(): + doc, write = self.new_doc() + man_title = 'borg-' + topic + print('building man page', man_title + '(1)', file=sys.stderr) + + self.write_man_header(write, man_title, 'Details regarding ' + topic) + self.write_heading(write, 'DESCRIPTION') + write(text) + self.gen_man_page(man_title, doc.getvalue()) + + def build_intro_page(self): + print('building man page borg(1)', file=sys.stderr) + with open('docs/man_intro.rst') as fd: + man_intro = fd.read() + self.gen_man_page('borg', self.rst_prelude + man_intro) + + def new_doc(self): + doc = io.StringIO(self.rst_prelude) + doc.read() + write = self.printer(doc) + return doc, write + + def printer(self, fd): + def write(*args, **kwargs): + print(*args, file=fd, **kwargs) + return write + + def write_heading(self, write, header, char='-', double_sided=False): + write() + if double_sided: + write(char * len(header)) + write(header) + write(char * len(header)) + write() + + def write_man_header(self, write, title, description): + self.write_heading(write, title, '=', double_sided=True) + self.write_heading(write, description, double_sided=True) + # man page metadata + write(':Author: The Borg Collective') + write(':Date:', datetime.utcnow().date().isoformat()) + write(':Manual section: 1') + write(':Manual group: borg backup tool') + write() + + def write_examples(self, write, command): + command = command.replace(' ', '_') + with open('docs/usage/%s.rst' % self.usage_group.get(command, command)) as fd: + usage = fd.read() + usage_include = '.. include:: %s.rst.inc' % command + begin = usage.find(usage_include) + end = usage.find('.. include', begin + 1) + # If a command has a dedicated anchor, it will occur before the command's include. + if 0 < usage.find('.. _', begin + 1) < end: + end = usage.find('.. 
_', begin + 1) + examples = usage[begin:end] + examples = examples.replace(usage_include, '') + examples = examples.replace('Examples\n~~~~~~~~', '') + examples = examples.replace('Miscellaneous Help\n------------------', '') + examples = re.sub('^(~+)$', lambda matches: '+' * len(matches.group(0)), examples, flags=re.MULTILINE) + examples = examples.strip() + if examples: + self.write_heading(write, 'EXAMPLES', '-') + write(examples) + + def write_see_also(self, write, man_title): + see_also = self.see_also.get(man_title.replace('borg-', ''), ()) + see_also = ['`borg-%s(1)`' % s for s in see_also] + see_also.insert(0, '`borg-common(1)`') + self.write_heading(write, 'SEE ALSO') + write(', '.join(see_also)) + + def gen_man_page(self, name, rst): + from docutils.writers import manpage + from docutils.core import publish_string + from docutils.nodes import inline + from docutils.parsers.rst import roles + + def issue(name, rawtext, text, lineno, inliner, options={}, content=[]): + return [inline(rawtext, '#' + text)], [] + + roles.register_local_role('issue', issue) + # We give the source_path so that docutils can find relative includes + # as-if the document where located in the docs/ directory. + man_page = publish_string(source=rst, source_path='docs/virtmanpage.rst', writer=manpage.Writer()) + with open('docs/man/%s.1' % name, 'wb') as fd: + fd.write(man_page) + + def write_usage(self, write, parser): + if any(len(o.option_strings) for o in parser._actions): + write(' [options] ', end='') + for option in parser._actions: + if option.option_strings: + continue + write(format_metavar(option), end=' ') + + def write_options(self, write, parser): + for group in parser._action_groups: + if group.title == 'Common options' or not group._group_actions: + continue + title = 'arguments' if group.title == 'positional arguments' else group.title + self.write_heading(write, title, '+') + self.write_options_group(write, group) + + def write_options_group(self, write, group): + def is_positional_group(group): + return any(not o.option_strings for o in group._group_actions) + + if is_positional_group(group): + for option in group._group_actions: + write(option.metavar) + write(textwrap.indent(option.help or '', ' ' * 4)) + return + + opts = OrderedDict() + + for option in group._group_actions: + if option.metavar: + option_fmt = '%s ' + option.metavar + else: + option_fmt = '%s' + option_str = ', '.join(option_fmt % s for s in option.option_strings) + option_desc = textwrap.dedent((option.help or '') % option.__dict__) + opts[option_str] = textwrap.indent(option_desc, ' ' * 4) + + padding = len(max(opts)) + 1 + + for option, desc in opts.items(): + write(option.ljust(padding), desc) + + +def rm(file): + try: + os.unlink(file) + print('rm', file) + except FileNotFoundError: + pass + + +class Clean(clean): + def run(self): + super().run() + for source in cython_sources: + genc = source.replace('.pyx', '.c') + rm(genc) + compiled_glob = source.replace('.pyx', '.cpython*') + for compiled in sorted(glob(compiled_glob)): + rm(compiled) cmdclass = { 'build_ext': build_ext, - 'build_api': build_api, 'build_usage': build_usage, - 'sdist': Sdist + 'build_man': build_man, + 'sdist': Sdist, + 'clean': Clean, } ext_modules = [] if not on_rtd: ext_modules += [ - Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs), - Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), + 
Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros), + Extension('borg.crypto.low_level', [crypto_ll_source, crypto_helpers], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros), + Extension('borg.hashindex', [hashindex_source]), + Extension('borg.item', [item_source]), Extension('borg.chunker', [chunker_source]), - Extension('borg.hashindex', [hashindex_source]) + Extension('borg.algorithms.checksums', [checksums_source]), + ] - if sys.platform.startswith(('linux', 'freebsd', 'darwin')): + if not sys.platform.startswith(('win32', )): ext_modules.append(Extension('borg.platform.posix', [platform_posix_source])) if sys.platform == 'linux': @@ -347,10 +794,11 @@ if not on_rtd: def parse(root, describe_command=None): file = open('src/borg/_version.py', 'w') - output = subprocess.check_output("git describe --tags --long").decode().strip() + output = subprocess.check_output("git describe --long").decode().strip() file.write('version = "' + output + '"\n') return output + parse_function = parse if sys.platform == 'win32' else None setup( @@ -367,7 +815,7 @@ setup( license='BSD', platforms=['Linux', 'MacOS X', 'FreeBSD', 'OpenBSD', 'NetBSD', ], classifiers=[ - 'Development Status :: 4 - Beta', + 'Development Status :: 2 - Pre-Alpha', 'Environment :: Console', 'Intended Audience :: System Administrators', 'License :: OSI Approved :: BSD License', @@ -378,14 +826,13 @@ setup( 'Operating System :: POSIX :: Linux', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', 'Topic :: Security :: Cryptography', 'Topic :: System :: Archiving :: Backup', ], packages=find_packages('src'), package_dir={'': 'src'}, - include_package_data=True, zip_safe=False, entry_points={ 'console_scripts': [ @@ -393,6 +840,11 @@ 'borgfs = borg.archiver:main', ] }, + include_package_data=True, + package_data={ + 'borg': ['paperkey.html'], + 'borg.testsuite': ['attic.tar.gz'], + }, cmdclass=cmdclass, ext_modules=ext_modules, setup_requires=['setuptools_scm>=1.7'], diff --git a/src/borg/__init__.py b/src/borg/__init__.py index e292841a..c2b20186 100644 --- a/src/borg/__init__.py +++ b/src/borg/__init__.py @@ -1,3 +1,20 @@ -# This is a python package +from distutils.version import LooseVersion +# IMPORTANT keep imports from borg here to a minimum because our testsuite depends on +# being able to import borg.constants and then monkey patching borg.constants.PBKDF2_ITERATIONS from ._version import version as __version__ + + +__version_tuple__ = tuple(LooseVersion(__version__).version[:3]) + +# assert that all semver components are integers +# this is mainly to show errors when people repackage poorly +# and setuptools_scm determines a 0.1.dev... version +assert all(isinstance(v, int) for v in __version_tuple__), \ + """\ +broken borgbackup version metadata: %r + +version metadata is obtained dynamically on installation via setuptools_scm, +please ensure your git repo has the correct tags or that you provide the version +using SETUPTOOLS_SCM_PRETEND_VERSION in your build script. 
+""" % __version__ diff --git a/src/borg/_chunker.c b/src/borg/_chunker.c index f9f598bb..9b7e7137 100644 --- a/src/borg/_chunker.c +++ b/src/borg/_chunker.c @@ -63,7 +63,7 @@ static uint32_t table_base[] = 0xc5ae37bb, 0xa76ce12a, 0x8150d8f3, 0x2ec29218, 0xa35f0984, 0x48c0647e, 0x0b5ff98c, 0x71893f7b }; -#define BARREL_SHIFT(v, shift) ( ((v) << shift) | ((v) >> (32 - shift)) ) +#define BARREL_SHIFT(v, shift) ( ((v) << shift) | ((v) >> ((32 - shift) & 0x1f)) ) size_t pagemask; @@ -157,6 +157,8 @@ chunker_fill(Chunker *c) off_t offset, length; int overshoot; PyObject *data; + PyThreadState *thread_state; + memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; c->last = 0; @@ -165,6 +167,8 @@ chunker_fill(Chunker *c) return 1; } if(c->fh >= 0) { + thread_state = PyEval_SaveThread(); + offset = c->bytes_read; // if we have a os-level file descriptor, use os-level API n = read(c->fh, c->data + c->position + c->remaining, n); @@ -177,6 +181,7 @@ chunker_fill(Chunker *c) c->eof = 1; } else { + PyEval_RestoreThread(thread_state); // some error happened PyErr_SetFromErrno(PyExc_OSError); return 0; @@ -211,6 +216,8 @@ chunker_fill(Chunker *c) posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED); #endif + + PyEval_RestoreThread(thread_state); } else { // no os-level file descriptor, use Python file object API diff --git a/src/borg/_endian.h b/src/borg/_endian.h new file mode 100644 index 00000000..645e4078 --- /dev/null +++ b/src/borg/_endian.h @@ -0,0 +1,27 @@ +#include +#include +#include + +#if defined (__SVR4) && defined (__sun) +#include +#endif + +#if (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \ + (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) || \ + (defined(_BIG_ENDIAN) && defined(__SVR4) && defined(__sun)) +#define BORG_BIG_ENDIAN 1 +#elif (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \ + (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ + (defined(_LITTLE_ENDIAN) && defined(__SVR4) && defined(__sun)) +#define BORG_BIG_ENDIAN 0 +#else +#error Unknown byte order +#endif + +#if BORG_BIG_ENDIAN +#define _le32toh(x) __builtin_bswap32(x) +#define _htole32(x) __builtin_bswap32(x) +#else +#define _le32toh(x) (x) +#define _htole32(x) (x) +#endif diff --git a/src/borg/_hashindex.c b/src/borg/_hashindex.c index b1193a77..53b108cc 100644 --- a/src/borg/_hashindex.c +++ b/src/borg/_hashindex.c @@ -1,4 +1,6 @@ + #include +#include #include #include #include @@ -7,36 +9,7 @@ #include #include #include - -#if defined (__SVR4) && defined (__sun) -#include -#endif -#if (defined(_BIG_ENDIAN)&&defined(__SVR4)&&defined(__sun)) -#define BIG_ENDIAN_DETECTED -#endif - -#if (defined(__MINGW32__) && defined(_WIN32)) || \ - (defined(_LITTLE_ENDIAN)&&defined(__SVR4)&&defined(__sun)) -#define LITTLE_ENDIAN_DETECTED -#endif // __MINGW32__ - -#if !defined(BIG_ENDIAN_DETECTED) && !defined(LITTLE_ENDIAN_DETECTED) -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define LITTLE_ENDIAN_DETECTED -#else -#define BIG_ENDIAN_DETECTED -#endif -#endif - -#ifdef BIG_ENDIAN_DETECTED -#define _le32toh(x) __builtin_bswap32(x) -#define _htole32(x) __builtin_bswap32(x) -#elif defined(LITTLE_ENDIAN_DETECTED) -#define _le32toh(x) (x) -#define _htole32(x) (x) -#else -#error Unknown byte order -#endif +#include "_endian.h" #define MAGIC "BORG_IDX" #define MAGIC_LEN 8 @@ -53,11 
+26,17 @@ typedef struct { void *buckets; int num_entries; int num_buckets; + int num_empty; int key_size; int value_size; off_t bucket_size; int lower_limit; int upper_limit; + int min_empty; +#ifndef BORG_NO_PYTHON + /* buckets may be backed by a Python buffer. If buckets_buffer.buf is NULL then this is not used. */ + Py_buffer buckets_buffer; +#endif } HashIndex; /* prime (or w/ big prime factors) hash table sizes @@ -83,6 +62,7 @@ static int hash_sizes[] = { #define HASH_MIN_LOAD .25 #define HASH_MAX_LOAD .75 /* don't go higher than 0.75, otherwise performance severely suffers! */ +#define HASH_MAX_EFF_LOAD .93 #define MAX(x, y) ((x) > (y) ? (x): (y)) #define NELEMS(x) (sizeof(x) / sizeof((x)[0])) @@ -105,8 +85,12 @@ static int hash_sizes[] = { #define EPRINTF(msg, ...) fprintf(stderr, "hashindex: " msg "(%s)\n", ##__VA_ARGS__, strerror(errno)) #define EPRINTF_PATH(path, msg, ...) fprintf(stderr, "hashindex: %s: " msg " (%s)\n", path, ##__VA_ARGS__, strerror(errno)) -static HashIndex *hashindex_read(const char *path); -static int hashindex_write(HashIndex *index, const char *path); +#ifndef BORG_NO_PYTHON +static HashIndex *hashindex_read(PyObject *file_py, int permit_compact); +static void hashindex_write(HashIndex *index, PyObject *file_py); +#endif + +static uint64_t hashindex_compact(HashIndex *index); static HashIndex *hashindex_init(int capacity, int key_size, int value_size); static const void *hashindex_get(HashIndex *index, const void *key); static int hashindex_set(HashIndex *index, const void *key, const void *value); @@ -116,6 +100,19 @@ static void *hashindex_next_key(HashIndex *index, const void *key); /* Private API */ static void hashindex_free(HashIndex *index); +static void +hashindex_free_buckets(HashIndex *index) +{ +#ifndef BORG_NO_PYTHON + if(index->buckets_buffer.buf) { + PyBuffer_Release(&index->buckets_buffer); + } else +#endif + { + free(index->buckets); + } +} + static int hashindex_index(HashIndex *index, const void *key) { @@ -123,7 +120,7 @@ hashindex_index(HashIndex *index, const void *key) } static int -hashindex_lookup(HashIndex *index, const void *key) +hashindex_lookup(HashIndex *index, const void *key, int *start_idx) { int didx = -1; int start = hashindex_index(index, key); @@ -131,7 +128,7 @@ hashindex_lookup(HashIndex *index, const void *key) for(;;) { if(BUCKET_IS_EMPTY(index, idx)) { - return -1; + break; } if(BUCKET_IS_DELETED(index, idx)) { if(didx == -1) { @@ -140,17 +137,26 @@ hashindex_lookup(HashIndex *index, const void *key) } else if(BUCKET_MATCHES_KEY(index, idx, key)) { if (didx != -1) { + // note: although lookup is logically a read-only operation, + // we optimize (change) the hashindex here "on the fly". memcpy(BUCKET_ADDR(index, didx), BUCKET_ADDR(index, idx), index->bucket_size); BUCKET_MARK_DELETED(index, idx); idx = didx; } return idx; } - idx = (idx + 1) % index->num_buckets; + idx++; + if (idx >= index->num_buckets) { + idx -= index->num_buckets; + } if(idx == start) { - return -1; + break; } } + if (start_idx != NULL) { + (*start_idx) = (didx == -1) ? 
idx : didx; + } + return -1; } static int @@ -170,11 +176,13 @@ hashindex_resize(HashIndex *index, int capacity) return 0; } } - free(index->buckets); + hashindex_free_buckets(index); index->buckets = new->buckets; index->num_buckets = new->num_buckets; + index->num_empty = index->num_buckets - index->num_entries; index->lower_limit = new->lower_limit; index->upper_limit = new->upper_limit; + index->min_empty = new->min_empty; free(new); return 1; } @@ -193,6 +201,11 @@ int get_upper_limit(int num_buckets){ return (int)(num_buckets * HASH_MAX_LOAD); } +int get_min_empty(int num_buckets){ + /* Differently from load, the effective load also considers tombstones (deleted buckets). */ + return (int)(num_buckets * (1.0 - HASH_MAX_EFF_LOAD)); +} + int size_idx(int size){ /* find the hash_sizes index with entry >= size */ int elems = NELEMS(hash_sizes); @@ -226,91 +239,182 @@ int shrink_size(int current){ return hash_sizes[i]; } +int +count_empty(HashIndex *index) +{ /* count empty (never used) buckets. this does NOT include deleted buckets (tombstones). + * TODO: if we ever change HashHeader, save the count there so we do not need this function. + */ + int i, count = 0, capacity = index->num_buckets; + for(i = 0; i < capacity; i++) { + if(BUCKET_IS_EMPTY(index, i)) + count++; + } + return count; +} + /* Public API */ + +#ifndef BORG_NO_PYTHON static HashIndex * -hashindex_read(const char *path) +hashindex_read(PyObject *file_py, int permit_compact) { - FILE *fd; - off_t length, buckets_length, bytes_read; - HashHeader header; + Py_ssize_t length, buckets_length, bytes_read; + Py_buffer header_buffer; + PyObject *header_bytes, *length_object, *bucket_bytes, *tmp; + HashHeader *header; HashIndex *index = NULL; - if((fd = fopen(path, "rb")) == NULL) { - EPRINTF_PATH(path, "fopen for reading failed"); - return NULL; + header_bytes = PyObject_CallMethod(file_py, "read", "n", (Py_ssize_t)sizeof(HashHeader)); + if(!header_bytes) { + assert(PyErr_Occurred()); + goto fail; + } + + bytes_read = PyBytes_Size(header_bytes); + if(PyErr_Occurred()) { + /* TypeError, not a bytes() object */ + goto fail_decref_header; } - bytes_read = fread(&header, 1, sizeof(HashHeader), fd); if(bytes_read != sizeof(HashHeader)) { - if(ferror(fd)) { - EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)", - (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); + /* Truncated file */ + /* Note: %zd is the format for Py_ssize_t, %zu is for size_t */ + PyErr_Format(PyExc_ValueError, "Could not read header (expected %zu, but read %zd bytes)", + sizeof(HashHeader), bytes_read); + goto fail_decref_header; + } + + /* + * Hash the header + * If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory) + */ + tmp = PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"); + Py_XDECREF(tmp); + if(PyErr_Occurred()) { + if(PyErr_ExceptionMatches(PyExc_AttributeError)) { + /* Be able to work with regular file objects which do not have a hash_part method. 
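(hash_part is an optional hook; an integrity-checking file wrapper, for example, may provide it. With a plain file object the header is simply not hashed.)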
*/ + PyErr_Clear(); + } else { + goto fail_decref_header; } - else { - EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)", - (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); - } - goto fail; } - if(fseek(fd, 0, SEEK_END) < 0) { - EPRINTF_PATH(path, "fseek failed"); - goto fail; + + /* Find length of file */ + length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END); + if(PyErr_Occurred()) { + goto fail_decref_header; } - if((length = ftell(fd)) < 0) { - EPRINTF_PATH(path, "ftell failed"); - goto fail; + length = PyNumber_AsSsize_t(length_object, PyExc_OverflowError); + Py_DECREF(length_object); + if(PyErr_Occurred()) { + /* This shouldn't generally happen; but can if seek() returns something that's not a number */ + goto fail_decref_header; } - if(fseek(fd, sizeof(HashHeader), SEEK_SET) < 0) { - EPRINTF_PATH(path, "fseek failed"); - goto fail; - } - if(memcmp(header.magic, MAGIC, MAGIC_LEN)) { - EPRINTF_MSG_PATH(path, "Unknown MAGIC in header"); - goto fail; - } - buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size); - if((size_t) length != sizeof(HashHeader) + buckets_length) { - EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)", - (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length); - goto fail; + + tmp = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)sizeof(HashHeader), SEEK_SET); + Py_XDECREF(tmp); + if(PyErr_Occurred()) { + goto fail_decref_header; } + + /* Set up the in-memory header */ if(!(index = malloc(sizeof(HashIndex)))) { - EPRINTF_PATH(path, "malloc header failed"); - goto fail; + PyErr_NoMemory(); + goto fail_decref_header; } - if(!(index->buckets = malloc(buckets_length))) { - EPRINTF_PATH(path, "malloc buckets failed"); - free(index); - index = NULL; - goto fail; + + PyObject_GetBuffer(header_bytes, &header_buffer, PyBUF_SIMPLE); + if(PyErr_Occurred()) { + goto fail_free_index; } - bytes_read = fread(index->buckets, 1, buckets_length, fd); - if(bytes_read != buckets_length) { - if(ferror(fd)) { - EPRINTF_PATH(path, "fread buckets failed (expected %ju, got %ju)", - (uintmax_t) buckets_length, (uintmax_t) bytes_read); - } - else { - EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)", - (uintmax_t) buckets_length, (uintmax_t) bytes_read); - } - free(index->buckets); - free(index); - index = NULL; - goto fail; + + header = (HashHeader*) header_buffer.buf; + if(memcmp(header->magic, MAGIC, MAGIC_LEN)) { + PyErr_Format(PyExc_ValueError, "Unknown MAGIC in header"); + goto fail_release_header_buffer; } - index->num_entries = _le32toh(header.num_entries); - index->num_buckets = _le32toh(header.num_buckets); - index->key_size = header.key_size; - index->value_size = header.value_size; + + buckets_length = (Py_ssize_t)_le32toh(header->num_buckets) * (header->key_size + header->value_size); + if((Py_ssize_t)length != (Py_ssize_t)sizeof(HashHeader) + buckets_length) { + PyErr_Format(PyExc_ValueError, "Incorrect file length (expected %zd, got %zd)", + sizeof(HashHeader) + buckets_length, length); + goto fail_release_header_buffer; + } + + index->num_entries = _le32toh(header->num_entries); + index->num_buckets = _le32toh(header->num_buckets); + index->key_size = header->key_size; + index->value_size = header->value_size; index->bucket_size = index->key_size + index->value_size; index->lower_limit = get_lower_limit(index->num_buckets); index->upper_limit = get_upper_limit(index->num_buckets); -fail: - if(fclose(fd) < 0) { - 
EPRINTF_PATH(path, "fclose failed"); + + /* + * For indices read from disk we don't malloc() the buckets ourselves, + * we have them backed by a Python bytes() object instead, and go through + * Python I/O. + * + * Note: Issuing read(buckets_length) is okay here, because buffered readers + * will issue multiple underlying reads if necessary. This supports indices + * >2 GB on Linux. We also compare lengths later. + */ + bucket_bytes = PyObject_CallMethod(file_py, "read", "n", buckets_length); + if(!bucket_bytes) { + assert(PyErr_Occurred()); + goto fail_release_header_buffer; } + bytes_read = PyBytes_Size(bucket_bytes); + if(PyErr_Occurred()) { + /* TypeError, not a bytes() object */ + goto fail_decref_buckets; + } + if(bytes_read != buckets_length) { + PyErr_Format(PyExc_ValueError, "Could not read buckets (expected %zd, got %zd)", buckets_length, bytes_read); + goto fail_decref_buckets; + } + + PyObject_GetBuffer(bucket_bytes, &index->buckets_buffer, PyBUF_SIMPLE); + if(PyErr_Occurred()) { + goto fail_decref_buckets; + } + index->buckets = index->buckets_buffer.buf; + + if(!permit_compact) { + index->min_empty = get_min_empty(index->num_buckets); + index->num_empty = count_empty(index); + + if(index->num_empty < index->min_empty) { + /* too many tombstones here / not enough empty buckets, do a same-size rebuild */ + if(!hashindex_resize(index, index->num_buckets)) { + PyErr_Format(PyExc_ValueError, "Failed to rebuild table"); + goto fail_free_buckets; + } + } + } + + /* + * Clean intermediary objects up. Note that index is only freed if an error has occurred. + * Also note that the buffer in index->buckets_buffer holds a reference to buckets_bytes. + */ + +fail_free_buckets: + if(PyErr_Occurred()) { + hashindex_free_buckets(index); + } +fail_decref_buckets: + Py_DECREF(bucket_bytes); +fail_release_header_buffer: + PyBuffer_Release(&header_buffer); +fail_free_index: + if(PyErr_Occurred()) { + free(index); + index = NULL; + } +fail_decref_header: + Py_DECREF(header_bytes); +fail: return index; } +#endif static HashIndex * hashindex_init(int capacity, int key_size, int value_size) @@ -332,9 +436,14 @@ hashindex_init(int capacity, int key_size, int value_size) index->key_size = key_size; index->value_size = value_size; index->num_buckets = capacity; + index->num_empty = capacity; index->bucket_size = index->key_size + index->value_size; index->lower_limit = get_lower_limit(index->num_buckets); index->upper_limit = get_upper_limit(index->num_buckets); + index->min_empty = get_min_empty(index->num_buckets); +#ifndef BORG_NO_PYTHON + index->buckets_buffer.buf = NULL; +#endif for(i = 0; i < capacity; i++) { BUCKET_MARK_EMPTY(index, i); } @@ -344,15 +453,17 @@ hashindex_init(int capacity, int key_size, int value_size) static void hashindex_free(HashIndex *index) { - free(index->buckets); + hashindex_free_buckets(index); free(index); } -static int -hashindex_write(HashIndex *index, const char *path) +#ifndef BORG_NO_PYTHON +static void +hashindex_write(HashIndex *index, PyObject *file_py) { - off_t buckets_length = (off_t)index->num_buckets * index->bucket_size; - FILE *fd; + PyObject *length_object, *buckets_view, *tmp; + Py_ssize_t length; + Py_ssize_t buckets_length = (Py_ssize_t)index->num_buckets * index->bucket_size; HashHeader header = { .magic = MAGIC, .num_entries = _htole32(index->num_entries), @@ -360,30 +471,62 @@ hashindex_write(HashIndex *index, const char *path) .key_size = index->key_size, .value_size = index->value_size }; - int ret = 1; - if((fd = fopen(path, "wb")) == NULL) { - 
EPRINTF_PATH(path, "fopen for writing failed"); - return 0; + length_object = PyObject_CallMethod(file_py, "write", "y#", &header, (Py_ssize_t)sizeof(HashHeader)); + if(PyErr_Occurred()) { + return; } - if(fwrite(&header, 1, sizeof(header), fd) != sizeof(header)) { - EPRINTF_PATH(path, "fwrite header failed"); - ret = 0; + length = PyNumber_AsSsize_t(length_object, PyExc_OverflowError); + Py_DECREF(length_object); + if(PyErr_Occurred()) { + return; } - if(fwrite(index->buckets, 1, buckets_length, fd) != (size_t) buckets_length) { - EPRINTF_PATH(path, "fwrite buckets failed"); - ret = 0; + if(length != sizeof(HashHeader)) { + PyErr_SetString(PyExc_ValueError, "Failed to write header"); + return; } - if(fclose(fd) < 0) { - EPRINTF_PATH(path, "fclose failed"); + + /* + * Hash the header + */ + tmp = PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"); + Py_XDECREF(tmp); + if(PyErr_Occurred()) { + if(PyErr_ExceptionMatches(PyExc_AttributeError)) { + /* Be able to work with regular file objects which do not have a hash_part method. */ + PyErr_Clear(); + } else { + return; + } + } + + /* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */ + buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ); + if(!buckets_view) { + assert(PyErr_Occurred()); + return; + } + length_object = PyObject_CallMethod(file_py, "write", "O", buckets_view); + Py_DECREF(buckets_view); + if(PyErr_Occurred()) { + return; + } + length = PyNumber_AsSsize_t(length_object, PyExc_OverflowError); + Py_DECREF(length_object); + if(PyErr_Occurred()) { + return; + } + if(length != buckets_length) { + PyErr_SetString(PyExc_ValueError, "Failed to write buckets"); + return; } - return ret; } +#endif static const void * hashindex_get(HashIndex *index, const void *key) { - int idx = hashindex_lookup(index, key); + int idx = hashindex_lookup(index, key, NULL); if(idx < 0) { return NULL; } @@ -393,7 +536,8 @@ hashindex_get(HashIndex *index, const void *key) static int hashindex_set(HashIndex *index, const void *key, const void *value) { - int idx = hashindex_lookup(index, key); + int start_idx; + int idx = hashindex_lookup(index, key, &start_idx); uint8_t *ptr; if(idx < 0) { @@ -401,10 +545,23 @@ hashindex_set(HashIndex *index, const void *key, const void *value) if(!hashindex_resize(index, grow_size(index->num_buckets))) { return 0; } + start_idx = hashindex_index(index, key); } - idx = hashindex_index(index, key); + idx = start_idx; while(!BUCKET_IS_EMPTY(index, idx) && !BUCKET_IS_DELETED(index, idx)) { - idx = (idx + 1) % index->num_buckets; + idx++; + if (idx >= index->num_buckets){ + idx -= index->num_buckets; + } + } + if(BUCKET_IS_EMPTY(index, idx)){ + index->num_empty--; + if(index->num_empty < index->min_empty) { + /* too many tombstones here / not enough empty buckets, do a same-size rebuild */ + if(!hashindex_resize(index, index->num_buckets)) { + return 0; + } + } } ptr = BUCKET_ADDR(index, idx); memcpy(ptr, key, index->key_size); @@ -421,9 +578,9 @@ hashindex_set(HashIndex *index, const void *key, const void *value) static int hashindex_delete(HashIndex *index, const void *key) { - int idx = hashindex_lookup(index, key); + int idx = hashindex_lookup(index, key, NULL); if (idx < 0) { - return 1; + return -1; } BUCKET_MARK_DELETED(index, idx); index->num_entries -= 1; @@ -454,8 +611,78 @@ hashindex_next_key(HashIndex *index, const void *key) return BUCKET_ADDR(index, idx); } +static uint64_t 
+hashindex_compact(HashIndex *index) +{ + int idx = 0; + int start_idx; + int begin_used_idx; + int empty_slot_count, count, buckets_to_copy; + int compact_tail_idx = 0; + uint64_t saved_size = (index->num_buckets - index->num_entries) * (uint64_t)index->bucket_size; + + if(index->num_buckets - index->num_entries == 0) { + /* already compact */ + return 0; + } + + while(idx < index->num_buckets) { + /* Phase 1: Find some empty slots */ + start_idx = idx; + while((BUCKET_IS_EMPTY(index, idx) || BUCKET_IS_DELETED(index, idx)) && idx < index->num_buckets) { + idx++; + } + + /* everything from start_idx to idx is empty or deleted */ + count = empty_slot_count = idx - start_idx; + begin_used_idx = idx; + + if(!empty_slot_count) { + /* In case idx==compact_tail_idx, the areas overlap */ + memmove(BUCKET_ADDR(index, compact_tail_idx), BUCKET_ADDR(index, idx), index->bucket_size); + idx++; + compact_tail_idx++; + continue; + } + + /* Phase 2: Find some non-empty/non-deleted slots we can move to the compact tail */ + + while(!(BUCKET_IS_EMPTY(index, idx) || BUCKET_IS_DELETED(index, idx)) && empty_slot_count && idx < index->num_buckets) { + idx++; + empty_slot_count--; + } + + buckets_to_copy = count - empty_slot_count; + + if(!buckets_to_copy) { + /* Nothing to move, reached end of the buckets array with no used buckets. */ + break; + } + + memcpy(BUCKET_ADDR(index, compact_tail_idx), BUCKET_ADDR(index, begin_used_idx), buckets_to_copy * index->bucket_size); + compact_tail_idx += buckets_to_copy; + } + + index->num_buckets = index->num_entries; + return saved_size; +} + static int -hashindex_get_size(HashIndex *index) +hashindex_len(HashIndex *index) { return index->num_entries; } + +static int +hashindex_size(HashIndex *index) +{ + return sizeof(HashHeader) + index->num_buckets * index->bucket_size; +} + +/* + * Used by the FuseVersionsIndex. + */ +typedef struct { + uint32_t version; + char hash[16]; +} __attribute__((__packed__)) FuseVersionsElement; diff --git a/src/borg/_item.c b/src/borg/_item.c new file mode 100644 index 00000000..c5c78c40 --- /dev/null +++ b/src/borg/_item.c @@ -0,0 +1,41 @@ +#include "Python.h" + +/* + * This is not quite as dark magic as it looks. We just convert the address of (pointer to) + * a PyObject into a bytes object in _wrap_object, and convert these bytes back to the + * pointer to the original object. + * + * This mainly looks a bit confusing due to our mental special-casing of "char*" from other + * pointers. + * + * The big upside to this is that this neither does *any* serialization (beyond creating tiny + * bytes objects as "stand-ins"), nor has to copy the entire object that's passed around. + */ + +static PyObject * +_object_to_optr(PyObject *obj) +{ + /* + * Create a temporary reference to the object being passed around so it does not vanish. + * Note that we never decref this one in _unwrap_object, since we just transfer that reference + * there, i.e. there is an elided "Py_INCREF(x); Py_DECREF(x)". + * Since the reference is transferred, calls to _wrap_object and _unwrap_object must be symmetric. 
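+ * + * Illustrative round trip (a sketch): optr = _object_to_optr(obj) yields a bytes object of + * sizeof(void*) length that can be passed around as opaque data; a later obj = _optr_to_object(optr) + * returns the very same object, and the reference taken here travels along and is handed back to that caller.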
+ */ + Py_INCREF(obj); + return PyBytes_FromStringAndSize((const char*) &obj, sizeof(void*)); +} + +static PyObject * +_optr_to_object(PyObject *bytes) +{ + if(!PyBytes_Check(bytes)) { + PyErr_SetString(PyExc_TypeError, "Cannot unwrap non-bytes object"); + return NULL; + } + if(PyBytes_Size(bytes) != sizeof(void*)) { + PyErr_SetString(PyExc_TypeError, "Invalid length of bytes object"); + return NULL; + } + PyObject *object = * (PyObject **) PyBytes_AsString(bytes); + return object; +} diff --git a/src/borg/algorithms/__init__.py b/src/borg/algorithms/__init__.py new file mode 100644 index 00000000..cfa46c51 --- /dev/null +++ b/src/borg/algorithms/__init__.py @@ -0,0 +1,11 @@ +""" +borg.algorithms +=============== + +This package is intended for hash and checksum functions. + +Ideally these would be sourced from existing libraries, +but are frequently not available yet (blake2), are +available but in poor form (crc32) or don't really +make sense as a library (xxHash). +""" diff --git a/src/borg/algorithms/blake2-libselect.h b/src/borg/algorithms/blake2-libselect.h new file mode 100644 index 00000000..5486400e --- /dev/null +++ b/src/borg/algorithms/blake2-libselect.h @@ -0,0 +1,5 @@ +#ifdef BORG_USE_LIBB2 +#include +#else +#include "blake2/blake2b-ref.c" +#endif diff --git a/src/borg/algorithms/blake2/COPYING b/src/borg/algorithms/blake2/COPYING new file mode 100644 index 00000000..6ca207ef --- /dev/null +++ b/src/borg/algorithms/blake2/COPYING @@ -0,0 +1,122 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. 
Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. 
Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. + diff --git a/src/borg/algorithms/blake2/README.md b/src/borg/algorithms/blake2/README.md new file mode 100644 index 00000000..696febaa --- /dev/null +++ b/src/borg/algorithms/blake2/README.md @@ -0,0 +1,13 @@ +# BLAKE2 + +This is the reference source code package of BLAKE2. + +All code is triple-licensed under the [CC0](http://creativecommons.org/publicdomain/zero/1.0), +the [OpenSSL Licence](https://www.openssl.org/source/license.html), +or the [Apache Public License 2.0](http://www.apache.org/licenses/LICENSE-2.0), +at your choosing. + +More: [https://blake2.net](https://blake2.net). [GitHub repository](https://github.com/BLAKE2/BLAKE2). + +Contact: contact@blake2.net + diff --git a/src/borg/algorithms/blake2/blake2-impl.h b/src/borg/algorithms/blake2/blake2-impl.h new file mode 100644 index 00000000..ad9089ee --- /dev/null +++ b/src/borg/algorithms/blake2/blake2-impl.h @@ -0,0 +1,161 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H + +#include +#include + +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; +#endif +} + +static BLAKE2_INLINE uint64_t load64( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) | + (( uint64_t )( p[6] ) << 48) | + (( uint64_t )( p[7] ) << 56) ; +#endif +} + +static BLAKE2_INLINE uint16_t load16( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint16_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint16_t )( p[0] ) << 0) | + (( uint16_t )( p[1] ) << 8) ; +#endif +} + +static BLAKE2_INLINE void store16( void *dst, uint16_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +#endif +} + +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); + p[6] = (uint8_t)(w >> 48); + p[7] = (uint8_t)(w >> 56); +#endif +} + +static BLAKE2_INLINE uint64_t load48( const void *src ) +{ + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) ; +} + +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) +{ + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); +} + +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) +{ + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(v, 0, n); +} + +#endif + diff --git a/src/borg/algorithms/blake2/blake2.h 
b/src/borg/algorithms/blake2/blake2.h new file mode 100644 index 00000000..6420c536 --- /dev/null +++ b/src/borg/algorithms/blake2/blake2.h @@ -0,0 +1,196 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#ifndef BLAKE2_H +#define BLAKE2_H + +#include +#include + +#if defined(_MSC_VER) +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + enum blake2b_constant + { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 + }; + + typedef struct blake2s_state__ + { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; + uint8_t last_node; + } blake2s_state; + + typedef struct blake2b_state__ + { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; + uint8_t last_node; + } blake2b_state; + + typedef struct blake2sp_state__ + { + blake2s_state S[8][1]; + blake2s_state R[1]; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; + } blake2sp_state; + + typedef struct blake2bp_state__ + { + blake2b_state S[4][1]; + blake2b_state R[1]; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; + } blake2bp_state; + + + BLAKE2_PACKED(struct blake2s_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint16_t xof_length; /* 14 */ + uint8_t node_depth; /* 15 */ + uint8_t inner_length; /* 16 */ + /* uint8_t reserved[0]; */ + uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + }); + + typedef struct blake2s_param__ blake2s_param; + + BLAKE2_PACKED(struct blake2b_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint32_t xof_length; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + }); + + typedef struct blake2b_param__ blake2b_param; + + typedef struct blake2xs_state__ + { + blake2s_state S[1]; + blake2s_param P[1]; + } blake2xs_state; + + typedef struct blake2xb_state__ + { + blake2b_state S[1]; + blake2b_param P[1]; + } blake2xb_state; + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES), + BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES) + }; + + /* Streaming API */ + int blake2s_init( 
blake2s_state *S, size_t outlen ); + int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); + + int blake2b_init( blake2b_state *S, size_t outlen ); + int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); + + int blake2sp_init( blake2sp_state *S, size_t outlen ); + int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen ); + int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ); + + int blake2bp_init( blake2bp_state *S, size_t outlen ); + int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen ); + int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ); + + /* Variable output length API */ + int blake2xs_init( blake2xs_state *S, const size_t outlen ); + int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen ); + int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ); + int blake2xs_final(blake2xs_state *S, void *out, size_t outlen); + + int blake2xb_init( blake2xb_state *S, const size_t outlen ); + int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen ); + int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ); + int blake2xb_final(blake2xb_state *S, void *out, size_t outlen); + + /* Simple API */ + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + /* This is simply an alias for blake2b */ + int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + +#if defined(__cplusplus) +} +#endif + +#endif + diff --git a/src/borg/algorithms/blake2/blake2b-ref.c b/src/borg/algorithms/blake2/blake2b-ref.c new file mode 100644 index 00000000..0d36fb0d --- /dev/null +++ b/src/borg/algorithms/blake2/blake2b-ref.c @@ -0,0 +1,380 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const uint8_t blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +static void blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = (uint64_t)-1; +} + +/* Some helper functions, not necessarily useful */ +static int blake2b_is_lastblock( const blake2b_state *S ) +{ + return S->f[0] != 0; +} + +static void blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = (uint64_t)-1; +} + +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +} + +static void blake2b_init0( blake2b_state *S ) +{ + size_t i; + memset( S, 0, sizeof( blake2b_state ) ); + + for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; +} + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + const uint8_t *p = ( const uint8_t * )( P ); + size_t i; + + blake2b_init0( S ); + + /* IV XOR ParamBlock */ + for( i = 0; i < 8; ++i ) + S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); + + S->outlen = P->digest_length; + return 0; +} + + + +int blake2b_init( blake2b_state *S, size_t outlen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + return blake2b_init_param( S, P ); +} + + +int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; + + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + 
memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return -1; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + uint64_t m[16]; + uint64_t v[16]; + size_t i; + + for( i = 0; i < 16; ++i ) { + m[i] = load64( block + i * sizeof( m[i] ) ); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + + for( i = 0; i < 8; ++i ) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +#undef G +#undef ROUND + +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) +{ + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + if( inlen > fill ) + { + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } + } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; + } + return 0; +} + +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) +{ + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + size_t i; + + if( out == NULL || outlen < S->outlen ) + return -1; + + if( blake2b_is_lastblock( S ) ) + return -1; + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, S->outlen ); + secure_zero_memory(buffer, sizeof(buffer)); + return 0; +} + +/* inlen, at least, should be uint64_t. Others can be size_t. 
*/ +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key && keylen > 0 ) return -1; + + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; + + if( keylen > BLAKE2B_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2b_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2b_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif + diff --git a/src/borg/algorithms/checksums.pyx b/src/borg/algorithms/checksums.pyx new file mode 100644 index 00000000..6645dd0f --- /dev/null +++ b/src/borg/algorithms/checksums.pyx @@ -0,0 +1,106 @@ +from ..helpers import bin_to_hex + +from libc.stdint cimport uint32_t +from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release +from cpython.bytes cimport PyBytes_FromStringAndSize + + +cdef extern from "crc32_dispatch.c": + uint32_t _crc32_slice_by_8 "crc32_slice_by_8"(const void* data, size_t length, uint32_t initial_crc) + uint32_t _crc32_clmul "crc32_clmul"(const void* data, size_t length, uint32_t initial_crc) + + int _have_clmul "have_clmul"() + + +cdef extern from "xxh64/xxhash.c": + ctypedef struct XXH64_canonical_t: + char digest[8] + + ctypedef struct XXH64_state_t: + pass # opaque + + ctypedef unsigned long long XXH64_hash_t + + ctypedef enum XXH_errorcode: + XXH_OK, + XXH_ERROR + + XXH64_hash_t XXH64(const void* input, size_t length, unsigned long long seed); + + XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed); + XXH_errorcode XXH64_update(XXH64_state_t* statePtr, const void* input, size_t length); + 
XXH64_hash_t XXH64_digest(const XXH64_state_t* statePtr); + + void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + + +cdef Py_buffer ro_buffer(object data) except *: + cdef Py_buffer view + PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) + return view + + +def crc32_slice_by_8(data, value=0): + cdef Py_buffer data_buf = ro_buffer(data) + cdef uint32_t val = value + try: + return _crc32_slice_by_8(data_buf.buf, data_buf.len, val) + finally: + PyBuffer_Release(&data_buf) + + +def crc32_clmul(data, value=0): + cdef Py_buffer data_buf = ro_buffer(data) + cdef uint32_t val = value + try: + return _crc32_clmul(data_buf.buf, data_buf.len, val) + finally: + PyBuffer_Release(&data_buf) + + +have_clmul = _have_clmul() +if have_clmul: + crc32 = crc32_clmul +else: + crc32 = crc32_slice_by_8 + + +def xxh64(data, seed=0): + cdef unsigned long long _seed = seed + cdef XXH64_hash_t hash + cdef XXH64_canonical_t digest + cdef Py_buffer data_buf = ro_buffer(data) + try: + hash = XXH64(data_buf.buf, data_buf.len, _seed) + finally: + PyBuffer_Release(&data_buf) + XXH64_canonicalFromHash(&digest, hash) + return PyBytes_FromStringAndSize( digest.digest, 8) + + +cdef class StreamingXXH64: + cdef XXH64_state_t state + + def __cinit__(self, seed=0): + cdef unsigned long long _seed = seed + if XXH64_reset(&self.state, _seed) != XXH_OK: + raise Exception('XXH64_reset failed') + + def update(self, data): + cdef Py_buffer data_buf = ro_buffer(data) + try: + if XXH64_update(&self.state, data_buf.buf, data_buf.len) != XXH_OK: + raise Exception('XXH64_update failed') + finally: + PyBuffer_Release(&data_buf) + + def digest(self): + cdef XXH64_hash_t hash + cdef XXH64_canonical_t digest + hash = XXH64_digest(&self.state) + XXH64_canonicalFromHash(&digest, hash) + return PyBytes_FromStringAndSize( digest.digest, 8) + + def hexdigest(self): + return bin_to_hex(self.digest()) diff --git a/src/borg/algorithms/crc32_clmul.c b/src/borg/algorithms/crc32_clmul.c new file mode 100644 index 00000000..8a065390 --- /dev/null +++ b/src/borg/algorithms/crc32_clmul.c @@ -0,0 +1,536 @@ +/* + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ + * instruction. + * + * A white paper describing this algorithm can be found at: + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf + * + * Copyright (C) 2013 Intel Corporation. All rights reserved. + * Authors: + * Wajdi Feghali + * Jim Guilford + * Vinodh Gopal + * Erdinc Ozturk + * Jim Kukunas + * + * Copyright (c) 2016 Marian Beermann (add support for initial value, restructuring) + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. 
This notice may not be removed or altered from any source distribution. + */ + +#include +#include +#include + +#ifdef _MSC_VER +#include +#else +/* + * Newer versions of GCC and clang come with cpuid.h + * (ftr GCC 4.7 in Debian Wheezy has this) + */ +#include +#endif + +static void +cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) +{ +#ifdef _MSC_VER + unsigned int registers[4]; + __cpuid(registers, info); + *eax = registers[0]; + *ebx = registers[1]; + *ecx = registers[2]; + *edx = registers[3]; +#else + /* GCC, clang */ + unsigned int _eax; + unsigned int _ebx; + unsigned int _ecx; + unsigned int _edx; + __cpuid(info, _eax, _ebx, _ecx, _edx); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +#endif +} + +static int +have_clmul(void) +{ + unsigned eax, ebx, ecx, edx; + int has_pclmulqdq; + int has_sse41; + cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx); + + has_pclmulqdq = ecx & 0x2; /* bit 1 */ + has_sse41 = ecx & 0x80000; /* bit 19 */ + + return has_pclmulqdq && has_sse41; +} + +CLMUL +static void +fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) +{ + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596); + + __m128i x_tmp3; + __m128 ps_crc0, ps_crc3, ps_res; + + x_tmp3 = *xmm_crc3; + + *xmm_crc3 = *xmm_crc0; + *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); + *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); + ps_crc0 = _mm_castsi128_ps(*xmm_crc0); + ps_crc3 = _mm_castsi128_ps(*xmm_crc3); + ps_res = _mm_xor_ps(ps_crc0, ps_crc3); + + *xmm_crc0 = *xmm_crc1; + *xmm_crc1 = *xmm_crc2; + *xmm_crc2 = x_tmp3; + *xmm_crc3 = _mm_castps_si128(ps_res); +} + +CLMUL +static void +fold_2(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) +{ + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596); + + __m128i x_tmp3, x_tmp2; + __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res31, ps_res20; + + x_tmp3 = *xmm_crc3; + x_tmp2 = *xmm_crc2; + + *xmm_crc3 = *xmm_crc1; + *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); + *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); + ps_crc3 = _mm_castsi128_ps(*xmm_crc3); + ps_crc1 = _mm_castsi128_ps(*xmm_crc1); + ps_res31 = _mm_xor_ps(ps_crc3, ps_crc1); + + *xmm_crc2 = *xmm_crc0; + *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); + *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10); + ps_crc0 = _mm_castsi128_ps(*xmm_crc0); + ps_crc2 = _mm_castsi128_ps(*xmm_crc2); + ps_res20 = _mm_xor_ps(ps_crc0, ps_crc2); + + *xmm_crc0 = x_tmp2; + *xmm_crc1 = x_tmp3; + *xmm_crc2 = _mm_castps_si128(ps_res20); + *xmm_crc3 = _mm_castps_si128(ps_res31); +} + +CLMUL +static void +fold_3(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) +{ + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596); + + __m128i x_tmp3; + __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res32, ps_res21, ps_res10; + + x_tmp3 = *xmm_crc3; + + *xmm_crc3 = *xmm_crc2; + *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01); + *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); + ps_crc2 = _mm_castsi128_ps(*xmm_crc2); + ps_crc3 = _mm_castsi128_ps(*xmm_crc3); + ps_res32 = _mm_xor_ps(ps_crc2, ps_crc3); + + *xmm_crc2 = *xmm_crc1; + *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); + *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10); + ps_crc1 = _mm_castsi128_ps(*xmm_crc1); 
+ ps_crc2 = _mm_castsi128_ps(*xmm_crc2); + ps_res21 = _mm_xor_ps(ps_crc1, ps_crc2); + + *xmm_crc1 = *xmm_crc0; + *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); + *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x10); + ps_crc0 = _mm_castsi128_ps(*xmm_crc0); + ps_crc1 = _mm_castsi128_ps(*xmm_crc1); + ps_res10 = _mm_xor_ps(ps_crc0, ps_crc1); + + *xmm_crc0 = x_tmp3; + *xmm_crc1 = _mm_castps_si128(ps_res10); + *xmm_crc2 = _mm_castps_si128(ps_res21); + *xmm_crc3 = _mm_castps_si128(ps_res32); +} + +CLMUL +static void +fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) +{ + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596); + + __m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3; + __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3; + __m128 ps_t0, ps_t1, ps_t2, ps_t3; + __m128 ps_res0, ps_res1, ps_res2, ps_res3; + + x_tmp0 = *xmm_crc0; + x_tmp1 = *xmm_crc1; + x_tmp2 = *xmm_crc2; + x_tmp3 = *xmm_crc3; + + *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); + x_tmp0 = _mm_clmulepi64_si128(x_tmp0, xmm_fold4, 0x10); + ps_crc0 = _mm_castsi128_ps(*xmm_crc0); + ps_t0 = _mm_castsi128_ps(x_tmp0); + ps_res0 = _mm_xor_ps(ps_crc0, ps_t0); + + *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); + x_tmp1 = _mm_clmulepi64_si128(x_tmp1, xmm_fold4, 0x10); + ps_crc1 = _mm_castsi128_ps(*xmm_crc1); + ps_t1 = _mm_castsi128_ps(x_tmp1); + ps_res1 = _mm_xor_ps(ps_crc1, ps_t1); + + *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01); + x_tmp2 = _mm_clmulepi64_si128(x_tmp2, xmm_fold4, 0x10); + ps_crc2 = _mm_castsi128_ps(*xmm_crc2); + ps_t2 = _mm_castsi128_ps(x_tmp2); + ps_res2 = _mm_xor_ps(ps_crc2, ps_t2); + + *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x01); + x_tmp3 = _mm_clmulepi64_si128(x_tmp3, xmm_fold4, 0x10); + ps_crc3 = _mm_castsi128_ps(*xmm_crc3); + ps_t3 = _mm_castsi128_ps(x_tmp3); + ps_res3 = _mm_xor_ps(ps_crc3, ps_t3); + + *xmm_crc0 = _mm_castps_si128(ps_res0); + *xmm_crc1 = _mm_castps_si128(ps_res1); + *xmm_crc2 = _mm_castps_si128(ps_res2); + *xmm_crc3 = _mm_castps_si128(ps_res3); +} + +static const unsigned ALIGNED_(32) pshufb_shf_table[60] = { + 0x84838281, 0x88878685, 0x8c8b8a89, 0x008f8e8d, /* shl 15 (16 - 1)/shr1 */ + 0x85848382, 0x89888786, 0x8d8c8b8a, 0x01008f8e, /* shl 14 (16 - 3)/shr2 */ + 0x86858483, 0x8a898887, 0x8e8d8c8b, 0x0201008f, /* shl 13 (16 - 4)/shr3 */ + 0x87868584, 0x8b8a8988, 0x8f8e8d8c, 0x03020100, /* shl 12 (16 - 4)/shr4 */ + 0x88878685, 0x8c8b8a89, 0x008f8e8d, 0x04030201, /* shl 11 (16 - 5)/shr5 */ + 0x89888786, 0x8d8c8b8a, 0x01008f8e, 0x05040302, /* shl 10 (16 - 6)/shr6 */ + 0x8a898887, 0x8e8d8c8b, 0x0201008f, 0x06050403, /* shl 9 (16 - 7)/shr7 */ + 0x8b8a8988, 0x8f8e8d8c, 0x03020100, 0x07060504, /* shl 8 (16 - 8)/shr8 */ + 0x8c8b8a89, 0x008f8e8d, 0x04030201, 0x08070605, /* shl 7 (16 - 9)/shr9 */ + 0x8d8c8b8a, 0x01008f8e, 0x05040302, 0x09080706, /* shl 6 (16 -10)/shr10*/ + 0x8e8d8c8b, 0x0201008f, 0x06050403, 0x0a090807, /* shl 5 (16 -11)/shr11*/ + 0x8f8e8d8c, 0x03020100, 0x07060504, 0x0b0a0908, /* shl 4 (16 -12)/shr12*/ + 0x008f8e8d, 0x04030201, 0x08070605, 0x0c0b0a09, /* shl 3 (16 -13)/shr13*/ + 0x01008f8e, 0x05040302, 0x09080706, 0x0d0c0b0a, /* shl 2 (16 -14)/shr14*/ + 0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/ +}; + +CLMUL +static void +partial_fold(const size_t len, + __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3, + __m128i *xmm_crc_part) +{ + + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 
0x54442bd4, + 0x00000001, 0xc6e41596); + const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080); + + __m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3; + __m128i xmm_a0_0, xmm_a0_1; + __m128 ps_crc3, psa0_0, psa0_1, ps_res; + + xmm_shl = _mm_load_si128((__m128i *)pshufb_shf_table + (len - 1)); + xmm_shr = xmm_shl; + xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3); + + xmm_a0_0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shl); + + *xmm_crc0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shr); + xmm_tmp1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shl); + *xmm_crc0 = _mm_or_si128(*xmm_crc0, xmm_tmp1); + + *xmm_crc1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shr); + xmm_tmp2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shl); + *xmm_crc1 = _mm_or_si128(*xmm_crc1, xmm_tmp2); + + *xmm_crc2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shr); + xmm_tmp3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shl); + *xmm_crc2 = _mm_or_si128(*xmm_crc2, xmm_tmp3); + + *xmm_crc3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shr); + *xmm_crc_part = _mm_shuffle_epi8(*xmm_crc_part, xmm_shl); + *xmm_crc3 = _mm_or_si128(*xmm_crc3, *xmm_crc_part); + + xmm_a0_1 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x10); + xmm_a0_0 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x01); + + ps_crc3 = _mm_castsi128_ps(*xmm_crc3); + psa0_0 = _mm_castsi128_ps(xmm_a0_0); + psa0_1 = _mm_castsi128_ps(xmm_a0_1); + + ps_res = _mm_xor_ps(ps_crc3, psa0_0); + ps_res = _mm_xor_ps(ps_res, psa0_1); + + *xmm_crc3 = _mm_castps_si128(ps_res); +} + +static const unsigned ALIGNED_(16) crc_k[] = { + 0xccaa009e, 0x00000000, /* rk1 */ + 0x751997d0, 0x00000001, /* rk2 */ + 0xccaa009e, 0x00000000, /* rk5 */ + 0x63cd6124, 0x00000001, /* rk6 */ + 0xf7011640, 0x00000001, /* rk7 */ + 0xdb710640, 0x00000001 /* rk8 */ +}; + +static const unsigned ALIGNED_(16) crc_mask[4] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 +}; + +static const unsigned ALIGNED_(16) crc_mask2[4] = { + 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF +}; + +#define ONCE(op) if(first) { \ + first = 0; \ + (op); \ +} + +/* + * somewhat surprisingly the "naive" way of doing this, ie. with a flag and a cond. branch, + * is consistently ~5 % faster on average than the implied-recommended branchless way (always xor, + * always zero xmm_initial). Guess speculative execution and branch prediction got the better of + * yet another "optimization tip". + */ +#define XOR_INITIAL(where) ONCE(where = _mm_xor_si128(where, xmm_initial)) + + +CLMUL +static uint32_t +crc32_clmul(const uint8_t *src, long len, uint32_t initial_crc) +{ + unsigned long algn_diff; + __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; + __m128i xmm_initial = _mm_cvtsi32_si128(initial_crc); + __m128i xmm_crc0 = _mm_cvtsi32_si128(0x9db42487); + __m128i xmm_crc1 = _mm_setzero_si128(); + __m128i xmm_crc2 = _mm_setzero_si128(); + __m128i xmm_crc3 = _mm_setzero_si128(); + __m128i xmm_crc_part; + + int first = 1; + + /* fold 512 to 32 step variable declarations for ISO-C90 compat. */ + const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask); + const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2); + + uint32_t crc; + __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; + + if (len < 16) { + if (len == 0) + return initial_crc; + if (len < 4) { + /* + * no idea how to do this for <4 bytes, delegate to classic impl. 
+ */ + uint32_t crc = ~initial_crc; + switch (len) { + case 3: crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *src++]; + case 2: crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *src++]; + case 1: crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *src++]; + } + return ~crc; + } + xmm_crc_part = _mm_loadu_si128((__m128i *)src); + XOR_INITIAL(xmm_crc_part); + goto partial; + } + + /* this alignment computation would be wrong for len<16 handled above */ + algn_diff = (0 - (uintptr_t)src) & 0xF; + if (algn_diff) { + xmm_crc_part = _mm_loadu_si128((__m128i *)src); + XOR_INITIAL(xmm_crc_part); + + src += algn_diff; + len -= algn_diff; + + partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part); + } + + while ((len -= 64) >= 0) { + xmm_t0 = _mm_load_si128((__m128i *)src); + xmm_t1 = _mm_load_si128((__m128i *)src + 1); + xmm_t2 = _mm_load_si128((__m128i *)src + 2); + xmm_t3 = _mm_load_si128((__m128i *)src + 3); + + XOR_INITIAL(xmm_t0); + + fold_4(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + + xmm_crc0 = _mm_xor_si128(xmm_crc0, xmm_t0); + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t1); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3); + + src += 64; + } + + /* + * len = num bytes left - 64 + */ + if (len + 16 >= 0) { + len += 16; + + xmm_t0 = _mm_load_si128((__m128i *)src); + xmm_t1 = _mm_load_si128((__m128i *)src + 1); + xmm_t2 = _mm_load_si128((__m128i *)src + 2); + + XOR_INITIAL(xmm_t0); + + fold_3(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t0); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t1); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t2); + + if (len == 0) + goto done; + + xmm_crc_part = _mm_load_si128((__m128i *)src + 3); + } else if (len + 32 >= 0) { + len += 32; + + xmm_t0 = _mm_load_si128((__m128i *)src); + xmm_t1 = _mm_load_si128((__m128i *)src + 1); + + XOR_INITIAL(xmm_t0); + + fold_2(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t0); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t1); + + if (len == 0) + goto done; + + xmm_crc_part = _mm_load_si128((__m128i *)src + 2); + } else if (len + 48 >= 0) { + len += 48; + + xmm_t0 = _mm_load_si128((__m128i *)src); + + XOR_INITIAL(xmm_t0); + + fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + + if (len == 0) + goto done; + + xmm_crc_part = _mm_load_si128((__m128i *)src + 1); + } else { + len += 64; + if (len == 0) + goto done; + xmm_crc_part = _mm_load_si128((__m128i *)src); + XOR_INITIAL(xmm_crc_part); + } + +partial: + partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part); + +done: + (void)0; + + /* fold 512 to 32 */ + + /* + * k1 + */ + crc_fold = _mm_load_si128((__m128i *)crc_k); + + x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10); + xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); + xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0); + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_crc0); + + x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); + xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); + xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1); + + x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); + xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); + xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); + + /* + * k5 + */ + crc_fold = _mm_load_si128((__m128i *)crc_k + 1); + + xmm_crc0 = xmm_crc3; + xmm_crc3 = 
_mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); + xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); + + xmm_crc0 = xmm_crc3; + xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); + xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask2); + + /* + * k7 + */ + xmm_crc1 = xmm_crc3; + xmm_crc2 = xmm_crc3; + crc_fold = _mm_load_si128((__m128i *)crc_k + 2); + + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); + xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask); + + xmm_crc2 = xmm_crc3; + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); + + /* + * could just as well write xmm_crc3[2], doing a movaps and truncating, but + * no real advantage - it's a tiny bit slower per call, while no additional CPUs + * would be supported by only requiring SSSE3 and CLMUL instead of SSE4.1 + CLMUL + */ + crc = _mm_extract_epi32(xmm_crc3, 2); + return ~crc; +} diff --git a/src/borg/algorithms/crc32_dispatch.c b/src/borg/algorithms/crc32_dispatch.c new file mode 100644 index 00000000..30700bc0 --- /dev/null +++ b/src/borg/algorithms/crc32_dispatch.c @@ -0,0 +1,118 @@ + +/* always compile slice by 8 as a runtime fallback */ +#include "crc32_slice_by_8.c" + +#ifdef __GNUC__ +/* + * GCC 4.4(.7) has a bug that causes it to recurse infinitely if an unknown option + * is pushed onto the options stack. GCC 4.5 was not tested, so is excluded as well. + * GCC 4.6 is known good. + */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +/* + * clang also has or had GCC bug #56298 explained below, but doesn't support + * target attributes or the options stack. So we disable this faster code path for clang. + */ +#ifndef __clang__ +/* + * While OpenBSD uses GCC, they don't have Intel intrinsics, so we can't compile this code + * on OpenBSD. + */ +#ifndef __OpenBSD__ +#if __x86_64__ +/* + * Because we don't want a configure script we need compiler-dependent pre-defined macros for detecting this, + * also some compiler-dependent stuff to invoke SSE modes and align things. + */ + +#define FOLDING_CRC + +/* + * SSE2 misses _mm_shuffle_epi32, and _mm_extract_epi32 + * SSSE3 added _mm_shuffle_epi32 + * SSE4.1 added _mm_extract_epi32 + * Also requires CLMUL of course (all AES-NI CPUs have it) + * Note that there are no CPUs with AES-NI/CLMUL but without SSE4.1 + */ +#define CLMUL __attribute__ ((target ("pclmul,sse4.1"))) + +#define ALIGNED_(n) __attribute__ ((aligned(n))) + +/* + * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56298 + * These are taken from GCC 6.x, so apparently the above bug has been resolved in that version, + * but it still affects widely used GCC 4.x. + * Part 2 of 2 follows below. 
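+ *
+ * In short: if this translation unit was not already built with the corresponding -m options,
+ * the blocks below push the required target onto GCC's options stack (remembering that fact in
+ * a __BORG_DISABLE_* macro) so that the intrinsics pulled in via crc32_clmul.c can be compiled;
+ * part 2 then pops the stack again, so the rest of the file keeps the default code generation.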
+ */
+
+#ifndef __PCLMUL__
+#pragma GCC push_options
+#pragma GCC target("pclmul")
+#define __BORG_DISABLE_PCLMUL__
+#endif
+
+#ifndef __SSE3__
+#pragma GCC push_options
+#pragma GCC target("sse3")
+#define __BORG_DISABLE_SSE3__
+#endif
+
+#ifndef __SSSE3__
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#define __BORG_DISABLE_SSSE3__
+#endif
+
+#ifndef __SSE4_1__
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#define __BORG_DISABLE_SSE4_1__
+#endif
+
+#endif /* if __x86_64__ */
+#endif /* ifndef __OpenBSD__ */
+#endif /* ifndef __clang__ */
+#endif /* __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) */
+#endif /* ifdef __GNUC__ */
+
+#ifdef FOLDING_CRC
+#include "crc32_clmul.c"
+#else
+
+static uint32_t
+crc32_clmul(const uint8_t *src, long len, uint32_t initial_crc)
+{
+    (void)src; (void)len; (void)initial_crc;
+    assert(0);
+    return 0;
+}
+
+static int
+have_clmul(void)
+{
+    return 0;
+}
+#endif
+
+/*
+ * Part 2 of 2 of the GCC workaround.
+ */
+#ifdef __BORG_DISABLE_PCLMUL__
+#undef __BORG_DISABLE_PCLMUL__
+#pragma GCC pop_options
+#endif
+
+#ifdef __BORG_DISABLE_SSE3__
+#undef __BORG_DISABLE_SSE3__
+#pragma GCC pop_options
+#endif
+
+#ifdef __BORG_DISABLE_SSSE3__
+#undef __BORG_DISABLE_SSSE3__
+#pragma GCC pop_options
+#endif
+
+#ifdef __BORG_DISABLE_SSE4_1__
+#undef __BORG_DISABLE_SSE4_1__
+#pragma GCC pop_options
+#endif
diff --git a/src/borg/algorithms/crc32_slice_by_8.c b/src/borg/algorithms/crc32_slice_by_8.c
new file mode 100644
index 00000000..dcfd8b8f
--- /dev/null
+++ b/src/borg/algorithms/crc32_slice_by_8.c
@@ -0,0 +1,381 @@
+// //////////////////////////////////////////////////////////
+// Crc32.h
+// Copyright (c) 2011-2016 Stephan Brumme. All rights reserved.
+// see http://create.stephan-brumme.com/disclaimer.html
+//
+
+// uint8_t, uint32_t, int32_t
+#include <stdint.h>
+// size_t
+#include <stddef.h>
+
+#include "../_endian.h"
+
+/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times
+uint32_t crc32_4x8bytes(const void* data, size_t length, uint32_t previousCrc32);
+
+// //////////////////////////////////////////////////////////
+// Crc32.cpp
+// Copyright (c) 2011-2016 Stephan Brumme. All rights reserved.
+// Slicing-by-16 contributed by Bulat Ziganshin
+// Tableless bytewise CRC contributed by Hagai Gold
+// see http://create.stephan-brumme.com/disclaimer.html
+//
+
+/// zlib's CRC32 polynomial
+const uint32_t Polynomial = 0xEDB88320;
+
+// //////////////////////////////////////////////////////////
+// constants
+
+/// look-up table, already declared above
+const uint32_t Crc32Lookup[8][256] =
+{
+  //// same algorithm as crc32_bitwise
+  //for (int i = 0; i <= 0xFF; i++)
+  //{
+  //  uint32_t crc = i;
+  //  for (int j = 0; j < 8; j++)
+  //    crc = (crc >> 1) ^ ((crc & 1) * Polynomial);
+  //  Crc32Lookup[0][i] = crc;
+  //}
+  //// ... and the following slicing-by-8 algorithm (from Intel):
+  //// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
+  //// http://sourceforge.net/projects/slicing-by-8/
+  //for (int slice = 1; slice < MaxSlice; slice++)
+  //  Crc32Lookup[slice][i] = (Crc32Lookup[slice - 1][i] >> 8) ^ Crc32Lookup[0][Crc32Lookup[slice - 1][i] & 0xFF];
+  {
+    // note: the first number of every second row corresponds to the half-byte look-up table !
+ 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3, + 0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91, + 0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7, + 0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5, + 0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B, + 0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59, + 0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F, + 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D, + 0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433, + 0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01, + 0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457, + 0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65, + 0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB, + 0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9, + 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F, + 0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD, + 0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683, + 0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1, + 0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7, + 0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5, + 0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B, + 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79, + 0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F, + 0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D, + 0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713, + 0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21, + 0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777, + 0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45, + 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB, + 0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9, + 0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF, + 0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D, + } + + ,{ + 0x00000000,0x191B3141,0x32366282,0x2B2D53C3,0x646CC504,0x7D77F445,0x565AA786,0x4F4196C7, + 0xC8D98A08,0xD1C2BB49,0xFAEFE88A,0xE3F4D9CB,0xACB54F0C,0xB5AE7E4D,0x9E832D8E,0x87981CCF, + 0x4AC21251,0x53D92310,0x78F470D3,0x61EF4192,0x2EAED755,0x37B5E614,0x1C98B5D7,0x05838496, + 0x821B9859,0x9B00A918,0xB02DFADB,0xA936CB9A,0xE6775D5D,0xFF6C6C1C,0xD4413FDF,0xCD5A0E9E, + 0x958424A2,0x8C9F15E3,0xA7B24620,0xBEA97761,0xF1E8E1A6,0xE8F3D0E7,0xC3DE8324,0xDAC5B265, + 0x5D5DAEAA,0x44469FEB,0x6F6BCC28,0x7670FD69,0x39316BAE,0x202A5AEF,0x0B07092C,0x121C386D, + 
0xDF4636F3,0xC65D07B2,0xED705471,0xF46B6530,0xBB2AF3F7,0xA231C2B6,0x891C9175,0x9007A034, + 0x179FBCFB,0x0E848DBA,0x25A9DE79,0x3CB2EF38,0x73F379FF,0x6AE848BE,0x41C51B7D,0x58DE2A3C, + 0xF0794F05,0xE9627E44,0xC24F2D87,0xDB541CC6,0x94158A01,0x8D0EBB40,0xA623E883,0xBF38D9C2, + 0x38A0C50D,0x21BBF44C,0x0A96A78F,0x138D96CE,0x5CCC0009,0x45D73148,0x6EFA628B,0x77E153CA, + 0xBABB5D54,0xA3A06C15,0x888D3FD6,0x91960E97,0xDED79850,0xC7CCA911,0xECE1FAD2,0xF5FACB93, + 0x7262D75C,0x6B79E61D,0x4054B5DE,0x594F849F,0x160E1258,0x0F152319,0x243870DA,0x3D23419B, + 0x65FD6BA7,0x7CE65AE6,0x57CB0925,0x4ED03864,0x0191AEA3,0x188A9FE2,0x33A7CC21,0x2ABCFD60, + 0xAD24E1AF,0xB43FD0EE,0x9F12832D,0x8609B26C,0xC94824AB,0xD05315EA,0xFB7E4629,0xE2657768, + 0x2F3F79F6,0x362448B7,0x1D091B74,0x04122A35,0x4B53BCF2,0x52488DB3,0x7965DE70,0x607EEF31, + 0xE7E6F3FE,0xFEFDC2BF,0xD5D0917C,0xCCCBA03D,0x838A36FA,0x9A9107BB,0xB1BC5478,0xA8A76539, + 0x3B83984B,0x2298A90A,0x09B5FAC9,0x10AECB88,0x5FEF5D4F,0x46F46C0E,0x6DD93FCD,0x74C20E8C, + 0xF35A1243,0xEA412302,0xC16C70C1,0xD8774180,0x9736D747,0x8E2DE606,0xA500B5C5,0xBC1B8484, + 0x71418A1A,0x685ABB5B,0x4377E898,0x5A6CD9D9,0x152D4F1E,0x0C367E5F,0x271B2D9C,0x3E001CDD, + 0xB9980012,0xA0833153,0x8BAE6290,0x92B553D1,0xDDF4C516,0xC4EFF457,0xEFC2A794,0xF6D996D5, + 0xAE07BCE9,0xB71C8DA8,0x9C31DE6B,0x852AEF2A,0xCA6B79ED,0xD37048AC,0xF85D1B6F,0xE1462A2E, + 0x66DE36E1,0x7FC507A0,0x54E85463,0x4DF36522,0x02B2F3E5,0x1BA9C2A4,0x30849167,0x299FA026, + 0xE4C5AEB8,0xFDDE9FF9,0xD6F3CC3A,0xCFE8FD7B,0x80A96BBC,0x99B25AFD,0xB29F093E,0xAB84387F, + 0x2C1C24B0,0x350715F1,0x1E2A4632,0x07317773,0x4870E1B4,0x516BD0F5,0x7A468336,0x635DB277, + 0xCBFAD74E,0xD2E1E60F,0xF9CCB5CC,0xE0D7848D,0xAF96124A,0xB68D230B,0x9DA070C8,0x84BB4189, + 0x03235D46,0x1A386C07,0x31153FC4,0x280E0E85,0x674F9842,0x7E54A903,0x5579FAC0,0x4C62CB81, + 0x8138C51F,0x9823F45E,0xB30EA79D,0xAA1596DC,0xE554001B,0xFC4F315A,0xD7626299,0xCE7953D8, + 0x49E14F17,0x50FA7E56,0x7BD72D95,0x62CC1CD4,0x2D8D8A13,0x3496BB52,0x1FBBE891,0x06A0D9D0, + 0x5E7EF3EC,0x4765C2AD,0x6C48916E,0x7553A02F,0x3A1236E8,0x230907A9,0x0824546A,0x113F652B, + 0x96A779E4,0x8FBC48A5,0xA4911B66,0xBD8A2A27,0xF2CBBCE0,0xEBD08DA1,0xC0FDDE62,0xD9E6EF23, + 0x14BCE1BD,0x0DA7D0FC,0x268A833F,0x3F91B27E,0x70D024B9,0x69CB15F8,0x42E6463B,0x5BFD777A, + 0xDC656BB5,0xC57E5AF4,0xEE530937,0xF7483876,0xB809AEB1,0xA1129FF0,0x8A3FCC33,0x9324FD72, + }, + + { + 0x00000000,0x01C26A37,0x0384D46E,0x0246BE59,0x0709A8DC,0x06CBC2EB,0x048D7CB2,0x054F1685, + 0x0E1351B8,0x0FD13B8F,0x0D9785D6,0x0C55EFE1,0x091AF964,0x08D89353,0x0A9E2D0A,0x0B5C473D, + 0x1C26A370,0x1DE4C947,0x1FA2771E,0x1E601D29,0x1B2F0BAC,0x1AED619B,0x18ABDFC2,0x1969B5F5, + 0x1235F2C8,0x13F798FF,0x11B126A6,0x10734C91,0x153C5A14,0x14FE3023,0x16B88E7A,0x177AE44D, + 0x384D46E0,0x398F2CD7,0x3BC9928E,0x3A0BF8B9,0x3F44EE3C,0x3E86840B,0x3CC03A52,0x3D025065, + 0x365E1758,0x379C7D6F,0x35DAC336,0x3418A901,0x3157BF84,0x3095D5B3,0x32D36BEA,0x331101DD, + 0x246BE590,0x25A98FA7,0x27EF31FE,0x262D5BC9,0x23624D4C,0x22A0277B,0x20E69922,0x2124F315, + 0x2A78B428,0x2BBADE1F,0x29FC6046,0x283E0A71,0x2D711CF4,0x2CB376C3,0x2EF5C89A,0x2F37A2AD, + 0x709A8DC0,0x7158E7F7,0x731E59AE,0x72DC3399,0x7793251C,0x76514F2B,0x7417F172,0x75D59B45, + 0x7E89DC78,0x7F4BB64F,0x7D0D0816,0x7CCF6221,0x798074A4,0x78421E93,0x7A04A0CA,0x7BC6CAFD, + 0x6CBC2EB0,0x6D7E4487,0x6F38FADE,0x6EFA90E9,0x6BB5866C,0x6A77EC5B,0x68315202,0x69F33835, + 0x62AF7F08,0x636D153F,0x612BAB66,0x60E9C151,0x65A6D7D4,0x6464BDE3,0x662203BA,0x67E0698D, + 
0x48D7CB20,0x4915A117,0x4B531F4E,0x4A917579,0x4FDE63FC,0x4E1C09CB,0x4C5AB792,0x4D98DDA5, + 0x46C49A98,0x4706F0AF,0x45404EF6,0x448224C1,0x41CD3244,0x400F5873,0x4249E62A,0x438B8C1D, + 0x54F16850,0x55330267,0x5775BC3E,0x56B7D609,0x53F8C08C,0x523AAABB,0x507C14E2,0x51BE7ED5, + 0x5AE239E8,0x5B2053DF,0x5966ED86,0x58A487B1,0x5DEB9134,0x5C29FB03,0x5E6F455A,0x5FAD2F6D, + 0xE1351B80,0xE0F771B7,0xE2B1CFEE,0xE373A5D9,0xE63CB35C,0xE7FED96B,0xE5B86732,0xE47A0D05, + 0xEF264A38,0xEEE4200F,0xECA29E56,0xED60F461,0xE82FE2E4,0xE9ED88D3,0xEBAB368A,0xEA695CBD, + 0xFD13B8F0,0xFCD1D2C7,0xFE976C9E,0xFF5506A9,0xFA1A102C,0xFBD87A1B,0xF99EC442,0xF85CAE75, + 0xF300E948,0xF2C2837F,0xF0843D26,0xF1465711,0xF4094194,0xF5CB2BA3,0xF78D95FA,0xF64FFFCD, + 0xD9785D60,0xD8BA3757,0xDAFC890E,0xDB3EE339,0xDE71F5BC,0xDFB39F8B,0xDDF521D2,0xDC374BE5, + 0xD76B0CD8,0xD6A966EF,0xD4EFD8B6,0xD52DB281,0xD062A404,0xD1A0CE33,0xD3E6706A,0xD2241A5D, + 0xC55EFE10,0xC49C9427,0xC6DA2A7E,0xC7184049,0xC25756CC,0xC3953CFB,0xC1D382A2,0xC011E895, + 0xCB4DAFA8,0xCA8FC59F,0xC8C97BC6,0xC90B11F1,0xCC440774,0xCD866D43,0xCFC0D31A,0xCE02B92D, + 0x91AF9640,0x906DFC77,0x922B422E,0x93E92819,0x96A63E9C,0x976454AB,0x9522EAF2,0x94E080C5, + 0x9FBCC7F8,0x9E7EADCF,0x9C381396,0x9DFA79A1,0x98B56F24,0x99770513,0x9B31BB4A,0x9AF3D17D, + 0x8D893530,0x8C4B5F07,0x8E0DE15E,0x8FCF8B69,0x8A809DEC,0x8B42F7DB,0x89044982,0x88C623B5, + 0x839A6488,0x82580EBF,0x801EB0E6,0x81DCDAD1,0x8493CC54,0x8551A663,0x8717183A,0x86D5720D, + 0xA9E2D0A0,0xA820BA97,0xAA6604CE,0xABA46EF9,0xAEEB787C,0xAF29124B,0xAD6FAC12,0xACADC625, + 0xA7F18118,0xA633EB2F,0xA4755576,0xA5B73F41,0xA0F829C4,0xA13A43F3,0xA37CFDAA,0xA2BE979D, + 0xB5C473D0,0xB40619E7,0xB640A7BE,0xB782CD89,0xB2CDDB0C,0xB30FB13B,0xB1490F62,0xB08B6555, + 0xBBD72268,0xBA15485F,0xB853F606,0xB9919C31,0xBCDE8AB4,0xBD1CE083,0xBF5A5EDA,0xBE9834ED, + }, + + { + 0x00000000,0xB8BC6765,0xAA09C88B,0x12B5AFEE,0x8F629757,0x37DEF032,0x256B5FDC,0x9DD738B9, + 0xC5B428EF,0x7D084F8A,0x6FBDE064,0xD7018701,0x4AD6BFB8,0xF26AD8DD,0xE0DF7733,0x58631056, + 0x5019579F,0xE8A530FA,0xFA109F14,0x42ACF871,0xDF7BC0C8,0x67C7A7AD,0x75720843,0xCDCE6F26, + 0x95AD7F70,0x2D111815,0x3FA4B7FB,0x8718D09E,0x1ACFE827,0xA2738F42,0xB0C620AC,0x087A47C9, + 0xA032AF3E,0x188EC85B,0x0A3B67B5,0xB28700D0,0x2F503869,0x97EC5F0C,0x8559F0E2,0x3DE59787, + 0x658687D1,0xDD3AE0B4,0xCF8F4F5A,0x7733283F,0xEAE41086,0x525877E3,0x40EDD80D,0xF851BF68, + 0xF02BF8A1,0x48979FC4,0x5A22302A,0xE29E574F,0x7F496FF6,0xC7F50893,0xD540A77D,0x6DFCC018, + 0x359FD04E,0x8D23B72B,0x9F9618C5,0x272A7FA0,0xBAFD4719,0x0241207C,0x10F48F92,0xA848E8F7, + 0x9B14583D,0x23A83F58,0x311D90B6,0x89A1F7D3,0x1476CF6A,0xACCAA80F,0xBE7F07E1,0x06C36084, + 0x5EA070D2,0xE61C17B7,0xF4A9B859,0x4C15DF3C,0xD1C2E785,0x697E80E0,0x7BCB2F0E,0xC377486B, + 0xCB0D0FA2,0x73B168C7,0x6104C729,0xD9B8A04C,0x446F98F5,0xFCD3FF90,0xEE66507E,0x56DA371B, + 0x0EB9274D,0xB6054028,0xA4B0EFC6,0x1C0C88A3,0x81DBB01A,0x3967D77F,0x2BD27891,0x936E1FF4, + 0x3B26F703,0x839A9066,0x912F3F88,0x299358ED,0xB4446054,0x0CF80731,0x1E4DA8DF,0xA6F1CFBA, + 0xFE92DFEC,0x462EB889,0x549B1767,0xEC277002,0x71F048BB,0xC94C2FDE,0xDBF98030,0x6345E755, + 0x6B3FA09C,0xD383C7F9,0xC1366817,0x798A0F72,0xE45D37CB,0x5CE150AE,0x4E54FF40,0xF6E89825, + 0xAE8B8873,0x1637EF16,0x048240F8,0xBC3E279D,0x21E91F24,0x99557841,0x8BE0D7AF,0x335CB0CA, + 0xED59B63B,0x55E5D15E,0x47507EB0,0xFFEC19D5,0x623B216C,0xDA874609,0xC832E9E7,0x708E8E82, + 0x28ED9ED4,0x9051F9B1,0x82E4565F,0x3A58313A,0xA78F0983,0x1F336EE6,0x0D86C108,0xB53AA66D, + 
0xBD40E1A4,0x05FC86C1,0x1749292F,0xAFF54E4A,0x322276F3,0x8A9E1196,0x982BBE78,0x2097D91D, + 0x78F4C94B,0xC048AE2E,0xD2FD01C0,0x6A4166A5,0xF7965E1C,0x4F2A3979,0x5D9F9697,0xE523F1F2, + 0x4D6B1905,0xF5D77E60,0xE762D18E,0x5FDEB6EB,0xC2098E52,0x7AB5E937,0x680046D9,0xD0BC21BC, + 0x88DF31EA,0x3063568F,0x22D6F961,0x9A6A9E04,0x07BDA6BD,0xBF01C1D8,0xADB46E36,0x15080953, + 0x1D724E9A,0xA5CE29FF,0xB77B8611,0x0FC7E174,0x9210D9CD,0x2AACBEA8,0x38191146,0x80A57623, + 0xD8C66675,0x607A0110,0x72CFAEFE,0xCA73C99B,0x57A4F122,0xEF189647,0xFDAD39A9,0x45115ECC, + 0x764DEE06,0xCEF18963,0xDC44268D,0x64F841E8,0xF92F7951,0x41931E34,0x5326B1DA,0xEB9AD6BF, + 0xB3F9C6E9,0x0B45A18C,0x19F00E62,0xA14C6907,0x3C9B51BE,0x842736DB,0x96929935,0x2E2EFE50, + 0x2654B999,0x9EE8DEFC,0x8C5D7112,0x34E11677,0xA9362ECE,0x118A49AB,0x033FE645,0xBB838120, + 0xE3E09176,0x5B5CF613,0x49E959FD,0xF1553E98,0x6C820621,0xD43E6144,0xC68BCEAA,0x7E37A9CF, + 0xD67F4138,0x6EC3265D,0x7C7689B3,0xC4CAEED6,0x591DD66F,0xE1A1B10A,0xF3141EE4,0x4BA87981, + 0x13CB69D7,0xAB770EB2,0xB9C2A15C,0x017EC639,0x9CA9FE80,0x241599E5,0x36A0360B,0x8E1C516E, + 0x866616A7,0x3EDA71C2,0x2C6FDE2C,0x94D3B949,0x090481F0,0xB1B8E695,0xA30D497B,0x1BB12E1E, + 0x43D23E48,0xFB6E592D,0xE9DBF6C3,0x516791A6,0xCCB0A91F,0x740CCE7A,0x66B96194,0xDE0506F1, + } + + ,{ + 0x00000000,0x3D6029B0,0x7AC05360,0x47A07AD0,0xF580A6C0,0xC8E08F70,0x8F40F5A0,0xB220DC10, + 0x30704BC1,0x0D106271,0x4AB018A1,0x77D03111,0xC5F0ED01,0xF890C4B1,0xBF30BE61,0x825097D1, + 0x60E09782,0x5D80BE32,0x1A20C4E2,0x2740ED52,0x95603142,0xA80018F2,0xEFA06222,0xD2C04B92, + 0x5090DC43,0x6DF0F5F3,0x2A508F23,0x1730A693,0xA5107A83,0x98705333,0xDFD029E3,0xE2B00053, + 0xC1C12F04,0xFCA106B4,0xBB017C64,0x866155D4,0x344189C4,0x0921A074,0x4E81DAA4,0x73E1F314, + 0xF1B164C5,0xCCD14D75,0x8B7137A5,0xB6111E15,0x0431C205,0x3951EBB5,0x7EF19165,0x4391B8D5, + 0xA121B886,0x9C419136,0xDBE1EBE6,0xE681C256,0x54A11E46,0x69C137F6,0x2E614D26,0x13016496, + 0x9151F347,0xAC31DAF7,0xEB91A027,0xD6F18997,0x64D15587,0x59B17C37,0x1E1106E7,0x23712F57, + 0x58F35849,0x659371F9,0x22330B29,0x1F532299,0xAD73FE89,0x9013D739,0xD7B3ADE9,0xEAD38459, + 0x68831388,0x55E33A38,0x124340E8,0x2F236958,0x9D03B548,0xA0639CF8,0xE7C3E628,0xDAA3CF98, + 0x3813CFCB,0x0573E67B,0x42D39CAB,0x7FB3B51B,0xCD93690B,0xF0F340BB,0xB7533A6B,0x8A3313DB, + 0x0863840A,0x3503ADBA,0x72A3D76A,0x4FC3FEDA,0xFDE322CA,0xC0830B7A,0x872371AA,0xBA43581A, + 0x9932774D,0xA4525EFD,0xE3F2242D,0xDE920D9D,0x6CB2D18D,0x51D2F83D,0x167282ED,0x2B12AB5D, + 0xA9423C8C,0x9422153C,0xD3826FEC,0xEEE2465C,0x5CC29A4C,0x61A2B3FC,0x2602C92C,0x1B62E09C, + 0xF9D2E0CF,0xC4B2C97F,0x8312B3AF,0xBE729A1F,0x0C52460F,0x31326FBF,0x7692156F,0x4BF23CDF, + 0xC9A2AB0E,0xF4C282BE,0xB362F86E,0x8E02D1DE,0x3C220DCE,0x0142247E,0x46E25EAE,0x7B82771E, + 0xB1E6B092,0x8C869922,0xCB26E3F2,0xF646CA42,0x44661652,0x79063FE2,0x3EA64532,0x03C66C82, + 0x8196FB53,0xBCF6D2E3,0xFB56A833,0xC6368183,0x74165D93,0x49767423,0x0ED60EF3,0x33B62743, + 0xD1062710,0xEC660EA0,0xABC67470,0x96A65DC0,0x248681D0,0x19E6A860,0x5E46D2B0,0x6326FB00, + 0xE1766CD1,0xDC164561,0x9BB63FB1,0xA6D61601,0x14F6CA11,0x2996E3A1,0x6E369971,0x5356B0C1, + 0x70279F96,0x4D47B626,0x0AE7CCF6,0x3787E546,0x85A73956,0xB8C710E6,0xFF676A36,0xC2074386, + 0x4057D457,0x7D37FDE7,0x3A978737,0x07F7AE87,0xB5D77297,0x88B75B27,0xCF1721F7,0xF2770847, + 0x10C70814,0x2DA721A4,0x6A075B74,0x576772C4,0xE547AED4,0xD8278764,0x9F87FDB4,0xA2E7D404, + 0x20B743D5,0x1DD76A65,0x5A7710B5,0x67173905,0xD537E515,0xE857CCA5,0xAFF7B675,0x92979FC5, + 
0xE915E8DB,0xD475C16B,0x93D5BBBB,0xAEB5920B,0x1C954E1B,0x21F567AB,0x66551D7B,0x5B3534CB, + 0xD965A31A,0xE4058AAA,0xA3A5F07A,0x9EC5D9CA,0x2CE505DA,0x11852C6A,0x562556BA,0x6B457F0A, + 0x89F57F59,0xB49556E9,0xF3352C39,0xCE550589,0x7C75D999,0x4115F029,0x06B58AF9,0x3BD5A349, + 0xB9853498,0x84E51D28,0xC34567F8,0xFE254E48,0x4C059258,0x7165BBE8,0x36C5C138,0x0BA5E888, + 0x28D4C7DF,0x15B4EE6F,0x521494BF,0x6F74BD0F,0xDD54611F,0xE03448AF,0xA794327F,0x9AF41BCF, + 0x18A48C1E,0x25C4A5AE,0x6264DF7E,0x5F04F6CE,0xED242ADE,0xD044036E,0x97E479BE,0xAA84500E, + 0x4834505D,0x755479ED,0x32F4033D,0x0F942A8D,0xBDB4F69D,0x80D4DF2D,0xC774A5FD,0xFA148C4D, + 0x78441B9C,0x4524322C,0x028448FC,0x3FE4614C,0x8DC4BD5C,0xB0A494EC,0xF704EE3C,0xCA64C78C, + }, + + { + 0x00000000,0xCB5CD3A5,0x4DC8A10B,0x869472AE,0x9B914216,0x50CD91B3,0xD659E31D,0x1D0530B8, + 0xEC53826D,0x270F51C8,0xA19B2366,0x6AC7F0C3,0x77C2C07B,0xBC9E13DE,0x3A0A6170,0xF156B2D5, + 0x03D6029B,0xC88AD13E,0x4E1EA390,0x85427035,0x9847408D,0x531B9328,0xD58FE186,0x1ED33223, + 0xEF8580F6,0x24D95353,0xA24D21FD,0x6911F258,0x7414C2E0,0xBF481145,0x39DC63EB,0xF280B04E, + 0x07AC0536,0xCCF0D693,0x4A64A43D,0x81387798,0x9C3D4720,0x57619485,0xD1F5E62B,0x1AA9358E, + 0xEBFF875B,0x20A354FE,0xA6372650,0x6D6BF5F5,0x706EC54D,0xBB3216E8,0x3DA66446,0xF6FAB7E3, + 0x047A07AD,0xCF26D408,0x49B2A6A6,0x82EE7503,0x9FEB45BB,0x54B7961E,0xD223E4B0,0x197F3715, + 0xE82985C0,0x23755665,0xA5E124CB,0x6EBDF76E,0x73B8C7D6,0xB8E41473,0x3E7066DD,0xF52CB578, + 0x0F580A6C,0xC404D9C9,0x4290AB67,0x89CC78C2,0x94C9487A,0x5F959BDF,0xD901E971,0x125D3AD4, + 0xE30B8801,0x28575BA4,0xAEC3290A,0x659FFAAF,0x789ACA17,0xB3C619B2,0x35526B1C,0xFE0EB8B9, + 0x0C8E08F7,0xC7D2DB52,0x4146A9FC,0x8A1A7A59,0x971F4AE1,0x5C439944,0xDAD7EBEA,0x118B384F, + 0xE0DD8A9A,0x2B81593F,0xAD152B91,0x6649F834,0x7B4CC88C,0xB0101B29,0x36846987,0xFDD8BA22, + 0x08F40F5A,0xC3A8DCFF,0x453CAE51,0x8E607DF4,0x93654D4C,0x58399EE9,0xDEADEC47,0x15F13FE2, + 0xE4A78D37,0x2FFB5E92,0xA96F2C3C,0x6233FF99,0x7F36CF21,0xB46A1C84,0x32FE6E2A,0xF9A2BD8F, + 0x0B220DC1,0xC07EDE64,0x46EAACCA,0x8DB67F6F,0x90B34FD7,0x5BEF9C72,0xDD7BEEDC,0x16273D79, + 0xE7718FAC,0x2C2D5C09,0xAAB92EA7,0x61E5FD02,0x7CE0CDBA,0xB7BC1E1F,0x31286CB1,0xFA74BF14, + 0x1EB014D8,0xD5ECC77D,0x5378B5D3,0x98246676,0x852156CE,0x4E7D856B,0xC8E9F7C5,0x03B52460, + 0xF2E396B5,0x39BF4510,0xBF2B37BE,0x7477E41B,0x6972D4A3,0xA22E0706,0x24BA75A8,0xEFE6A60D, + 0x1D661643,0xD63AC5E6,0x50AEB748,0x9BF264ED,0x86F75455,0x4DAB87F0,0xCB3FF55E,0x006326FB, + 0xF135942E,0x3A69478B,0xBCFD3525,0x77A1E680,0x6AA4D638,0xA1F8059D,0x276C7733,0xEC30A496, + 0x191C11EE,0xD240C24B,0x54D4B0E5,0x9F886340,0x828D53F8,0x49D1805D,0xCF45F2F3,0x04192156, + 0xF54F9383,0x3E134026,0xB8873288,0x73DBE12D,0x6EDED195,0xA5820230,0x2316709E,0xE84AA33B, + 0x1ACA1375,0xD196C0D0,0x5702B27E,0x9C5E61DB,0x815B5163,0x4A0782C6,0xCC93F068,0x07CF23CD, + 0xF6999118,0x3DC542BD,0xBB513013,0x700DE3B6,0x6D08D30E,0xA65400AB,0x20C07205,0xEB9CA1A0, + 0x11E81EB4,0xDAB4CD11,0x5C20BFBF,0x977C6C1A,0x8A795CA2,0x41258F07,0xC7B1FDA9,0x0CED2E0C, + 0xFDBB9CD9,0x36E74F7C,0xB0733DD2,0x7B2FEE77,0x662ADECF,0xAD760D6A,0x2BE27FC4,0xE0BEAC61, + 0x123E1C2F,0xD962CF8A,0x5FF6BD24,0x94AA6E81,0x89AF5E39,0x42F38D9C,0xC467FF32,0x0F3B2C97, + 0xFE6D9E42,0x35314DE7,0xB3A53F49,0x78F9ECEC,0x65FCDC54,0xAEA00FF1,0x28347D5F,0xE368AEFA, + 0x16441B82,0xDD18C827,0x5B8CBA89,0x90D0692C,0x8DD55994,0x46898A31,0xC01DF89F,0x0B412B3A, + 0xFA1799EF,0x314B4A4A,0xB7DF38E4,0x7C83EB41,0x6186DBF9,0xAADA085C,0x2C4E7AF2,0xE712A957, + 
0x15921919,0xDECECABC,0x585AB812,0x93066BB7,0x8E035B0F,0x455F88AA,0xC3CBFA04,0x089729A1, + 0xF9C19B74,0x329D48D1,0xB4093A7F,0x7F55E9DA,0x6250D962,0xA90C0AC7,0x2F987869,0xE4C4ABCC, + }, + + { + 0x00000000,0xA6770BB4,0x979F1129,0x31E81A9D,0xF44F2413,0x52382FA7,0x63D0353A,0xC5A73E8E, + 0x33EF4E67,0x959845D3,0xA4705F4E,0x020754FA,0xC7A06A74,0x61D761C0,0x503F7B5D,0xF64870E9, + 0x67DE9CCE,0xC1A9977A,0xF0418DE7,0x56368653,0x9391B8DD,0x35E6B369,0x040EA9F4,0xA279A240, + 0x5431D2A9,0xF246D91D,0xC3AEC380,0x65D9C834,0xA07EF6BA,0x0609FD0E,0x37E1E793,0x9196EC27, + 0xCFBD399C,0x69CA3228,0x582228B5,0xFE552301,0x3BF21D8F,0x9D85163B,0xAC6D0CA6,0x0A1A0712, + 0xFC5277FB,0x5A257C4F,0x6BCD66D2,0xCDBA6D66,0x081D53E8,0xAE6A585C,0x9F8242C1,0x39F54975, + 0xA863A552,0x0E14AEE6,0x3FFCB47B,0x998BBFCF,0x5C2C8141,0xFA5B8AF5,0xCBB39068,0x6DC49BDC, + 0x9B8CEB35,0x3DFBE081,0x0C13FA1C,0xAA64F1A8,0x6FC3CF26,0xC9B4C492,0xF85CDE0F,0x5E2BD5BB, + 0x440B7579,0xE27C7ECD,0xD3946450,0x75E36FE4,0xB044516A,0x16335ADE,0x27DB4043,0x81AC4BF7, + 0x77E43B1E,0xD19330AA,0xE07B2A37,0x460C2183,0x83AB1F0D,0x25DC14B9,0x14340E24,0xB2430590, + 0x23D5E9B7,0x85A2E203,0xB44AF89E,0x123DF32A,0xD79ACDA4,0x71EDC610,0x4005DC8D,0xE672D739, + 0x103AA7D0,0xB64DAC64,0x87A5B6F9,0x21D2BD4D,0xE47583C3,0x42028877,0x73EA92EA,0xD59D995E, + 0x8BB64CE5,0x2DC14751,0x1C295DCC,0xBA5E5678,0x7FF968F6,0xD98E6342,0xE86679DF,0x4E11726B, + 0xB8590282,0x1E2E0936,0x2FC613AB,0x89B1181F,0x4C162691,0xEA612D25,0xDB8937B8,0x7DFE3C0C, + 0xEC68D02B,0x4A1FDB9F,0x7BF7C102,0xDD80CAB6,0x1827F438,0xBE50FF8C,0x8FB8E511,0x29CFEEA5, + 0xDF879E4C,0x79F095F8,0x48188F65,0xEE6F84D1,0x2BC8BA5F,0x8DBFB1EB,0xBC57AB76,0x1A20A0C2, + 0x8816EAF2,0x2E61E146,0x1F89FBDB,0xB9FEF06F,0x7C59CEE1,0xDA2EC555,0xEBC6DFC8,0x4DB1D47C, + 0xBBF9A495,0x1D8EAF21,0x2C66B5BC,0x8A11BE08,0x4FB68086,0xE9C18B32,0xD82991AF,0x7E5E9A1B, + 0xEFC8763C,0x49BF7D88,0x78576715,0xDE206CA1,0x1B87522F,0xBDF0599B,0x8C184306,0x2A6F48B2, + 0xDC27385B,0x7A5033EF,0x4BB82972,0xEDCF22C6,0x28681C48,0x8E1F17FC,0xBFF70D61,0x198006D5, + 0x47ABD36E,0xE1DCD8DA,0xD034C247,0x7643C9F3,0xB3E4F77D,0x1593FCC9,0x247BE654,0x820CEDE0, + 0x74449D09,0xD23396BD,0xE3DB8C20,0x45AC8794,0x800BB91A,0x267CB2AE,0x1794A833,0xB1E3A387, + 0x20754FA0,0x86024414,0xB7EA5E89,0x119D553D,0xD43A6BB3,0x724D6007,0x43A57A9A,0xE5D2712E, + 0x139A01C7,0xB5ED0A73,0x840510EE,0x22721B5A,0xE7D525D4,0x41A22E60,0x704A34FD,0xD63D3F49, + 0xCC1D9F8B,0x6A6A943F,0x5B828EA2,0xFDF58516,0x3852BB98,0x9E25B02C,0xAFCDAAB1,0x09BAA105, + 0xFFF2D1EC,0x5985DA58,0x686DC0C5,0xCE1ACB71,0x0BBDF5FF,0xADCAFE4B,0x9C22E4D6,0x3A55EF62, + 0xABC30345,0x0DB408F1,0x3C5C126C,0x9A2B19D8,0x5F8C2756,0xF9FB2CE2,0xC813367F,0x6E643DCB, + 0x982C4D22,0x3E5B4696,0x0FB35C0B,0xA9C457BF,0x6C636931,0xCA146285,0xFBFC7818,0x5D8B73AC, + 0x03A0A617,0xA5D7ADA3,0x943FB73E,0x3248BC8A,0xF7EF8204,0x519889B0,0x6070932D,0xC6079899, + 0x304FE870,0x9638E3C4,0xA7D0F959,0x01A7F2ED,0xC400CC63,0x6277C7D7,0x539FDD4A,0xF5E8D6FE, + 0x647E3AD9,0xC209316D,0xF3E12BF0,0x55962044,0x90311ECA,0x3646157E,0x07AE0FE3,0xA1D90457, + 0x579174BE,0xF1E67F0A,0xC00E6597,0x66796E23,0xA3DE50AD,0x05A95B19,0x34414184,0x92364A30, + }, + + { + 0x00000000,0xCCAA009E,0x4225077D,0x8E8F07E3,0x844A0EFA,0x48E00E64,0xC66F0987,0x0AC50919, + 0xD3E51BB5,0x1F4F1B2B,0x91C01CC8,0x5D6A1C56,0x57AF154F,0x9B0515D1,0x158A1232,0xD92012AC, + 0x7CBB312B,0xB01131B5,0x3E9E3656,0xF23436C8,0xF8F13FD1,0x345B3F4F,0xBAD438AC,0x767E3832, + 0xAF5E2A9E,0x63F42A00,0xED7B2DE3,0x21D12D7D,0x2B142464,0xE7BE24FA,0x69312319,0xA59B2387, + 
0xF9766256,0x35DC62C8,0xBB53652B,0x77F965B5,0x7D3C6CAC,0xB1966C32,0x3F196BD1,0xF3B36B4F, + 0x2A9379E3,0xE639797D,0x68B67E9E,0xA41C7E00,0xAED97719,0x62737787,0xECFC7064,0x205670FA, + 0x85CD537D,0x496753E3,0xC7E85400,0x0B42549E,0x01875D87,0xCD2D5D19,0x43A25AFA,0x8F085A64, + 0x562848C8,0x9A824856,0x140D4FB5,0xD8A74F2B,0xD2624632,0x1EC846AC,0x9047414F,0x5CED41D1, + 0x299DC2ED,0xE537C273,0x6BB8C590,0xA712C50E,0xADD7CC17,0x617DCC89,0xEFF2CB6A,0x2358CBF4, + 0xFA78D958,0x36D2D9C6,0xB85DDE25,0x74F7DEBB,0x7E32D7A2,0xB298D73C,0x3C17D0DF,0xF0BDD041, + 0x5526F3C6,0x998CF358,0x1703F4BB,0xDBA9F425,0xD16CFD3C,0x1DC6FDA2,0x9349FA41,0x5FE3FADF, + 0x86C3E873,0x4A69E8ED,0xC4E6EF0E,0x084CEF90,0x0289E689,0xCE23E617,0x40ACE1F4,0x8C06E16A, + 0xD0EBA0BB,0x1C41A025,0x92CEA7C6,0x5E64A758,0x54A1AE41,0x980BAEDF,0x1684A93C,0xDA2EA9A2, + 0x030EBB0E,0xCFA4BB90,0x412BBC73,0x8D81BCED,0x8744B5F4,0x4BEEB56A,0xC561B289,0x09CBB217, + 0xAC509190,0x60FA910E,0xEE7596ED,0x22DF9673,0x281A9F6A,0xE4B09FF4,0x6A3F9817,0xA6959889, + 0x7FB58A25,0xB31F8ABB,0x3D908D58,0xF13A8DC6,0xFBFF84DF,0x37558441,0xB9DA83A2,0x7570833C, + 0x533B85DA,0x9F918544,0x111E82A7,0xDDB48239,0xD7718B20,0x1BDB8BBE,0x95548C5D,0x59FE8CC3, + 0x80DE9E6F,0x4C749EF1,0xC2FB9912,0x0E51998C,0x04949095,0xC83E900B,0x46B197E8,0x8A1B9776, + 0x2F80B4F1,0xE32AB46F,0x6DA5B38C,0xA10FB312,0xABCABA0B,0x6760BA95,0xE9EFBD76,0x2545BDE8, + 0xFC65AF44,0x30CFAFDA,0xBE40A839,0x72EAA8A7,0x782FA1BE,0xB485A120,0x3A0AA6C3,0xF6A0A65D, + 0xAA4DE78C,0x66E7E712,0xE868E0F1,0x24C2E06F,0x2E07E976,0xE2ADE9E8,0x6C22EE0B,0xA088EE95, + 0x79A8FC39,0xB502FCA7,0x3B8DFB44,0xF727FBDA,0xFDE2F2C3,0x3148F25D,0xBFC7F5BE,0x736DF520, + 0xD6F6D6A7,0x1A5CD639,0x94D3D1DA,0x5879D144,0x52BCD85D,0x9E16D8C3,0x1099DF20,0xDC33DFBE, + 0x0513CD12,0xC9B9CD8C,0x4736CA6F,0x8B9CCAF1,0x8159C3E8,0x4DF3C376,0xC37CC495,0x0FD6C40B, + 0x7AA64737,0xB60C47A9,0x3883404A,0xF42940D4,0xFEEC49CD,0x32464953,0xBCC94EB0,0x70634E2E, + 0xA9435C82,0x65E95C1C,0xEB665BFF,0x27CC5B61,0x2D095278,0xE1A352E6,0x6F2C5505,0xA386559B, + 0x061D761C,0xCAB77682,0x44387161,0x889271FF,0x825778E6,0x4EFD7878,0xC0727F9B,0x0CD87F05, + 0xD5F86DA9,0x19526D37,0x97DD6AD4,0x5B776A4A,0x51B26353,0x9D1863CD,0x1397642E,0xDF3D64B0, + 0x83D02561,0x4F7A25FF,0xC1F5221C,0x0D5F2282,0x079A2B9B,0xCB302B05,0x45BF2CE6,0x89152C78, + 0x50353ED4,0x9C9F3E4A,0x121039A9,0xDEBA3937,0xD47F302E,0x18D530B0,0x965A3753,0x5AF037CD, + 0xFF6B144A,0x33C114D4,0xBD4E1337,0x71E413A9,0x7B211AB0,0xB78B1A2E,0x39041DCD,0xF5AE1D53, + 0x2C8E0FFF,0xE0240F61,0x6EAB0882,0xA201081C,0xA8C40105,0x646E019B,0xEAE10678,0x264B06E6, + } +}; + +/// compute CRC32 (Slicing-by-8 algorithm), unroll inner loop 4 times +uint32_t crc32_slice_by_8(const void* data, size_t length, uint32_t previousCrc32) +{ + uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF + const uint32_t* current = (const uint32_t*) data; + + // enabling optimization (at least -O2) automatically unrolls the inner for-loop + const size_t Unroll = 4; + const size_t BytesAtOnce = 8 * Unroll; + const uint8_t* currentChar; + + // process 4x eight bytes at once (Slicing-by-8) + while (length >= BytesAtOnce) + { + size_t unrolling; + for (unrolling = 0; unrolling < Unroll; unrolling++) + { +#if BORG_BIG_ENDIAN + uint32_t one = *current++ ^ _le32toh(crc); + uint32_t two = *current++; + crc = Crc32Lookup[0][ two & 0xFF] ^ + Crc32Lookup[1][(two>> 8) & 0xFF] ^ + Crc32Lookup[2][(two>>16) & 0xFF] ^ + Crc32Lookup[3][(two>>24) & 0xFF] ^ + Crc32Lookup[4][ one & 0xFF] ^ + Crc32Lookup[5][(one>> 8) & 0xFF] ^ + Crc32Lookup[6][(one>>16) & 0xFF] ^ + 
Crc32Lookup[7][(one>>24) & 0xFF]; +#else + uint32_t one = *current++ ^ crc; + uint32_t two = *current++; + crc = Crc32Lookup[0][(two>>24) & 0xFF] ^ + Crc32Lookup[1][(two>>16) & 0xFF] ^ + Crc32Lookup[2][(two>> 8) & 0xFF] ^ + Crc32Lookup[3][ two & 0xFF] ^ + Crc32Lookup[4][(one>>24) & 0xFF] ^ + Crc32Lookup[5][(one>>16) & 0xFF] ^ + Crc32Lookup[6][(one>> 8) & 0xFF] ^ + Crc32Lookup[7][ one & 0xFF]; +#endif + + } + + length -= BytesAtOnce; + } + + currentChar = (const uint8_t*) current; + // remaining 1 to 31 bytes (standard algorithm) + while (length-- != 0) + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + + return ~crc; // same as crc ^ 0xFFFFFFFF +} diff --git a/src/borg/algorithms/xxh64/xxhash.c b/src/borg/algorithms/xxh64/xxhash.c new file mode 100644 index 00000000..0d0b3a52 --- /dev/null +++ b/src/borg/algorithms/xxh64/xxhash.c @@ -0,0 +1,615 @@ +/* +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. 
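+ * As a purely hypothetical illustration (not a setting Borg's build requires): a caller
+ * that only ever hashes buffers known to be 8-byte aligned could compile with
+ * -DXXH_FORCE_ALIGN_CHECK=0 to drop that initial branch.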
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void XXH_free (void* p) { free(p); }
+/* for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# define FORCE_INLINE static __forceinline
+#else
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+# else
+# define FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+# endif
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr)
+{
+ U32 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static U32 XXH_swap32 (U32 x)
+{
+ return ((x << 24) & 0xff000000 ) |
+ ((x << 8) & 0x00ff0000 ) |
+ ((x >> 8) & 0x0000ff00 ) |
+ ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* *************************************
+* Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+ static const int g_one = 1;
+# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+/* *************************************
+* Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bits hash functions
+*********************************************************************/
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+/*====== Memory access ======*/
+
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t U64;
+# else
+ typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; + +static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
 XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/*====== xxh64 ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 = 1609587929392839161ULL;
+static const U64 PRIME64_4 = 9650029242287828579ULL;
+static const U64 PRIME64_5 = 2870177450012600261ULL;
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+ acc += input * PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+ val = XXH64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+ U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)32;
+ }
+#endif
+
+ if (len>=32) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = seed + PRIME64_1 + PRIME64_2;
+ U64 v2 = seed + PRIME64_2;
+ U64 v3 = seed + 0;
+ U64 v4 = seed - PRIME64_1;
+
+ do {
+ v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+ } while (p<=limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (U64) len;
+
+ while (p+8<=bEnd) {
+ U64 const k1 = XXH64_round(0, XXH_get64bits(p));
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+ p+=8;
+ }
+
+ if (p+4<=bEnd) {
+ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH64_CREATESTATE_STATIC(state);
+ XXH64_reset(state, seed);
+ XXH64_update(state, input, len);
+ return XXH64_digest(state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/*====== Hash Streaming ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
+{
+ 
 memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+ XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ memcpy(statePtr, &state, sizeof(state));
+ return XXH_OK;
+}
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+ if (input==NULL) return XXH_ERROR;
+#endif
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+ p += 32-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p+32 <= bEnd) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
+
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+ const BYTE * p = (const BYTE*)state->mem64;
+ const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
+ U64 h64;
+
+ if (state->total_len >= 32) {
+ U64 const v1 = state->v1;
+ U64 const v2 = state->v2;
+ U64 const v3 = state->v3;
+ U64 const v4 = state->v4;
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+ } else {
+ h64 = state->v3 + PRIME64_5;
+ }
+
+ h64 += (U64) state->total_len;
+
+ while (p+8<=bEnd) {
+ U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+ p+=8;
+ }
+
+ if (p+4<=bEnd) {
+ h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p+=4;
+ }
+
+ while (p<bEnd) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 
>> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/*====== Canonical representation ======*/ + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + +#endif /* XXH_NO_LONG_LONG */ diff --git a/src/borg/algorithms/xxh64/xxhash.h b/src/borg/algorithms/xxh64/xxhash.h new file mode 100644 index 00000000..5e5d4cf4 --- /dev/null +++ b/src/borg/algorithms/xxh64/xxhash.h @@ -0,0 +1,245 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. 
+It offers much better speed, but for 64-bits applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#define XXH_STATIC_LINKING_ONLY
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+* Compiler specifics
+******************************/
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */
+# define restrict /* disable restrict */
+#endif
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+* API modifier
+******************************/
+/** XXH_PRIVATE_API
+* This is useful to include xxhash functions in `static` mode
+* in order to inline them, and remove their symbol from the public list.
+* Methodology :
+* #define XXH_PRIVATE_API
+* #include "xxhash.h"
+* `xxhash.c` is automatically included.
+* It's not useful to compile and link it as a separate module.
+*/
+#ifdef XXH_PRIVATE_API
+# ifndef XXH_STATIC_LINKING_ONLY
+# define XXH_STATIC_LINKING_ONLY
+# endif
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */
+# endif
+#else
+# define XXH_PUBLIC_API /* do nothing */
+#endif /* XXH_PRIVATE_API */
+
+/*!XXH_NAMESPACE, aka Namespace Emulation :
+
+If you want to include _and expose_ xxHash functions from within your own library,
+but also want to avoid symbol collisions with other libraries which may also include xxHash,
+
+you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+
+Note that no change is required within the calling program as long as it includes `xxhash.h` :
+regular symbol name will be automatically translated by this header. 
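+
+A purely illustrative sketch (the `mylib_` prefix below is hypothetical, not part of xxHash or Borg):
+ cc -DXXH_NAMESPACE=mylib_ -c xxhash.c (this object file now exports mylib_XXH64)
+ cc -DXXH_NAMESPACE=mylib_ -c caller.c (caller.c keeps calling XXH64(buf, len, 0) unchanged)
+Because the renaming is done by the preprocessor, only the link-time symbol changes.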
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + +#ifndef XXH_NO_LONG_LONG +/*-********************************************************************** +* 64-bits hash +************************************************************************/ +typedef unsigned long long XXH64_hash_t; + +/*! XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*====== Streaming ======*/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/*====== Canonical representation ======*/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); +#endif /* XXH_NO_LONG_LONG */ + + +#ifdef XXH_STATIC_LINKING_ONLY + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. 
+ They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + +#ifndef XXH_NO_LONG_LONG + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ +#endif + +# ifdef XXH_PRIVATE_API +# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* XXHASH_H_5627135585666179 */ diff --git a/src/borg/archive.py b/src/borg/archive.py index c3dde440..1b943494 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1,44 +1,50 @@ import errno +import json import os import socket import stat import sys import time +from collections import OrderedDict from contextlib import contextmanager -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta +from functools import partial from getpass import getuser from io import BytesIO -from itertools import groupby +from itertools import groupby, zip_longest from shutil import get_terminal_size import msgpack from .logger import create_logger + logger = create_logger() from . 
import xattr -from .cache import ChunkListEntry from .chunker import Chunker +from .cache import ChunkListEntry +from .crypto.key import key_factory +from .compress import Compressor, CompressionSpec from .constants import * # NOQA -from .hashindex import ChunkIndex, ChunkIndexEntry +from .crypto.low_level import IntegrityError as IntegrityErrorBase +from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer from .helpers import Manifest -from .helpers import Chunk, ChunkIteratorFileWrapper, open_item -from .helpers import Error, IntegrityError +from .helpers import hardlinkable +from .helpers import ChunkIteratorFileWrapper, open_item +from .helpers import Error, IntegrityError, set_ec from .helpers import uid2user, user2uid, gid2group, group2gid from .helpers import parse_timestamp, to_localtime -from .helpers import format_time, format_timedelta, format_file_size, file_status +from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates -from .helpers import decode_dict, StableDict -from .helpers import int_to_bigint, bigint_to_int, bin_to_hex -from .helpers import ProgressIndicatorPercent, log_multi -from .helpers import PathPrefixPattern, FnmatchPattern -from .helpers import consume -from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec -from .item import Item -from .key import key_factory +from .helpers import StableDict +from .helpers import bin_to_hex +from .helpers import safe_ns +from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi +from .patterns import PathPrefixPattern, FnmatchPattern, IECommand +from .item import Item, ArchiveItem, ItemDiff from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .remote import cache_if_remote -from .repository import Repository +from .repository import Repository, LIST_SCAN_LIMIT if sys.platform == 'win32': from .platform import get_owner, set_owner @@ -51,7 +57,8 @@ flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) class Statistics: - def __init__(self): + def __init__(self, output_json=False): + self.output_json = output_json self.osize = self.csize = self.usize = self.nfiles = 0 self.last_progress = 0 # timestamp when last progress was shown @@ -61,9 +68,7 @@ class Statistics: if unique: self.usize += csize - summary = """\ - Original size Compressed size Deduplicated size -{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}""" + summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}" def __str__(self): return self.summary.format(stats=self, label='This archive:') @@ -72,6 +77,14 @@ class Statistics: return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format( cls=type(self).__name__, hash=id(self), self=self) + def as_dict(self): + return { + 'original_size': FileSize(self.osize), + 'compressed_size': FileSize(self.csize), + 'deduplicated_size': FileSize(self.usize), + 'nfiles': self.nfiles, + } + @property def osize_fmt(self): return format_file_size(self.osize) @@ -85,20 +98,33 @@ class Statistics: return format_file_size(self.csize) def show_progress(self, item=None, final=False, stream=None, dt=None): - now = time.time() + now = time.monotonic() if dt is None or now - self.last_progress > dt: self.last_progress = now - columns, lines = get_terminal_size() - if not final: - msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N 
'.format(self) - path = remove_surrogates(item.path) if item else '' - space = columns - swidth(msg) - if space < swidth('...') + swidth(path): - path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:]) - msg += "{0:<{space}}".format(path, space=space) + if self.output_json: + data = self.as_dict() + data.update({ + 'time': time.time(), + 'type': 'archive_progress', + 'path': remove_surrogates(item.path if item else ''), + }) + msg = json.dumps(data) + end = '\n' else: - msg = ' ' * columns - print(msg, file=stream or sys.stderr, end="\r", flush=True) + columns, lines = get_terminal_size() + if not final: + msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) + path = remove_surrogates(item.path) if item else '' + space = columns - swidth(msg) + if space < 12: + msg = '' + space = columns - swidth(msg) + if space >= 8: + msg += ellipsis_truncate(path, space) + else: + msg = ' ' * columns + end = '\r' + print(msg, end=end, file=stream or sys.stderr, flush=True) def is_special(mode): @@ -116,32 +142,46 @@ class BackupOSError(Exception): Any unwrapped IO error is critical and aborts execution (for example repository IO failure). """ - def __init__(self, os_error): + def __init__(self, op, os_error): + self.op = op self.os_error = os_error self.errno = os_error.errno self.strerror = os_error.strerror self.filename = os_error.filename def __str__(self): - return str(self.os_error) + if self.op: + return '%s: %s' % (self.op, self.os_error) + else: + return str(self.os_error) -@contextmanager -def backup_io(): - """Context manager changing OSError to BackupOSError.""" - try: - yield - except OSError as os_error: - raise BackupOSError(os_error) from os_error +class BackupIO: + op = '' + + def __call__(self, op=''): + self.op = op + return self + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type and issubclass(exc_type, OSError): + raise BackupOSError(self.op, exc_val) from exc_val + + +backup_io = BackupIO() def backup_io_iter(iterator): + backup_io.op = 'read' while True: - try: - with backup_io(): + with backup_io: + try: item = next(iterator) - except StopIteration: - return + except StopIteration: + return yield item @@ -152,15 +192,24 @@ class DownloadPipeline: self.key = key def unpack_many(self, ids, filter=None, preload=False): + """ + Return iterator of items. + + *ids* is a chunk ID list of an item stream. *filter* is a callable + to decide whether an item will be yielded. *preload* preloads the data chunks of every yielded item. + + Warning: if *preload* is True then all data chunks of every yielded item have to be retrieved, + otherwise preloaded chunks will accumulate in RemoteRepository and create a memory leak. 
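+
+ A minimal usage sketch (``archive_metadata`` and ``process`` are placeholders for
+ illustration, not names used by borg itself):
+
+ pipeline = DownloadPipeline(repository, key)
+ for item in pipeline.unpack_many(archive_metadata.items, preload=False,
+ filter=lambda item: item.path.startswith('home/')):
+ process(item) # consume every yielded Item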
+ """ unpacker = msgpack.Unpacker(use_list=False) - for _, data in self.fetch_many(ids): + for data in self.fetch_many(ids): unpacker.feed(data) items = [Item(internal_dict=item) for item in unpacker] - if filter: - items = [item for item in items if filter(item)] for item in items: if 'chunks' in item: item.chunks = [ChunkListEntry(*e) for e in item.chunks] + if filter: + items = [item for item in items if filter(item)] if preload: for item in items: if 'chunks' in item: @@ -174,7 +223,7 @@ class DownloadPipeline: class ChunkBuffer: - BUFFER_SIZE = 1 * 1024 * 1024 + BUFFER_SIZE = 8 * 1024 * 1024 def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS): self.buffer = BytesIO() @@ -195,7 +244,9 @@ class ChunkBuffer: if self.buffer.tell() == 0: return self.buffer.seek(0) - chunks = list(Chunk(bytes(s)) for s in self.chunker.chunkify(self.buffer)) + # The chunker returns a memoryview to its internal buffer, + # thus a copy is needed before resuming the chunker iterator. + chunks = list(bytes(s) for s in self.chunker.chunkify(self.buffer)) self.buffer.seek(0) self.buffer.truncate(0) # Leave the last partial chunk in the buffer unless flush is True @@ -203,7 +254,7 @@ class ChunkBuffer: for chunk in chunks[:end]: self.chunks.append(self.write_chunk(chunk)) if end == -1: - self.buffer.write(chunks[-1].data) + self.buffer.write(chunks[-1]) def is_full(self): return self.buffer.tell() > self.BUFFER_SIZE @@ -217,7 +268,8 @@ class CacheChunkBuffer(ChunkBuffer): self.stats = stats def write_chunk(self, chunk): - id_, _, _ = self.cache.add_chunk(self.key.id_hash(chunk.data), chunk, self.stats) + id_, _, _ = self.cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats, wait=False) + self.cache.repository.async_response(wait=False) return id_ @@ -233,36 +285,42 @@ class Archive: """Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable.""" def __init__(self, repository, key, manifest, name, cache=None, create=False, - checkpoint_interval=300, numeric_owner=False, progress=False, - chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None): + checkpoint_interval=300, numeric_owner=False, noatime=False, noctime=False, nobsdflags=False, + progress=False, chunker_params=CHUNKER_PARAMS, start=None, start_monotonic=None, end=None, + consider_part_files=False, log_json=False): self.cwd = os.getcwd() self.key = key self.repository = repository self.cache = cache self.manifest = manifest self.hard_links = {} - self.stats = Statistics() + self.stats = Statistics(output_json=log_json) self.show_progress = progress - self.name = name + self.name = name # overwritten later with name from archive metadata + self.name_in_manifest = name # can differ from .name later (if borg check fixed duplicate archive names) + self.comment = None self.checkpoint_interval = checkpoint_interval self.numeric_owner = numeric_owner + self.noatime = noatime + self.noctime = noctime + self.nobsdflags = nobsdflags + assert (start is None) == (start_monotonic is None), 'Logic error: if start is given, start_monotonic must be given as well and vice versa.' 
if start is None: start = datetime.utcnow() + start_monotonic = time.monotonic() self.chunker_params = chunker_params self.start = start + self.start_monotonic = start_monotonic if end is None: end = datetime.utcnow() self.end = end + self.consider_part_files = consider_part_files self.pipeline = DownloadPipeline(self.repository, self.key) - if create: + self.create = create + if self.create: self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) - self.chunker = Chunker(self.key.chunk_seed, *chunker_params) - self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'), - compression_files or []) - key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none')) if name in manifest.archives: raise self.AlreadyExists(name) - self.last_checkpoint = time.time() i = 0 while True: self.checkpoint_name = '%s.checkpoint%s' % (name, i and ('.%d' % i) or '') @@ -270,37 +328,37 @@ class Archive: break i += 1 else: - if name not in self.manifest.archives: + info = self.manifest.archives.get(name) + if info is None: raise self.DoesNotExist(name) - info = self.manifest.archives[name] - self.load(info[b'id']) - self.zeros = b'\0' * (1 << chunker_params[1]) + self.load(info.id) + self.zeros = None def _load_meta(self, id): - _, data = self.key.decrypt(id, self.repository.get(id)) - metadata = msgpack.unpackb(data) - if metadata[b'version'] != 1: + data = self.key.decrypt(id, self.repository.get(id)) + metadata = ArchiveItem(internal_dict=msgpack.unpackb(data, unicode_errors='surrogateescape')) + if metadata.version != 1: raise Exception('Unknown archive metadata version') return metadata def load(self, id): self.id = id self.metadata = self._load_meta(self.id) - decode_dict(self.metadata, ARCHIVE_TEXT_KEYS) - self.metadata[b'cmdline'] = [safe_decode(arg) for arg in self.metadata[b'cmdline']] - self.name = self.metadata[b'name'] + self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline] + self.name = self.metadata.name + self.comment = self.metadata.get('comment', '') @property def ts(self): """Timestamp of archive creation (start) in UTC""" - ts = self.metadata[b'time'] + ts = self.metadata.time return parse_timestamp(ts) @property def ts_end(self): """Timestamp of archive creation (end) in UTC""" # fall back to time if there is no time_end present in metadata - ts = self.metadata.get(b'time_end') or self.metadata[b'time'] + ts = self.metadata.get('time_end') or self.metadata.time return parse_timestamp(ts) @property @@ -315,6 +373,37 @@ class Archive: def duration_from_meta(self): return format_timedelta(self.ts_end - self.ts) + def info(self): + if self.create: + stats = self.stats + start = self.start.replace(tzinfo=timezone.utc) + end = self.end.replace(tzinfo=timezone.utc) + else: + stats = self.calc_stats(self.cache) + start = self.ts + end = self.ts_end + info = { + 'name': self.name, + 'id': self.fpr, + 'start': OutputTimestamp(start), + 'end': OutputTimestamp(end), + 'duration': (end - start).total_seconds(), + 'stats': stats.as_dict(), + 'limits': { + 'max_archive_size': self.cache.chunks[self.id].csize / MAX_DATA_SIZE, + }, + } + if self.create: + info['command_line'] = sys.argv + else: + info.update({ + 'command_line': self.metadata.cmdline, + 'hostname': self.metadata.hostname, + 'username': self.metadata.username, + 'comment': self.metadata.get('comment', ''), + }) + return info + def __str__(self): return '''\ Archive name: {0.name} @@ -322,25 +411,32 @@ Archive fingerprint: {0.fpr} Time (start): {start} 
Time (end): {end} Duration: {0.duration} -Number of files: {0.stats.nfiles}'''.format( +Number of files: {0.stats.nfiles} +Utilization of max. archive size: {csize_max:.0%} +'''.format( self, - start=format_time(to_localtime(self.start.replace(tzinfo=timezone.utc))), - end=format_time(to_localtime(self.end.replace(tzinfo=timezone.utc)))) + start=OutputTimestamp(self.start.replace(tzinfo=timezone.utc)), + end=OutputTimestamp(self.end.replace(tzinfo=timezone.utc)), + csize_max=self.cache.chunks[self.id].csize / MAX_DATA_SIZE) def __repr__(self): return 'Archive(%r)' % self.name + def item_filter(self, item, filter=None): + if not self.consider_part_files and 'part' in item: + # this is a part(ial) file, we usually don't want to consider it. + return False + return filter(item) if filter else True + def iter_items(self, filter=None, preload=False): - for item in self.pipeline.unpack_many(self.metadata[b'items'], filter=filter, preload=preload): + for item in self.pipeline.unpack_many(self.metadata.items, preload=preload, + filter=lambda item: self.item_filter(item, filter)): yield item - def add_item(self, item): - if self.show_progress: + def add_item(self, item, show_progress=True): + if show_progress and self.show_progress: self.stats.show_progress(item=item, dt=0.2) self.items_buffer.add(item) - if self.checkpoint_interval and time.time() - self.last_checkpoint > self.checkpoint_interval: - self.write_checkpoint() - self.last_checkpoint = time.time() def write_checkpoint(self): self.save(self.checkpoint_name) @@ -352,65 +448,81 @@ Number of files: {0.stats.nfiles}'''.format( if name in self.manifest.archives: raise self.AlreadyExists(name) self.items_buffer.flush(flush=True) + duration = timedelta(seconds=time.monotonic() - self.start_monotonic) if timestamp is None: - self.end = datetime.utcnow() - start = self.start - end = self.end + end = datetime.utcnow() + start = end - duration else: - self.end = timestamp + end = timestamp + duration start = timestamp - end = timestamp # we only have 1 value + self.start = start + self.end = end metadata = { 'version': 1, 'name': name, - 'comment': comment, + 'comment': comment or '', 'items': self.items_buffer.chunks, 'cmdline': sys.argv, 'hostname': socket.gethostname(), 'username': getuser(), - 'time': start.isoformat(), - 'time_end': end.isoformat(), + 'time': start.strftime(ISO_FORMAT), + 'time_end': end.strftime(ISO_FORMAT), 'chunker_params': self.chunker_params, } metadata.update(additional_metadata or {}) - data = msgpack.packb(StableDict(metadata), unicode_errors='surrogateescape') + metadata = ArchiveItem(metadata) + data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b'archive') self.id = self.key.id_hash(data) - self.cache.add_chunk(self.id, Chunk(data), self.stats) - self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']} + self.cache.add_chunk(self.id, data, self.stats) + while self.repository.async_response(wait=True) is not None: + pass + self.manifest.archives[name] = (self.id, metadata.time) self.manifest.write() self.repository.commit() self.cache.commit() def calc_stats(self, cache): def add(id): - count, size, csize = cache.chunks[id] - stats.update(size, csize, count == 1) - cache.chunks[id] = count - 1, size, csize + entry = cache.chunks[id] + archive_index.add(id, 1, entry.size, entry.csize) - def add_file_chunks(chunks): - for id, _, _ in chunks: - add(id) - - # This function is a bit evil since it abuses the cache to calculate - # the stats. 
The cache transaction must be rolled back afterwards - unpacker = msgpack.Unpacker(use_list=False) - cache.begin_txn() - stats = Statistics() + archive_index = ChunkIndex() + sync = CacheSynchronizer(archive_index) add(self.id) - for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): + pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%') + for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)): + pi.show(increase=1) add(id) - _, data = self.key.decrypt(id, chunk) - unpacker.feed(data) - for item in unpacker: - item = Item(internal_dict=item) - if 'chunks' in item: - stats.nfiles += 1 - add_file_chunks(item.chunks) - cache.rollback() + data = self.key.decrypt(id, chunk) + sync.feed(data) + stats = Statistics() + stats.osize, stats.csize, unique_size, stats.usize, unique_chunks, chunks = archive_index.stats_against(cache.chunks) + stats.nfiles = sync.num_files + pi.finish() return stats + @contextmanager + def extract_helper(self, dest, item, path, stripped_components, original_path, hardlink_masters): + hardlink_set = False + # Hard link? + if 'source' in item: + source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:]) + chunks, link_target = hardlink_masters.get(item.source, (None, source)) + if link_target: + # Hard link was extracted previously, just link + with backup_io('link'): + os.link(link_target, path) + hardlink_set = True + elif chunks is not None: + # assign chunks to this item, since the item which had the chunks was not extracted + item.chunks = chunks + yield hardlink_set + if not hardlink_set and hardlink_masters: + # Update master entry with extracted item path, so that following hardlinks don't extract twice. + hardlink_masters[item.get('source') or original_path] = (None, path) + def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False, - hardlink_masters=None, original_path=None): + hardlink_masters=None, stripped_components=0, original_path=None, pi=None): """ Extract archive item. @@ -420,16 +532,28 @@ Number of files: {0.stats.nfiles}'''.format( :param stdout: write extracted data to stdout :param sparse: write sparse files (chunk-granularity, independent of the original being sparse) :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly + :param stripped_components: stripped leading path components to correct hard link extraction :param original_path: 'path' key as stored in archive + :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes) """ + hardlink_masters = hardlink_masters or {} has_damaged_chunks = 'chunks_healthy' in item if dry_run or stdout: if 'chunks' in item: - for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True): + item_chunks_size = 0 + for data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True): + if pi: + pi.show(increase=len(data), info=[remove_surrogates(item.path)]) if stdout: sys.stdout.buffer.write(data) + item_chunks_size += len(data) if stdout: sys.stdout.buffer.flush() + if 'size' in item: + item_size = item.size + if item_size != item_chunks_size: + logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format( + item.path, item_size, item_chunks_size)) if has_damaged_chunks: logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' 
% remove_surrogates(item.path)) @@ -437,12 +561,12 @@ Number of files: {0.stats.nfiles}'''.format( original_path = original_path or item.path dest = self.cwd - if item.path.startswith('/') or item.path.startswith('..') or (sys.platform == 'win32' and len(item.path) > 1 and item.path[1] == ':'): + if item.path.startswith(('/', '../')) or (sys.platform == 'win32' and len(item.path) > 1 and item.path[1] == ':'): raise Exception('Path should be relative and local') path = os.path.join(dest, item.path) # Attempt to remove existing files, ignore errors on failure try: - st = os.lstat(path) + st = os.stat(path, follow_symlinks=False) if stat.S_ISDIR(st.st_mode): os.rmdir(path) else: @@ -451,81 +575,86 @@ Number of files: {0.stats.nfiles}'''.format( raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None except OSError: pass + + def make_parent(path): + parent_dir = os.path.dirname(path) + if not os.path.exists(parent_dir): + os.makedirs(parent_dir) + mode = item.mode if stat.S_ISREG(mode): - with backup_io(): - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) - # Hard link? - if 'source' in item: - source = os.path.join(dest, item.source) - with backup_io(): - if os.path.exists(path): - os.unlink(path) - if not hardlink_masters: - os.link(source, path) - return - item.chunks, link_target = hardlink_masters[item.source] - if link_target: - # Hard link was extracted previously, just link - with backup_io(): - os.link(link_target, path) + with backup_io('makedirs'): + make_parent(path) + with self.extract_helper(dest, item, path, stripped_components, original_path, + hardlink_masters) as hardlink_set: + if hardlink_set: return - # Extract chunks, since the item which had the chunks was not extracted - with backup_io(): - fd = open(path, 'wb') - with fd: - ids = [c.id for c in item.chunks] - for _, data in self.pipeline.fetch_many(ids, is_preloaded=True): - with backup_io(): - if sparse and self.zeros.startswith(data): - # all-zero chunk: create a hole in a sparse file - fd.seek(len(data), 1) + if sparse and self.zeros is None: + self.zeros = b'\0' * (1 << self.chunker_params[1]) + with backup_io('open'): + fd = open(path, 'wb') + with fd: + ids = [c.id for c in item.chunks] + for data in self.pipeline.fetch_many(ids, is_preloaded=True): + if pi: + pi.show(increase=len(data), info=[remove_surrogates(item.path)]) + with backup_io('write'): + if sparse and self.zeros.startswith(data): + # all-zero chunk: create a hole in a sparse file + fd.seek(len(data), 1) + else: + fd.write(data) + with backup_io('truncate_and_attrs'): + pos = item_chunks_size = fd.tell() + fd.truncate(pos) + fd.flush() + if sys.platform != 'win32': + self.restore_attrs(path, item, fd=fd.fileno()) else: - fd.write(data) - with backup_io(): - pos = fd.tell() - fd.truncate(pos) - fd.flush() - if sys.platform != 'win32': - self.restore_attrs(path, item, fd=fd.fileno()) - else: - # File needs to be closed or timestamps are rewritten at close - fd.close() - self.restore_attrs(path, item) - if has_damaged_chunks: - logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' % - remove_surrogates(item.path)) - if hardlink_masters: - # Update master entry with extracted file path, so that following hardlinks don't extract twice. 
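A note on the sparse-extraction branch above (illustrative sketch): an all-zero chunk only advances the file offset instead of being written, and the final truncate is what gives the file its full length when it ends in a hole, since seeking past the end alone does not extend a file.

    if sparse and self.zeros.startswith(data):
        fd.seek(len(data), 1)          # relative seek: leaves a hole instead of writing zero bytes
    else:
        fd.write(data)
    # ... after the last chunk:
    fd.truncate(fd.tell())             # ensures a trailing hole still counts towards the file size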
- hardlink_masters[item.get('source') or original_path] = (None, path) + # File needs to be closed or timestamps are rewritten at close + fd.close() + self.restore_attrs(path, item) + if 'size' in item: + item_size = item.size + if item_size != item_chunks_size: + logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format( + item.path, item_size, item_chunks_size)) + if has_damaged_chunks: + logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' % + remove_surrogates(item.path)) return - with backup_io(): + with backup_io: # No repository access beyond this point. if stat.S_ISDIR(mode): + make_parent(path) if not os.path.exists(path): - os.makedirs(path) + os.mkdir(path) if restore_attrs: self.restore_attrs(path, item) elif stat.S_ISLNK(mode): - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) + make_parent(path) source = item.source - if os.path.exists(path): - os.unlink(path) try: os.symlink(source, path) except UnicodeEncodeError: raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None self.restore_attrs(path, item, symlink=True) elif stat.S_ISFIFO(mode): - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) - os.mkfifo(path) - self.restore_attrs(path, item) + make_parent(path) + with self.extract_helper(dest, item, path, stripped_components, original_path, + hardlink_masters) as hardlink_set: + if hardlink_set: + return + os.mkfifo(path) + self.restore_attrs(path, item) elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): - os.mknod(path, item.mode, item.rdev) - self.restore_attrs(path, item) + make_parent(path) + with self.extract_helper(dest, item, path, stripped_components, original_path, + hardlink_masters) as hardlink_set: + if hardlink_set: + return + os.mknod(path, item.mode, item.rdev) + self.restore_attrs(path, item) else: raise Exception('Unknown archive item type %r' % item.mode) @@ -535,10 +664,12 @@ Number of files: {0.stats.nfiles}'''.format( Does not access the repository. 
""" + backup_io.op = 'attrs' uid = gid = None if not self.numeric_owner: - uid = user2uid(item.user) - gid = group2gid(item.group) + if sys.platform != 'win32': + uid = user2uid(item.user) + gid = group2gid(item.group) uid = item.uid if uid is None else uid gid = item.gid if gid is None else gid # This code is a bit of a mess due to os specific differences @@ -546,15 +677,16 @@ Number of files: {0.stats.nfiles}'''.format( try: if fd: os.fchown(fd, uid, gid) - os.fchmod(fd, item.mode) else: - os.lchown(path, uid, gid) - if not symlink: - os.chmod(path, item.mode) - elif has_lchmod: # Not available on Linux - os.lchmod(path, item.mode) + os.chown(path, uid, gid, follow_symlinks=False) except OSError: pass + if fd: + os.fchmod(fd, item.mode) + elif not symlink: + os.chmod(path, item.mode) + elif has_lchmod: # Not available on Linux + os.lchmod(path, item.mode) else: try: set_owner(path, item.user, item.user_sid) @@ -566,14 +698,19 @@ Number of files: {0.stats.nfiles}'''.format( else: # old archives only had mtime in item metadata atime = mtime - if sys.platform == 'win32': - os.utime(path, ns=(atime, mtime)) - elif fd: - os.utime(fd, None, ns=(atime, mtime)) - else: - os.utime(path, None, ns=(atime, mtime), follow_symlinks=False) + try: + if sys.platform == 'win32': + os.utime(path, ns=(atime, mtime)) + elif fd: + os.utime(fd, None, ns=(atime, mtime)) + else: + os.utime(path, None, ns=(atime, mtime), follow_symlinks=False) + except OSError: + # some systems don't support calling utime on a symlink + pass acl_set(path, item, self.numeric_owner) - if 'bsdflags' in item: + + if not self.nobsdflags and 'bsdflags' in item: try: set_flags(path, item.bsdflags, fd=fd) except OSError: @@ -585,20 +722,29 @@ Number of files: {0.stats.nfiles}'''.format( try: xattr.setxattr(fd or path, k, v, follow_symlinks=False) except OSError as e: - if e.errno not in (errno.ENOTSUP, errno.EACCES): - # only raise if the errno is not on our ignore list: - # ENOTSUP == xattrs not supported here - # EACCES == permission denied to set this specific xattr - # (this may happen related to security.* keys) + if e.errno == errno.E2BIG: + # xattr is too big + logger.warning('%s: Value or key of extended attribute %s is too big for this filesystem' % + (path, k.decode())) + set_ec(EXIT_WARNING) + elif e.errno == errno.ENOTSUP: + # xattrs not supported here + logger.warning('%s: Extended attributes are not supported on this filesystem' % path) + set_ec(EXIT_WARNING) + elif e.errno == errno.EACCES: + # permission denied to set this specific xattr (this may happen related to security.* keys) + logger.warning('%s: Permission denied when setting extended attribute %s' % (path, k.decode())) + set_ec(EXIT_WARNING) + else: raise def set_meta(self, key, value): - metadata = StableDict(self._load_meta(self.id)) - metadata[key] = value - data = msgpack.packb(metadata, unicode_errors='surrogateescape') + metadata = self._load_meta(self.id) + setattr(metadata, key, value) + data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') new_id = self.key.id_hash(data) - self.cache.add_chunk(new_id, Chunk(data), self.stats) - self.manifest.archives[self.name] = {'id': new_id, 'time': metadata[b'time']} + self.cache.add_chunk(new_id, data, self.stats) + self.manifest.archives[self.name] = (new_id, metadata.time) self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -607,35 +753,44 @@ Number of files: {0.stats.nfiles}'''.format( raise self.AlreadyExists(name) oldname = self.name self.name = name - self.set_meta(b'name', 
name) + self.set_meta('name', name) del self.manifest.archives[oldname] def delete(self, stats, progress=False, forced=False): class ChunksIndexError(Error): """Chunk ID {} missing from chunks index, corrupted chunks index - aborting transaction.""" - def chunk_decref(id, stats): - nonlocal error + exception_ignored = object() + + def fetch_async_response(wait=True): try: - self.cache.chunk_decref(id, stats) + return self.repository.async_response(wait=wait) + except Repository.ObjectNotFound as e: + nonlocal error + # object not in repo - strange, but we wanted to delete it anyway. + if forced == 0: + raise + error = True + return exception_ignored # must not return None here + + def chunk_decref(id, stats): + try: + self.cache.chunk_decref(id, stats, wait=False) except KeyError: cid = bin_to_hex(id) raise ChunksIndexError(cid) - except Repository.ObjectNotFound as e: - # object not in repo - strange, but we wanted to delete it anyway. - if not forced: - raise - error = True + else: + fetch_async_response(wait=False) error = False try: unpacker = msgpack.Unpacker(use_list=False) - items_ids = self.metadata[b'items'] - pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True) + items_ids = self.metadata.items + pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", msgid='archive.delete') for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): if progress: pi.show(i) - _, data = self.key.decrypt(items_id, data) + data = self.key.decrypt(items_id, data) unpacker.feed(data) chunk_decref(items_id, stats) try: @@ -647,14 +802,14 @@ Number of files: {0.stats.nfiles}'''.format( except (TypeError, ValueError): # if items metadata spans multiple chunks and one chunk got dropped somehow, # it could be that unpacker yields bad types - if not forced: + if forced == 0: raise error = True if progress: pi.finish() except (msgpack.UnpackException, Repository.ObjectNotFound): # items metadata corrupted - if not forced: + if forced == 0: raise error = True # in forced delete mode, we try hard to delete at least the manifest entry, @@ -662,164 +817,14 @@ Number of files: {0.stats.nfiles}'''.format( # some harmless exception. chunk_decref(self.id, stats) del self.manifest.archives[self.name] + while fetch_async_response(wait=True) is not None: + # we did async deletes, process outstanding results (== exceptions), + # so there is nothing pending when we return and our caller wants to commit. 
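The queued-write pattern that this loop drains, collected in one sketch (illustrative; the same calls are used by CacheChunkBuffer.write_chunk and ChunksProcessor elsewhere in this patch): writes are queued without blocking, errors are picked up opportunistically, and a final blocking loop drains everything before the manifest is written and the repository committed.

    cache.add_chunk(id, data, stats, wait=False)         # queue the write, do not block
    repository.async_response(wait=False)                # opportunistically fetch a queued error, if any
    # ... once all writes are queued:
    while repository.async_response(wait=True) is not None:
        pass                                             # drain outstanding results (== exceptions) before commit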
+ pass if error: logger.warning('forced deletion succeeded, but the deleted archive was corrupted.') logger.warning('borg check --repair is required to free all space.') - def stat_attrs(self, st, path): - attrs = dict( - mode=st.st_mode, - - atime=st.st_atime_ns, - ctime=st.st_ctime_ns, - mtime=st.st_mtime_ns, - ) - if sys.platform == 'win32': - user_name, user_sid = get_owner(path) - attrs.update({ - 'uid': 0, 'user_sid': user_sid, 'user': user_name, - 'gid': st.st_gid, 'group': gid2group(st.st_gid), - }) - else: - attrs.update({ - 'uid': st.st_uid, 'user': uid2user(st.st_uid), - 'gid': st.st_gid, 'group': gid2group(st.st_gid), - }) - if self.numeric_owner: - attrs['user'] = attrs['group'] = None - with backup_io(): - xattrs = xattr.get_all(path, follow_symlinks=False) - bsdflags = get_flags(path, st) - acl_get(path, attrs, st, self.numeric_owner) - if xattrs: - attrs['xattrs'] = StableDict(xattrs) - if bsdflags: - attrs['bsdflags'] = bsdflags - return attrs - - def process_dir(self, path, st): - item = Item(path=make_path_safe(path)) - item.update(self.stat_attrs(st, path)) - self.add_item(item) - return 'd' # directory - - def process_fifo(self, path, st): - item = Item(path=make_path_safe(path)) - item.update(self.stat_attrs(st, path)) - self.add_item(item) - return 'f' # fifo - - def process_dev(self, path, st): - item = Item(path=make_path_safe(path), rdev=st.st_rdev) - item.update(self.stat_attrs(st, path)) - self.add_item(item) - if stat.S_ISCHR(st.st_mode): - return 'c' # char device - elif stat.S_ISBLK(st.st_mode): - return 'b' # block device - - def process_symlink(self, path, st): - source = os.readlink(path) - item = Item(path=make_path_safe(path), source=source) - item.update(self.stat_attrs(st, path)) - self.add_item(item) - return 's' # symlink - - def process_stdin(self, path, cache): - uid, gid = 0, 0 - fd = sys.stdin.buffer # binary - chunks = [] - for data in backup_io_iter(self.chunker.chunkify(fd)): - chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats)) - self.stats.nfiles += 1 - t = int(time.time()) * 1000000000 - item = Item( - path=path, - chunks=chunks, - mode=0o100660, # regular file, ug=rw - uid=uid, user=uid2user(uid), - gid=gid, group=gid2group(gid), - mtime=t, atime=t, ctime=t, - ) - self.add_item(item) - return 'i' # stdin - - def process_file(self, path, st, cache, ignore_inode=False): - status = None - safe_path = make_path_safe(path) - # Is it a hard link? - if st.st_nlink > 1: - source = self.hard_links.get((st.st_ino, st.st_dev)) - if (st.st_ino, st.st_dev) in self.hard_links: - item = Item(path=safe_path, source=source) - item.update(self.stat_attrs(st, path)) - self.add_item(item) - status = 'h' # regular file, hardlink (to already seen inodes) - return status - else: - self.hard_links[st.st_ino, st.st_dev] = safe_path - is_special_file = is_special(st.st_mode) - if not is_special_file: - path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) - ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) - else: - # in --read-special mode, we may be called for special files. 
- # there should be no information in the cache about special files processed in - # read-special mode, but we better play safe as this was wrong in the past: - path_hash = ids = None - first_run = not cache.files - if first_run: - logger.debug('Processing files ...') - chunks = None - if ids is not None: - # Make sure all ids are available - for id_ in ids: - if not cache.seen_chunk(id_): - break - else: - chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] - status = 'U' # regular file, unchanged - else: - status = 'A' # regular file, added - item = Item( - path=safe_path, - hardlink_master=st.st_nlink > 1, # item is a hard link and has the chunks - ) - # Only chunkify the file if needed - if chunks is None: - compress = self.compression_decider1.decide(path) - logger.debug('%s -> compression %s', path, compress['name']) - with backup_io(): - fh = Archive._open_rb(path) - with os.fdopen(fh, 'rb') as fd: - chunks = [] - for data in backup_io_iter(self.chunker.chunkify(fd, fh)): - chunks.append(cache.add_chunk(self.key.id_hash(data), - Chunk(data, compress=compress), - self.stats)) - if self.show_progress: - self.stats.show_progress(item=item, dt=0.2) - if not is_special_file: - # we must not memorize special files, because the contents of e.g. a - # block or char device will change without its mtime/size/inode changing. - cache.memorize_file(path_hash, st, [c.id for c in chunks]) - status = status or 'M' # regular file, modified (if not 'A' already) - item.chunks = chunks - item.update(self.stat_attrs(st, path)) - if is_special_file: - # we processed a special file like a regular file. reflect that in mode, - # so it can be extracted / accessed in FUSE mount like a regular file: - item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) - self.stats.nfiles += 1 - self.add_item(item) - return status - - @staticmethod - def list_archives(repository, key, manifest, cache=None): - # expensive! see also Manifest.list_archive_infos. - for name, info in manifest.archives.items(): - yield Archive(repository, key, manifest, name, cache=cache) - @staticmethod def _open_rb(path): try: @@ -832,6 +837,336 @@ Number of files: {0.stats.nfiles}'''.format( # Was this EPERM due to the O_NOATIME flag? Try again without it: return os.open(path, flags_normal) + @staticmethod + def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False): + """ + Yields tuples with a path and an ItemDiff instance describing changes/indicating equality. + + :param matcher: PatternMatcher class to restrict results to only matching paths. + :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives. 
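A consumption sketch for this iterator (illustrative; archive1/archive2 are opened Archive instances, matcher a PatternMatcher, and same_chunker_params a hypothetical flag that is True when both archives used identical chunker params):

    for path, item_diff in Archive.compare_archives_iter(archive1, archive2, matcher=matcher,
                                                         can_compare_chunk_ids=same_chunker_params):
        ...  # item_diff is an ItemDiff describing whether and how the item changed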
+ """ + + def hardlink_master_seen(item): + return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters + + def is_hardlink_master(item): + return item.get('hardlink_master', True) and 'source' not in item + + def update_hardlink_masters(item1, item2): + if is_hardlink_master(item1) or is_hardlink_master(item2): + hardlink_masters[item1.path] = (item1, item2) + + def has_hardlink_master(item, hardlink_masters): + return hardlinkable(item.mode) and item.get('source') in hardlink_masters + + def compare_items(item1, item2): + if has_hardlink_master(item1, hardlink_masters): + item1 = hardlink_masters[item1.source][0] + if has_hardlink_master(item2, hardlink_masters): + item2 = hardlink_masters[item2.source][1] + return ItemDiff(item1, item2, + archive1.pipeline.fetch_many([c.id for c in item1.get('chunks', [])]), + archive2.pipeline.fetch_many([c.id for c in item2.get('chunks', [])]), + can_compare_chunk_ids=can_compare_chunk_ids) + + def defer_if_necessary(item1, item2): + """Adds item tuple to deferred if necessary and returns True, if items were deferred""" + update_hardlink_masters(item1, item2) + defer = not hardlink_master_seen(item1) or not hardlink_master_seen(item2) + if defer: + deferred.append((item1, item2)) + return defer + + orphans_archive1 = OrderedDict() + orphans_archive2 = OrderedDict() + deferred = [] + hardlink_masters = {} + + for item1, item2 in zip_longest( + archive1.iter_items(lambda item: matcher.match(item.path)), + archive2.iter_items(lambda item: matcher.match(item.path)), + ): + if item1 and item2 and item1.path == item2.path: + if not defer_if_necessary(item1, item2): + yield (item1.path, compare_items(item1, item2)) + continue + if item1: + matching_orphan = orphans_archive2.pop(item1.path, None) + if matching_orphan: + if not defer_if_necessary(item1, matching_orphan): + yield (item1.path, compare_items(item1, matching_orphan)) + else: + orphans_archive1[item1.path] = item1 + if item2: + matching_orphan = orphans_archive1.pop(item2.path, None) + if matching_orphan: + if not defer_if_necessary(matching_orphan, item2): + yield (matching_orphan.path, compare_items(matching_orphan, item2)) + else: + orphans_archive2[item2.path] = item2 + # At this point orphans_* contain items that had no matching partner in the other archive + for added in orphans_archive2.values(): + path = added.path + deleted_item = Item.create_deleted(path) + update_hardlink_masters(deleted_item, added) + yield (path, compare_items(deleted_item, added)) + for deleted in orphans_archive1.values(): + path = deleted.path + deleted_item = Item.create_deleted(path) + update_hardlink_masters(deleted, deleted_item) + yield (path, compare_items(deleted, deleted_item)) + for item1, item2 in deferred: + assert hardlink_master_seen(item1) + assert hardlink_master_seen(item2) + yield (path, compare_items(item1, item2)) + + +class MetadataCollector: + def __init__(self, *, noatime, noctime, numeric_owner, nobsdflags): + self.noatime = noatime + self.noctime = noctime + self.numeric_owner = numeric_owner + self.nobsdflags = nobsdflags + + def stat_simple_attrs(self, st): + attrs = dict( + mode=st.st_mode, + uid=st.st_uid, + gid=st.st_gid, + mtime=safe_ns(st.st_mtime_ns), + ) + # borg can work with archives only having mtime (older attic archives do not have + # atime/ctime). it can be useful to omit atime/ctime, if they change without the + # file content changing - e.g. to get better metadata deduplication. 
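Roughly what stat_simple_attrs() produces for a regular file on a POSIX host (illustrative values; atime/ctime and user/group are included or omitted according to the flags discussed above):

    {'mode': 0o100644, 'uid': 1000, 'gid': 1000,
     'mtime': 1498175054123456789,           # nanoseconds, always present
     'atime': ..., 'ctime': ...,             # only when noatime / noctime are not set
     'user': 'alice', 'group': 'users'}      # both None when numeric_owner is set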
+ if not self.noatime: + attrs['atime'] = safe_ns(st.st_atime_ns) + if not self.noctime: + attrs['ctime'] = safe_ns(st.st_ctime_ns) + if self.numeric_owner: + attrs['user'] = attrs['group'] = None + else: + if sys.platform != 'win32': + attrs['user'] = uid2user(st.st_uid) + attrs['group'] = gid2group(st.st_gid) + return attrs + + def stat_ext_attrs(self, st, path): + attrs = {} + bsdflags = 0 + with backup_io('extended stat'): + xattrs = xattr.get_all(path, follow_symlinks=False) + if not self.nobsdflags: + bsdflags = get_flags(path, st) + acl_get(path, attrs, st, self.numeric_owner) + if xattrs: + attrs['xattrs'] = StableDict(xattrs) + if bsdflags: + attrs['bsdflags'] = bsdflags + return attrs + + def stat_attrs(self, st, path): + attrs = self.stat_simple_attrs(st) + if sys.platform == 'win32': + user_name, user_sid = get_owner(path) + attrs.update({ + 'uid': 0, 'user_sid': user_sid, 'user': user_name, + 'gid': st.st_gid, 'group': '', + }) + attrs.update(self.stat_ext_attrs(st, path)) + return attrs + + +class ChunksProcessor: + # Processes an iterator of chunks for an Item + + def __init__(self, *, key, cache, + add_item, write_checkpoint, + checkpoint_interval): + self.key = key + self.cache = cache + self.add_item = add_item + self.write_checkpoint = write_checkpoint + self.checkpoint_interval = checkpoint_interval + self.last_checkpoint = time.monotonic() + + def write_part_file(self, item, from_chunk, number): + item = Item(internal_dict=item.as_dict()) + length = len(item.chunks) + # the item should only have the *additional* chunks we processed after the last partial item: + item.chunks = item.chunks[from_chunk:] + # for borg recreate, we already have a size member in the source item (giving the total file size), + # but we consider only a part of the file here, thus we must recompute the size from the chunks: + item.get_size(memorize=True, from_chunks=True) + item.path += '.borg_part_%d' % number + item.part = number + number += 1 + self.add_item(item, show_progress=False) + self.write_checkpoint() + return length, number + + def process_file_chunks(self, item, cache, stats, chunk_iter, chunk_processor=None): + if not chunk_processor: + def chunk_processor(data): + chunk_entry = cache.add_chunk(self.key.id_hash(data), data, stats, wait=False) + self.cache.repository.async_response(wait=False) + return chunk_entry + + item.chunks = [] + from_chunk = 0 + part_number = 1 + for data in chunk_iter: + item.chunks.append(chunk_processor(data)) + if self.checkpoint_interval and time.monotonic() - self.last_checkpoint > self.checkpoint_interval: + from_chunk, part_number = self.write_part_file(item, from_chunk, part_number) + self.last_checkpoint = time.monotonic() + else: + if part_number > 1: + if item.chunks[from_chunk:]: + # if we already have created a part item inside this file, we want to put the final + # chunks (if any) into a part item also (so all parts can be concatenated to get + # the complete file): + from_chunk, part_number = self.write_part_file(item, from_chunk, part_number) + self.last_checkpoint = time.monotonic() + + # if we created part files, we have referenced all chunks from the part files, + # but we also will reference the same chunks also from the final, complete file: + for chunk in item.chunks: + cache.chunk_incref(chunk.id, stats, size=chunk.size) + + +class FilesystemObjectProcessors: + # When ported to threading, then this doesn't need chunker, cache, key any more. 
+ # write_checkpoint should then be in the item buffer, + # and process_file becomes a callback passed to __init__. + + def __init__(self, *, metadata_collector, cache, key, + add_item, process_file_chunks, + chunker_params): + self.metadata_collector = metadata_collector + self.cache = cache + self.key = key + self.add_item = add_item + self.process_file_chunks = process_file_chunks + + self.hard_links = {} + self.stats = Statistics() # threading: done by cache (including progress) + self.cwd = os.getcwd() + self.chunker = Chunker(key.chunk_seed, *chunker_params) + + @contextmanager + def create_helper(self, path, st, status=None, hardlinkable=True): + safe_path = make_path_safe(path) + item = Item(path=safe_path) + hardlink_master = False + hardlinked = hardlinkable and st.st_nlink > 1 + if hardlinked: + source = self.hard_links.get((st.st_ino, st.st_dev)) + if source is not None: + item.source = source + status = 'h' # hardlink (to already seen inodes) + else: + hardlink_master = True + yield item, status, hardlinked, hardlink_master + # if we get here, "with"-block worked ok without error/exception, the item was processed ok... + self.add_item(item) + # ... and added to the archive, so we can remember it to refer to it later in the archive: + if hardlink_master: + self.hard_links[(st.st_ino, st.st_dev)] = safe_path + + def process_dir(self, path, st): + with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + item.update(self.metadata_collector.stat_attrs(st, path)) + return status + + def process_fifo(self, path, st): + with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo + item.update(self.metadata_collector.stat_attrs(st, path)) + return status + + def process_dev(self, path, st, dev_type): + with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device + item.rdev = st.st_rdev + item.update(self.metadata_collector.stat_attrs(st, path)) + return status + + def process_symlink(self, path, st): + # note: using hardlinkable=False because we can not support hardlinked symlinks, + # due to the dual-use of item.source, see issue #2343: + # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks. 
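How create_helper() above deduplicates hard links during a backup (illustrative trace with hypothetical paths):

    # data/a.txt  (st_nlink=2, inode seen for the first time)
    #     -> full item with metadata and chunks, path remembered in self.hard_links
    # data/b.txt  (same st_ino/st_dev, seen later)
    #     -> item.source = 'data/a.txt', status 'h', no chunks stored again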
+ with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + with backup_io('readlink'): + source = os.readlink(path) + item.source = source + item.update(self.metadata_collector.stat_attrs(st, path)) + return status + + def process_stdin(self, path, cache): + uid, gid = 0, 0 + t = int(time.time()) * 1000000000 + item = Item( + path=path, + mode=0o100660, # regular file, ug=rw + uid=uid, user=uid2user(uid), + gid=gid, group=gid2group(gid), + mtime=t, atime=t, ctime=t, + ) + fd = sys.stdin.buffer # binary + self.process_file_chunks(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd))) + item.get_size(memorize=True) + self.stats.nfiles += 1 + self.add_item(item) + return 'i' # stdin + + def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEFAULT_FILES_CACHE_MODE): + with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet + is_special_file = is_special(st.st_mode) + if not hardlinked or hardlink_master: + if not is_special_file: + path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) + ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode) + else: + # in --read-special mode, we may be called for special files. + # there should be no information in the cache about special files processed in + # read-special mode, but we better play safe as this was wrong in the past: + path_hash = ids = None + first_run = not cache.files and cache.do_files + if first_run: + logger.debug('Processing files ...') + chunks = None + if ids is not None: + # Make sure all ids are available + for id_ in ids: + if not cache.seen_chunk(id_): + break + else: + chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] + status = 'U' # regular file, unchanged + else: + status = 'A' # regular file, added + item.hardlink_master = hardlinked + item.update(self.metadata_collector.stat_simple_attrs(st)) + # Only chunkify the file if needed + if chunks is not None: + item.chunks = chunks + else: + with backup_io('open'): + fh = Archive._open_rb(path) + with os.fdopen(fh, 'rb') as fd: + self.process_file_chunks(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh))) + if not is_special_file: + # we must not memorize special files, because the contents of e.g. a + # block or char device will change without its mtime/size/inode changing. + cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode) + status = status or 'M' # regular file, modified (if not 'A' already) + self.stats.nfiles += 1 + item.update(self.metadata_collector.stat_attrs(st, path)) + item.get_size(memorize=True) + if is_special_file: + # we processed a special file like a regular file. 
reflect that in mode, + # so it can be extracted / accessed in FUSE mount like a regular file: + item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) + return status + def valid_msgpacked_dict(d, keys_serialized): """check if the data looks like a msgpacked dict""" @@ -864,6 +1199,9 @@ def valid_msgpacked_dict(d, keys_serialized): class RobustUnpacker: """A restartable/robust version of the streaming msgpack unpacker """ + class UnpackerCrashed(Exception): + """raise if unpacker crashed""" + def __init__(self, validator, item_keys): super().__init__() self.item_keys = [msgpack.packb(name.encode()) for name in item_keys] @@ -886,6 +1224,14 @@ class RobustUnpacker: return self def __next__(self): + def unpack_next(): + try: + return next(self._unpacker) + except (TypeError, ValueError) as err: + # transform exceptions that might be raised when feeding + # msgpack with invalid data to a more specific exception + raise self.UnpackerCrashed(str(err)) + if self._resync: data = b''.join(self._buffered_data) while self._resync: @@ -898,17 +1244,17 @@ class RobustUnpacker: self._unpacker = msgpack.Unpacker(object_hook=StableDict) self._unpacker.feed(data) try: - item = next(self._unpacker) + item = unpack_next() + except (self.UnpackerCrashed, StopIteration): + # as long as we are resyncing, we also ignore StopIteration + pass + else: if self.validator(item): self._resync = False return item - # Ignore exceptions that might be raised when feeding - # msgpack with invalid data - except (TypeError, ValueError, StopIteration): - pass data = data[1:] else: - return next(self._unpacker) + return unpack_next() class ArchiveChecker: @@ -917,32 +1263,41 @@ class ArchiveChecker: self.error_found = False self.possibly_superseded = set() - def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False, - save_space=False): + def check(self, repository, repair=False, archive=None, first=0, last=0, sort_by='', glob=None, + verify_data=False, save_space=False): """Perform a set of checks on 'repository' :param repair: enable repair mode, write updated or corrected data into repository :param archive: only check this archive - :param last: only check this number of recent archives - :param prefix: only check archives with this prefix + :param first/last/sort_by: only check this number of first/last archives ordered by sort_by + :param glob: only check archives matching this glob :param verify_data: integrity verification of data referenced by archives :param save_space: Repository.commit(save_space) """ logger.info('Starting archive consistency check...') - self.check_all = archive is None and last is None and prefix is None + self.check_all = archive is None and not any((first, last, glob)) self.repair = repair self.repository = repository self.init_chunks() + if not self.chunks: + logger.error('Repository contains no apparent data at all, cannot continue check/repair.') + return False self.key = self.identify_key(repository) + if verify_data: + self.verify_data() if Manifest.MANIFEST_ID not in self.chunks: logger.error("Repository manifest not found!") self.error_found = True self.manifest = self.rebuild_manifest() else: - self.manifest, _ = Manifest.load(repository, key=self.key) - self.rebuild_refcounts(archive=archive, last=last, prefix=prefix) - if verify_data: - self.verify_data() + try: + self.manifest, _ = Manifest.load(repository, (Manifest.Operation.CHECK,), key=self.key) + except IntegrityErrorBase as exc: + logger.error('Repository manifest is corrupted: %s', exc) + 
self.error_found = True + del self.chunks[Manifest.MANIFEST_ID] + self.manifest = self.rebuild_manifest() + self.rebuild_refcounts(archive=archive, first=first, last=last, sort_by=sort_by, glob=glob) self.orphan_chunks_check() self.finish(save_space=save_space) if self.error_found: @@ -955,12 +1310,13 @@ class ArchiveChecker: """Fetch a list of all object keys from repository """ # Explicitly set the initial hash table capacity to avoid performance issues - # due to hash table "resonance" - capacity = int(len(self.repository) * 1.35 + 1) # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c) + # due to hash table "resonance". + # Since reconstruction of archive items can add some new chunks, add 10 % headroom + capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1) self.chunks = ChunkIndex(capacity) marker = None while True: - result = self.repository.list(limit=10000, marker=marker) + result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker) if not result: break marker = result[-1] @@ -979,23 +1335,84 @@ class ArchiveChecker: def verify_data(self): logger.info('Starting cryptographic data integrity verification...') - pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%", step=0.01, same_line=True) - count = errors = 0 - for chunk_id, (refcount, *_) in self.chunks.iteritems(): - pi.show() - if not refcount: - continue - encrypted_data = self.repository.get(chunk_id) - try: - _, data = self.key.decrypt(chunk_id, encrypted_data) - except IntegrityError as integrity_error: - self.error_found = True - errors += 1 - logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error) - count += 1 + chunks_count_index = len(self.chunks) + chunks_count_segments = 0 + errors = 0 + defect_chunks = [] + pi = ProgressIndicatorPercent(total=chunks_count_index, msg="Verifying data %6.2f%%", step=0.01, + msgid='check.verify_data') + marker = None + while True: + chunk_ids = self.repository.scan(limit=100, marker=marker) + if not chunk_ids: + break + chunks_count_segments += len(chunk_ids) + marker = chunk_ids[-1] + chunk_data_iter = self.repository.get_many(chunk_ids) + chunk_ids_revd = list(reversed(chunk_ids)) + while chunk_ids_revd: + pi.show() + chunk_id = chunk_ids_revd.pop(-1) # better efficiency + try: + encrypted_data = next(chunk_data_iter) + except (Repository.ObjectNotFound, IntegrityErrorBase) as err: + self.error_found = True + errors += 1 + logger.error('chunk %s: %s', bin_to_hex(chunk_id), err) + if isinstance(err, IntegrityErrorBase): + defect_chunks.append(chunk_id) + # as the exception killed our generator, make a new one for remaining chunks: + if chunk_ids_revd: + chunk_ids = list(reversed(chunk_ids_revd)) + chunk_data_iter = self.repository.get_many(chunk_ids) + else: + _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id + try: + self.key.decrypt(_chunk_id, encrypted_data) + except IntegrityErrorBase as integrity_error: + self.error_found = True + errors += 1 + logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error) + defect_chunks.append(chunk_id) pi.finish() + if chunks_count_index != chunks_count_segments: + logger.error('Repo/Chunks index object count vs. 
segment files object count mismatch.') + logger.error('Repo/Chunks index: %d objects != segment files: %d objects', + chunks_count_index, chunks_count_segments) + if defect_chunks: + if self.repair: + # if we kill the defect chunk here, subsequent actions within this "borg check" + # run will find missing chunks and replace them with all-zero replacement + # chunks and flag the files as "repaired". + # if another backup is done later and the missing chunks get backupped again, + # a "borg check" afterwards can heal all files where this chunk was missing. + logger.warning('Found defect chunks. They will be deleted now, so affected files can ' + 'get repaired now and maybe healed later.') + for defect_chunk in defect_chunks: + # remote repo (ssh): retry might help for strange network / NIC / RAM errors + # as the chunk will be retransmitted from remote server. + # local repo (fs): as chunks.iteritems loop usually pumps a lot of data through, + # a defect chunk is likely not in the fs cache any more and really gets re-read + # from the underlying media. + try: + encrypted_data = self.repository.get(defect_chunk) + _chunk_id = None if defect_chunk == Manifest.MANIFEST_ID else defect_chunk + self.key.decrypt(_chunk_id, encrypted_data) + except IntegrityErrorBase: + # failed twice -> get rid of this chunk + del self.chunks[defect_chunk] + self.repository.delete(defect_chunk) + logger.debug('chunk %s deleted.', bin_to_hex(defect_chunk)) + else: + logger.warning('chunk %s not deleted, did not consistently fail.') + else: + logger.warning('Found defect chunks. With --repair, they would get deleted, so affected ' + 'files could get repaired then and maybe healed later.') + for defect_chunk in defect_chunks: + logger.debug('chunk %s is defect.', bin_to_hex(defect_chunk)) log = logger.error if errors else logger.info - log('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.', count, errors) + log('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.', + chunks_count_segments, errors) def rebuild_manifest(self): """Rebuild the manifest object if it is missing @@ -1020,7 +1437,12 @@ class ArchiveChecker: archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS] for chunk_id, _ in self.chunks.iteritems(): cdata = self.repository.get(chunk_id) - _, data = self.key.decrypt(chunk_id, cdata) + try: + data = self.key.decrypt(chunk_id, cdata) + except IntegrityErrorBase as exc: + logger.error('Skipping corrupted chunk: %s', exc) + self.error_found = True + continue if not valid_msgpacked_dict(data, archive_keys_serialized): continue if b'cmdline' not in data or b'\xa7version\x01' not in data: @@ -1032,27 +1454,38 @@ class ArchiveChecker: except (TypeError, ValueError, StopIteration): continue if valid_archive(archive): - logger.info('Found archive %s', archive[b'name'].decode('utf-8')) - manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} + archive = ArchiveItem(internal_dict=archive) + name = archive.name + logger.info('Found archive %s', name) + if name in manifest.archives: + i = 1 + while True: + new_name = '%s.%d' % (name, i) + if new_name not in manifest.archives: + break + i += 1 + logger.warning('Duplicate archive name %s, storing as %s', name, new_name) + name = new_name + manifest.archives[name] = (chunk_id, archive.time) logger.info('Manifest rebuild complete.') return manifest - def rebuild_refcounts(self, archive=None, last=None, 
prefix=None): + def rebuild_refcounts(self, archive=None, first=0, last=0, sort_by='', glob=None): """Rebuild object reference counts by walking the metadata Missing and/or incorrect data is repaired when detected """ - # Exclude the manifest from chunks - del self.chunks[Manifest.MANIFEST_ID] + # Exclude the manifest from chunks (manifest entry might be already deleted from self.chunks) + self.chunks.pop(Manifest.MANIFEST_ID, None) def mark_as_possibly_superseded(id_): if self.chunks.get(id_, ChunkIndexEntry(0, 0, 0)).refcount == 0: self.possibly_superseded.add(id_) def add_callback(chunk): - id_ = self.key.id_hash(chunk.data) + id_ = self.key.id_hash(chunk) cdata = self.key.encrypt(chunk) - add_reference(id_, len(chunk.data), len(cdata), cdata) + add_reference(id_, len(chunk), len(cdata), cdata) return id_ def add_reference(id_, size, csize, cdata=None): @@ -1070,6 +1503,13 @@ class ArchiveChecker: Missing file chunks will be replaced with new chunks of the same length containing all zeros. If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one. """ + def replacement_chunk(size): + data = bytes(size) + chunk_id = self.key.id_hash(data) + cdata = self.key.encrypt(data) + csize = len(cdata) + return chunk_id, size, csize, cdata + offset = 0 chunk_list = [] chunks_replaced = False @@ -1085,16 +1525,20 @@ class ArchiveChecker: logger.error('{}: New missing file chunk detected (Byte {}-{}). ' 'Replacing with all-zero chunk.'.format(item.path, offset, offset + size)) self.error_found = chunks_replaced = True - data = bytes(size) - chunk_id = self.key.id_hash(data) - cdata = self.key.encrypt(Chunk(data)) - csize = len(cdata) + chunk_id, size, csize, cdata = replacement_chunk(size) add_reference(chunk_id, size, csize, cdata) else: logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has a ' 'all-zero replacement chunk already.'.format(item.path, offset, offset + size)) chunk_id, size, csize = chunk_current - add_reference(chunk_id, size, csize) + if chunk_id in self.chunks: + add_reference(chunk_id, size, csize) + else: + logger.warning('{}: Missing all-zero replacement chunk detected (Byte {}-{}). ' + 'Generating new replacement chunk.'.format(item.path, offset, offset + size)) + self.error_found = chunks_replaced = True + chunk_id, size, csize, cdata = replacement_chunk(size) + add_reference(chunk_id, size, csize, cdata) else: if chunk_current == chunk_healthy: # normal case, all fine. @@ -1113,6 +1557,13 @@ class ArchiveChecker: logger.info('{}: Completely healed previously damaged file!'.format(item.path)) del item.chunks_healthy item.chunks = chunk_list + if 'size' in item: + item_size = item.size + item_chunks_size = item.get_size(compressed=False, from_chunks=True) + if item_size != item_chunks_size: + # just warn, but keep the inconsistency, so that borg extract can warn about it. 
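The repair/heal cycle implemented above, as a short sketch (illustrative):

    # 1st 'borg check --repair':  missing chunk -> all-zero replacement_chunk() written,
    #                             original chunk list kept in item.chunks_healthy
    # later 'borg create':        the missing chunk may re-enter the repository
    # 2nd 'borg check --repair':  healthy chunk found again -> replacement dropped,
    #                             'Completely healed previously damaged file!'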
+ logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format( + item.path, item_size, item_chunks_size)) def robust_iterator(archive): """Iterates through all archive items @@ -1133,18 +1584,28 @@ class ArchiveChecker: def report(msg, chunk_id, chunk_no): cid = bin_to_hex(chunk_id) - msg += ' [chunk: %06d_%s]' % (chunk_no, cid) # see debug-dump-archive-items + msg += ' [chunk: %06d_%s]' % (chunk_no, cid) # see "debug dump-archive-items" self.error_found = True logger.error(msg) + def list_keys_safe(keys): + return ', '.join((k.decode() if isinstance(k, bytes) else str(k) for k in keys)) + def valid_item(obj): if not isinstance(obj, StableDict): - return False + return False, 'not a dictionary' + # A bug in Attic up to and including release 0.13 added a (meaningless) b'acl' key to every item. + # We ignore it here, should it exist. See test_attic013_acl_bug for details. + obj.pop(b'acl', None) keys = set(obj) - return required_item_keys.issubset(keys) and keys.issubset(item_keys) + if not required_item_keys.issubset(keys): + return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys) + if not keys.issubset(item_keys): + return False, 'invalid keys: ' + list_keys_safe(keys - item_keys) + return True, '' i = 0 - for state, items in groupby(archive[b'items'], missing_chunk_detector): + for state, items in groupby(archive.items, missing_chunk_detector): items = list(items) if state % 2: for chunk_id in items: @@ -1154,52 +1615,61 @@ class ArchiveChecker: if state > 0: unpacker.resync() for chunk_id, cdata in zip(items, repository.get_many(items)): - _, data = self.key.decrypt(chunk_id, cdata) + data = self.key.decrypt(chunk_id, cdata) unpacker.feed(data) try: for item in unpacker: - if valid_item(item): + valid, reason = valid_item(item) + if valid: yield Item(internal_dict=item) else: - report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i) + report('Did not get expected metadata dict when unpacking item metadata (%s)' % reason, chunk_id, i) + except RobustUnpacker.UnpackerCrashed as err: + report('Unpacker crashed while unpacking item metadata, trying to resync...', chunk_id, i) + unpacker.resync() except Exception: report('Exception while unpacking item metadata', chunk_id, i) raise i += 1 if archive is None: - # we need last N or all archives - archive_items = sorted(self.manifest.archives.items(), reverse=True, - key=lambda name_info: name_info[1][b'time']) - if prefix is not None: - archive_items = [item for item in archive_items if item[0].startswith(prefix)] - num_archives = len(archive_items) - end = None if last is None else min(num_archives, last) + sort_by = sort_by.split(',') + if any((first, last, glob)): + archive_infos = self.manifest.archives.list(sort_by=sort_by, glob=glob, first=first, last=last) + if glob and not archive_infos: + logger.warning('--glob-archives %s does not match any archives', glob) + if first and len(archive_infos) < first: + logger.warning('--first %d archives: only found %d archives', first, len(archive_infos)) + if last and len(archive_infos) < last: + logger.warning('--last %d archives: only found %d archives', last, len(archive_infos)) + else: + archive_infos = self.manifest.archives.list(sort_by=sort_by) else: # we only want one specific archive - archive_items = [item for item in self.manifest.archives.items() if item[0] == archive] - if not archive_items: + try: + archive_infos = [self.manifest.archives[archive]] + except KeyError: logger.error("Archive '%s' not found.", 
archive) - num_archives = 1 - end = 1 + self.error_found = True + return + num_archives = len(archive_infos) with cache_if_remote(self.repository) as repository: - for i, (name, info) in enumerate(archive_items[:end]): - logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) - archive_id = info[b'id'] + for i, info in enumerate(archive_infos): + logger.info('Analyzing archive {} ({}/{})'.format(info.name, i + 1, num_archives)) + archive_id = info.id if archive_id not in self.chunks: logger.error('Archive metadata block is missing!') self.error_found = True - del self.manifest.archives[name] + del self.manifest.archives[info.name] continue mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) - _, data = self.key.decrypt(archive_id, cdata) - archive = StableDict(msgpack.unpackb(data)) - if archive[b'version'] != 1: + data = self.key.decrypt(archive_id, cdata) + archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if archive.version != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, ARCHIVE_TEXT_KEYS) - archive[b'cmdline'] = [safe_decode(arg) for arg in archive[b'cmdline']] + archive.cmdline = [safe_decode(arg) for arg in archive.cmdline] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): @@ -1207,14 +1677,14 @@ class ArchiveChecker: verify_file_chunks(item) items_buffer.add(item) items_buffer.flush(flush=True) - for previous_item_id in archive[b'items']: + for previous_item_id in archive.items: mark_as_possibly_superseded(previous_item_id) - archive[b'items'] = items_buffer.chunks - data = msgpack.packb(archive, unicode_errors='surrogateescape') + archive.items = items_buffer.chunks + data = msgpack.packb(archive.as_dict(), unicode_errors='surrogateescape') new_archive_id = self.key.id_hash(data) - cdata = self.key.encrypt(Chunk(data)) + cdata = self.key.encrypt(data) add_reference(new_archive_id, len(data), len(cdata), cdata) - info[b'id'] = new_archive_id + self.manifest.archives[info.name] = (new_archive_id, info.ts) def orphan_chunks_check(self): if self.check_all: @@ -1236,13 +1706,6 @@ class ArchiveChecker: class ArchiveRecreater: - AUTOCOMMIT_THRESHOLD = 512 * 1024 * 1024 - """Commit (compact segments) after this many (or 1 % of repository size, whichever is greater) bytes.""" - - class FakeTargetArchive: - def __init__(self): - self.stats = Statistics() - class Interrupted(Exception): def __init__(self, metadata=None): self.metadata = metadata or {} @@ -1252,9 +1715,10 @@ class ArchiveRecreater: return archive_name.endswith('.recreate') def __init__(self, repository, manifest, key, cache, matcher, - exclude_caches=False, exclude_if_present=None, keep_tag_files=False, - chunker_params=None, compression=None, compression_files=None, - dry_run=False, stats=False, progress=False, file_status_printer=None): + exclude_caches=False, exclude_if_present=None, keep_exclude_tags=False, + chunker_params=None, compression=None, recompress=False, always_recompress=False, + dry_run=False, stats=False, progress=False, file_status_printer=None, + checkpoint_interval=1800): self.repository = repository self.key = key self.manifest = manifest @@ -1263,69 +1727,54 @@ class ArchiveRecreater: self.matcher = matcher self.exclude_caches = exclude_caches self.exclude_if_present = exclude_if_present or [] - self.keep_tag_files = keep_tag_files + self.keep_exclude_tags = keep_exclude_tags + self.rechunkify = chunker_params is not None + if 
self.rechunkify: + logger.debug('Rechunking archives to %s', chunker_params) self.chunker_params = chunker_params or CHUNKER_PARAMS - self.recompress = bool(compression) + self.recompress = recompress + self.always_recompress = always_recompress self.compression = compression or CompressionSpec('none') self.seen_chunks = set() - self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'), - compression_files or []) - key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none')) - - self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100) - logger.debug("Autocommit threshold: %s", format_file_size(self.autocommit_threshold)) self.dry_run = dry_run self.stats = stats self.progress = progress self.print_file_status = file_status_printer or (lambda *args: None) + self.checkpoint_interval = None if dry_run else checkpoint_interval - self.interrupt = False - self.errors = False - - def recreate(self, archive_name, comment=None): + def recreate(self, archive_name, comment=None, target_name=None): assert not self.is_temporary_archive(archive_name) archive = self.open_archive(archive_name) - target, resume_from = self.create_target_or_resume(archive) + target = self.create_target(archive, target_name) if self.exclude_if_present or self.exclude_caches: self.matcher_add_tagged_dirs(archive) if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None: - logger.info("Skipping archive %s, nothing to do", archive_name) - return True - try: - self.process_items(archive, target, resume_from) - except self.Interrupted as e: - return self.save(archive, target, completed=False, metadata=e.metadata) - return self.save(archive, target, comment) + return False + self.process_items(archive, target) + replace_original = target_name is None + self.save(archive, target, comment, replace_original=replace_original) + return True - def process_items(self, archive, target, resume_from=None): + def process_items(self, archive, target): matcher = self.matcher target_is_subset = not matcher.empty() hardlink_masters = {} if target_is_subset else None def item_is_hardlink_master(item): return (target_is_subset and - stat.S_ISREG(item.mode) and + hardlinkable(item.mode) and item.get('hardlink_master', True) and - 'source' not in item and - not matcher.match(item.path)) + 'source' not in item) for item in archive.iter_items(): - if item_is_hardlink_master(item): - # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters - hardlink_masters[item.path] = (item.get('chunks'), None) - continue - if resume_from: - # Fast forward to after the last processed file - if item.path == resume_from: - logger.info('Fast-forwarded to %s', remove_surrogates(item.path)) - resume_from = None - continue if not matcher.match(item.path): self.print_file_status('x', item.path) + if item_is_hardlink_master(item): + hardlink_masters[item.path] = (item.get('chunks'), None) continue - if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters: + if target_is_subset and hardlinkable(item.mode) and item.get('source') in hardlink_masters: # master of this hard link is outside the target subset chunks, new_source = hardlink_masters[item.source] if new_source is None: @@ -1339,127 +1788,86 @@ class ArchiveRecreater: if self.dry_run: self.print_file_status('-', item.path) else: - try: - self.process_item(archive, target, item) - except self.Interrupted: 
- if self.progress: - target.stats.show_progress(final=True) - raise + self.process_item(archive, target, item) if self.progress: target.stats.show_progress(final=True) def process_item(self, archive, target, item): if 'chunks' in item: - item.chunks = self.process_chunks(archive, target, item) + self.process_chunks(archive, target, item) target.stats.nfiles += 1 target.add_item(item) self.print_file_status(file_status(item.mode), item.path) - if self.interrupt: - raise self.Interrupted def process_chunks(self, archive, target, item): - """Return new chunk ID list for 'item'.""" - # TODO: support --compression-from if not self.recompress and not target.recreate_rechunkify: for chunk_id, size, csize in item.chunks: self.cache.chunk_incref(chunk_id, target.stats) return item.chunks - new_chunks = self.process_partial_chunks(target) - chunk_iterator = self.create_chunk_iterator(archive, target, item) - consume(chunk_iterator, len(new_chunks)) - for chunk in chunk_iterator: - chunk_id = self.key.id_hash(chunk.data) - if chunk_id in self.seen_chunks: - new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats)) - else: - # TODO: detect / skip / --always-recompress - chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress) - new_chunks.append((chunk_id, size, csize)) - self.seen_chunks.add(chunk_id) - if self.recompress: - # This tracks how many bytes are uncommitted but compactable, since we are recompressing - # existing chunks. - target.recreate_uncomitted_bytes += csize - if target.recreate_uncomitted_bytes >= self.autocommit_threshold: - # Issue commits to limit additional space usage when recompressing chunks - target.recreate_uncomitted_bytes = 0 - self.repository.commit() - if self.progress: - target.stats.show_progress(item=item, dt=0.2) - if self.interrupt: - raise self.Interrupted({ - 'recreate_partial_chunks': new_chunks, - }) - return new_chunks + chunk_iterator = self.iter_chunks(archive, target, list(item.chunks)) + chunk_processor = partial(self.chunk_processor, target) + target.process_file_chunks(item, self.cache, target.stats, chunk_iterator, chunk_processor) - def create_chunk_iterator(self, archive, target, item): - """Return iterator of chunks to store for 'item' from 'archive' in 'target'.""" - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks]) + def chunk_processor(self, target, data): + chunk_id = self.key.id_hash(data) + if chunk_id in self.seen_chunks: + return self.cache.chunk_incref(chunk_id, target.stats) + overwrite = self.recompress + if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks: + # Check if this chunk is already compressed the way we want it + old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False) + if Compressor.detect(old_chunk).name == self.key.compressor.decide(data).name: + # Stored chunk has the same compression we wanted + overwrite = False + chunk_entry = self.cache.add_chunk(chunk_id, data, target.stats, overwrite=overwrite, wait=False) + self.cache.repository.async_response(wait=False) + self.seen_chunks.add(chunk_entry.id) + return chunk_entry + + def iter_chunks(self, archive, target, chunks): + chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in chunks]) if target.recreate_rechunkify: # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk # (does not load the entire file into memory) file = 
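To make the recompression decision in chunk_processor() above easier to follow, here is a reduced, self-contained sketch. It is illustrative only: sha256 stands in for key.id_hash(), and the 'stored' dict stands in for asking the repository (via Compressor.detect()) which compression a chunk is currently stored with.

    import hashlib

    def plan_chunk(data, seen, stored, wanted='zstd', recompress=False, always=False):
        """Return (chunk_id, action): 'reuse' (seen in this run), 'keep' or 'write'."""
        chunk_id = hashlib.sha256(data).digest()
        if chunk_id in seen:
            return chunk_id, 'reuse'
        seen.add(chunk_id)
        overwrite = recompress
        if recompress and not always and chunk_id in stored:
            if stored[chunk_id] == wanted:           # already compressed the way we want it
                overwrite = False
        if chunk_id in stored and not overwrite:
            return chunk_id, 'keep'
        return chunk_id, 'write'

    seen, stored = set(), {hashlib.sha256(b'old').digest(): 'lz4'}
    print(plan_chunk(b'old', seen, stored, recompress=True))   # action 'write' (lz4 != zstd)
    print(plan_chunk(b'old', seen, stored, recompress=True))   # action 'reuse' (second occurrence)
    print(plan_chunk(b'new', seen, stored))                    # action 'write' (not stored yet)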
ChunkIteratorFileWrapper(chunk_iterator) + yield from target.chunker.chunkify(file) + else: + for chunk in chunk_iterator: + yield chunk - def _chunk_iterator(): - for data in target.chunker.chunkify(file): - yield Chunk(data) - - chunk_iterator = _chunk_iterator() - return chunk_iterator - - def process_partial_chunks(self, target): - """Return chunks from a previous run for archive 'target' (if any) or an empty list.""" - if not target.recreate_partial_chunks: - return [] - # No incref, create_target_or_resume already did that before to deleting the old target archive - # So just copy these over - partial_chunks = target.recreate_partial_chunks - target.recreate_partial_chunks = None - for chunk_id, size, csize in partial_chunks: - self.seen_chunks.add(chunk_id) - logger.debug('Copied %d chunks from a partially processed item', len(partial_chunks)) - return partial_chunks - - def save(self, archive, target, comment=None, completed=True, metadata=None): - """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict.""" + def save(self, archive, target, comment=None, replace_original=True): if self.dry_run: - return completed - if completed: - timestamp = archive.ts.replace(tzinfo=None) - if comment is None: - comment = archive.metadata.get(b'comment', '') - target.save(timestamp=timestamp, comment=comment, additional_metadata={ - 'cmdline': archive.metadata[b'cmdline'], - 'recreate_cmdline': sys.argv, - }) + return + if comment is None: + comment = archive.metadata.get('comment', '') + target.save(comment=comment, additional_metadata={ + # keep some metadata as in original archive: + 'time': archive.metadata.time, + 'time_end': archive.metadata.get('time_end') or archive.metadata.time, + 'cmdline': archive.metadata.cmdline, + # but also remember recreate metadata: + 'recreate_cmdline': sys.argv, + }) + if replace_original: archive.delete(Statistics(), progress=self.progress) target.rename(archive.name) - if self.stats: - target.end = datetime.utcnow() - log_multi(DASHES, - str(target), - DASHES, - str(target.stats), - str(self.cache), - DASHES) - else: - additional_metadata = metadata or {} - additional_metadata.update({ - 'recreate_source_id': archive.id, - 'recreate_args': sys.argv[1:], - }) - target.save(name=archive.name + '.recreate', additional_metadata=additional_metadata) - logger.info('Run the same command again to resume.') - return completed + if self.stats: + target.end = datetime.utcnow() + log_multi(DASHES, + str(target), + DASHES, + str(target.stats), + str(self.cache), + DASHES) def matcher_add_tagged_dirs(self, archive): """Add excludes to the matcher created by exclude_cache and exclude_if_present.""" def exclude(dir, tag_item): - if self.keep_tag_files: - tag_files.append(PathPrefixPattern(tag_item.path)) - tagged_dirs.append(FnmatchPattern(dir + '/')) + if self.keep_exclude_tags: + tag_files.append(PathPrefixPattern(tag_item.path, recurse_dir=False)) + tagged_dirs.append(FnmatchPattern(dir + '/', recurse_dir=False)) else: - tagged_dirs.append(PathPrefixPattern(dir)) + tagged_dirs.append(PathPrefixPattern(dir, recurse_dir=False)) matcher = self.matcher tag_files = [] @@ -1471,10 +1879,10 @@ class ArchiveRecreater: filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)): if item.path.endswith(CACHE_TAG_NAME): cachedir_masters[item.path] = item + dir, tag_file = os.path.split(item.path) + if tag_file in self.exclude_if_present: + exclude(dir, item) if stat.S_ISREG(item.mode): - dir, tag_file = 
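The rechunking path at the top of this hunk (iter_chunks() feeding a ChunkIteratorFileWrapper into the target chunker) boils down to: present the old chunk stream as a file-like object and re-chunk it, so the whole file never sits in memory. A generic, self-contained sketch of that idea, using a trivial fixed-size chunker instead of borg's content-defined one:

    class ChunkIteratorFile:
        """Minimal file-like view over an iterator of byte chunks (illustrative only)."""
        def __init__(self, chunk_iter):
            self._iter = iter(chunk_iter)
            self._buf = b''

        def read(self, n):
            while len(self._buf) < n:
                try:
                    self._buf += next(self._iter)
                except StopIteration:
                    break
            data, self._buf = self._buf[:n], self._buf[n:]
            return data

    def rechunk(chunks, block_size):
        """Re-chunk a stream of arbitrary-sized chunks into block_size pieces."""
        file = ChunkIteratorFile(chunks)
        while True:
            block = file.read(block_size)
            if not block:
                return
            yield block

    print([len(c) for c in rechunk([b'aaa', b'bbbb', b'cc'], 4)])    # -> [4, 4, 1]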
os.path.split(item.path) - if tag_file in self.exclude_if_present: - exclude(dir, item) if self.exclude_caches and tag_file == CACHE_TAG_NAME: if 'chunks' in item: file = open_item(archive, item) @@ -1482,81 +1890,29 @@ class ArchiveRecreater: file = open_item(archive, cachedir_masters[item.source]) if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS): exclude(dir, item) - matcher.add(tag_files, True) - matcher.add(tagged_dirs, False) + matcher.add(tag_files, IECommand.Include) + matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse) - def create_target_or_resume(self, archive): - """Create new target archive or resume from temporary archive, if it exists. Return archive, resume from path""" - if self.dry_run: - return self.FakeTargetArchive(), None - target_name = archive.name + '.recreate' - resume = target_name in self.manifest.archives - target, resume_from = None, None - if resume: - target, resume_from = self.try_resume(archive, target_name) - if not target: - target = self.create_target_archive(target_name) + def create_target(self, archive, target_name=None): + """Create target archive.""" + target_name = target_name or archive.name + '.recreate' + target = self.create_target_archive(target_name) # If the archives use the same chunker params, then don't rechunkify - target.recreate_rechunkify = tuple(archive.metadata.get(b'chunker_params')) != self.chunker_params - return target, resume_from - - def try_resume(self, archive, target_name): - """Try to resume from temporary archive. Return (target archive, resume from path) if successful.""" - logger.info('Found %s, will resume interrupted operation', target_name) - old_target = self.open_archive(target_name) - resume_id = old_target.metadata[b'recreate_source_id'] - resume_args = [safe_decode(arg) for arg in old_target.metadata[b'recreate_args']] - if resume_id != archive.id: - logger.warning('Source archive changed, will discard %s and start over', target_name) - logger.warning('Saved fingerprint: %s', bin_to_hex(resume_id)) - logger.warning('Current fingerprint: %s', archive.fpr) - old_target.delete(Statistics(), progress=self.progress) - return None, None # can't resume - if resume_args != sys.argv[1:]: - logger.warning('Command line changed, this might lead to inconsistencies') - logger.warning('Saved: %s', repr(resume_args)) - logger.warning('Current: %s', repr(sys.argv[1:])) - target = self.create_target_archive(target_name + '.temp') - logger.info('Replaying items from interrupted operation...') - item = None - for item in old_target.iter_items(): - if 'chunks' in item: - for chunk in item.chunks: - self.cache.chunk_incref(chunk.id, target.stats) - target.stats.nfiles += 1 - target.add_item(item) - if item: - resume_from = item.path - else: - resume_from = None - if self.progress: - old_target.stats.show_progress(final=True) - target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', []) - for chunk_id, size, csize in target.recreate_partial_chunks: - if not self.cache.seen_chunk(chunk_id): - try: - # Repository has __contains__, RemoteRepository doesn't - self.repository.get(chunk_id) - except Repository.ObjectNotFound: - # delete/prune/check between invocations: these chunks are gone. 
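The --exclude-caches handling above relies on the CACHEDIR.TAG convention: a directory counts as a cache if it contains a tag file beginning with the well-known signature. Stand-alone sketch of that test (the constants mirror borg's CACHE_TAG_NAME / CACHE_TAG_CONTENTS; the helper itself is illustrative, not part of the patch):

    import os

    CACHE_TAG_NAME = 'CACHEDIR.TAG'
    CACHE_TAG_CONTENTS = b'Signature: 8a477f597d28d172789f06886806bc55'

    def dir_is_cachedir(path):
        """Return True if *path* contains a valid CACHEDIR.TAG file."""
        tag_path = os.path.join(path, CACHE_TAG_NAME)
        try:
            with open(tag_path, 'rb') as fd:
                return fd.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS)
        except OSError:
            return False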
- target.recreate_partial_chunks = None - break - # fast-lane insert into chunks cache - self.cache.chunks[chunk_id] = (1, size, csize) - target.stats.update(size, csize, True) - continue - # incref now, otherwise old_target.delete() might delete these chunks - self.cache.chunk_incref(chunk_id, target.stats) - old_target.delete(Statistics(), progress=self.progress) - logger.info('Done replaying items') - return target, resume_from + source_chunker_params = tuple(archive.metadata.get('chunker_params', [])) + target.recreate_rechunkify = self.rechunkify and source_chunker_params != target.chunker_params + if target.recreate_rechunkify: + logger.debug('Rechunking archive from %s to %s', source_chunker_params or '(unknown)', target.chunker_params) + target.process_file_chunks = ChunksProcessor( + cache=self.cache, key=self.key, + add_item=target.add_item, write_checkpoint=target.write_checkpoint, + checkpoint_interval=self.checkpoint_interval).process_file_chunks + target.chunker = Chunker(self.key.chunk_seed, *target.chunker_params) + return target def create_target_archive(self, name): target = Archive(self.repository, self.key, self.manifest, name, create=True, progress=self.progress, chunker_params=self.chunker_params, cache=self.cache, - checkpoint_interval=0, compression=self.compression) - target.recreate_partial_chunks = None - target.recreate_uncomitted_bytes = 0 + checkpoint_interval=self.checkpoint_interval) return target def open_archive(self, name, **kwargs): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 1c007c31..fbcf9dc9 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1,51 +1,79 @@ import argparse import collections +import faulthandler import functools import hashlib import inspect -import io +import itertools +import json import logging import os import re import shlex +import shutil import signal import stat import subprocess import sys +import tarfile import textwrap +import time import traceback from binascii import unhexlify -from datetime import datetime +from contextlib import contextmanager +from datetime import datetime, timedelta from itertools import zip_longest from .logger import create_logger, setup_logging + logger = create_logger() +import msgpack + +import borg from . import __version__ from . 
import helpers +from .algorithms.checksums import crc32 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special -from .archive import BackupOSError, CHUNKER_PARAMS -from .cache import Cache +from .archive import BackupOSError, backup_io +from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor +from .cache import Cache, assert_secure from .constants import * # NOQA +from .compress import CompressionSpec +from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required, RepoKey, PassphraseKey +from .crypto.keymanager import KeyManager from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR -from .helpers import Error, NoManifestError -from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec, PrefixSpec -from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive -from .helpers import safe_encode, remove_surrogates, bin_to_hex -from .helpers import prune_within, prune_split -from .helpers import to_localtime, timestamp +from .helpers import Error, NoManifestError, set_ec +from .helpers import positive_int_validator, location_validator, archivename_validator, ChunkerParams +from .helpers import PrefixSpec, SortBySpec, FilesCacheMode +from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter +from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive +from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict +from .helpers import interval, prune_within, prune_split +from .helpers import timestamp from .helpers import get_cache_dir -from .helpers import Manifest -from .helpers import update_excludes, check_extension_modules +from .helpers import Manifest, AI_HUMAN_SORT_KEYS +from .helpers import hardlinkable +from .helpers import StableDict +from .helpers import check_python, check_extension_modules from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo from .helpers import log_multi -from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern -from .helpers import signal_handler +from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm +from .helpers import ErrorIgnoringTextIOWrapper +from .helpers import ProgressIndicatorPercent +from .helpers import basic_json_data, json_print +from .helpers import replace_placeholders +from .helpers import ChunkIteratorFileWrapper +from .helpers import popen_with_error_handling, prepare_subprocess_env +from .helpers import dash_open +from .helpers import umount +from .nanorst import rst_to_terminal +from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern +from .patterns import PatternMatcher from .item import Item -from .key import key_creator, RepoKey, PassphraseKey -from .platform import get_flags +from .platform import get_flags, get_process_id, SyncFile from .remote import RepositoryServer, RemoteRepository, cache_if_remote -from .repository import Repository +from .repository import Repository, LIST_SCAN_LIMIT from .selftest import selftest from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader @@ -54,14 +82,21 @@ if sys.platform == 'win32': from .platform import get_ads +STATS_HEADER = " Original size Compressed size Deduplicated size" + + def argument(args, str_or_bool): """If bool is passed, return it. 
If str is passed, retrieve named attribute from args.""" if isinstance(str_or_bool, str): return getattr(args, str_or_bool) + if isinstance(str_or_bool, (list, tuple)): + return any(getattr(args, item) for item in str_or_bool) return str_or_bool -def with_repository(fake=False, create=False, lock=True, exclusive=False, manifest=True, cache=False): +def with_repository(fake=False, invert_fake=False, create=False, lock=True, + exclusive=False, manifest=True, cache=False, secure=True, + compatibility=None): """ Method decorator for subcommand-handling methods: do_XYZ(self, args, repository, …) @@ -72,24 +107,46 @@ def with_repository(fake=False, create=False, lock=True, exclusive=False, manife :param exclusive: (str or bool) lock repository exclusively (for writing) :param manifest: load manifest and key, pass them as keyword arguments :param cache: open cache, pass it as keyword argument (implies manifest) + :param secure: do assert_secure after loading manifest + :param compatibility: mandatory if not create and (manifest or cache), specifies mandatory feature categories to check """ + + if not create and (manifest or cache): + if compatibility is None: + raise AssertionError("with_repository decorator used without compatibility argument") + if type(compatibility) is not tuple: + raise AssertionError("with_repository decorator compatibility argument must be of type tuple") + else: + if compatibility is not None: + raise AssertionError("with_repository called with compatibility argument but would not check" + repr(compatibility)) + if create: + compatibility = Manifest.NO_OPERATION_CHECK + def decorator(method): @functools.wraps(method) def wrapper(self, args, **kwargs): location = args.location # note: 'location' must be always present in args - if argument(args, fake): + append_only = getattr(args, 'append_only', False) + if argument(args, fake) ^ invert_fake: return method(self, args, repository=None, **kwargs) elif location.proto == 'ssh': - repository = RemoteRepository(location, create=create, lock_wait=self.lock_wait, lock=lock, args=args) + repository = RemoteRepository(location, create=create, exclusive=argument(args, exclusive), + lock_wait=self.lock_wait, lock=lock, append_only=append_only, args=args) else: repository = Repository(location.path, create=create, exclusive=argument(args, exclusive), - lock_wait=self.lock_wait, lock=lock) + lock_wait=self.lock_wait, lock=lock, + append_only=append_only) with repository: if manifest or cache: - kwargs['manifest'], kwargs['key'] = Manifest.load(repository) + kwargs['manifest'], kwargs['key'] = Manifest.load(repository, compatibility) + if 'compression' in args: + kwargs['key'].compressor = args.compression.compressor + if secure: + assert_secure(repository, kwargs['manifest']) if cache: with Cache(repository, kwargs['key'], kwargs['manifest'], - do_files=getattr(args, 'cache_files', False), lock_wait=self.lock_wait) as cache_: + do_files=getattr(args, 'cache_files', False), + progress=getattr(args, 'progress', False), lock_wait=self.lock_wait) as cache_: return method(self, args, repository=repository, cache=cache_, **kwargs) else: return method(self, args, repository=repository, **kwargs) @@ -101,17 +158,39 @@ def with_archive(method): @functools.wraps(method) def wrapper(self, args, repository, key, manifest, **kwargs): archive = Archive(repository, key, manifest, args.location.archive, - numeric_owner=getattr(args, 'numeric_owner', False), cache=kwargs.get('cache')) + numeric_owner=getattr(args, 'numeric_owner', False), + 
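For readers not used to the decorator-factory pattern documented above, this is the shape of with_repository() stripped down to a runnable toy. Everything here is a stand-in; the real decorator additionally handles locking, remote repositories, the cache, security checks and the compatibility argument:

    import functools

    class ToyRepository:
        def __init__(self, path):
            self.path = path
        def __enter__(self):
            return self
        def __exit__(self, *exc):
            return False

    def with_repo(manifest=True):
        def decorator(method):
            @functools.wraps(method)
            def wrapper(self, args, **kwargs):
                repository = ToyRepository(args['location'])
                with repository:                               # open for the duration of the command
                    if manifest:
                        kwargs['manifest'] = {'archives': {}}  # stands in for Manifest.load()
                    return method(self, args, repository=repository, **kwargs)
            return wrapper
        return decorator

    class Commands:
        @with_repo()
        def do_list(self, args, repository, manifest):
            return sorted(manifest['archives'])

    print(Commands().do_list({'location': '/path/to/repo'}))    # -> []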
nobsdflags=getattr(args, 'nobsdflags', False), + cache=kwargs.get('cache'), + consider_part_files=args.consider_part_files, log_json=args.log_json) return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs) return wrapper +def parse_storage_quota(storage_quota): + parsed = parse_file_size(storage_quota) + if parsed < parse_file_size('10M'): + raise argparse.ArgumentTypeError('quota is too small (%s). At least 10M are required.' % storage_quota) + return parsed + + +def get_func(args): + # This works around http://bugs.python.org/issue9351 + # func is used at the leaf parsers of the argparse parser tree, + # fallback_func at next level towards the root, + # fallback2_func at the 2nd next level (which is root in our case). + for name in 'func', 'fallback_func', 'fallback2_func': + func = getattr(args, name, None) + if func is not None: + return func + raise Exception('expected func attributes not found') + + class Archiver: def __init__(self, lock_wait=None, prog=None): self.exit_code = EXIT_SUCCESS self.lock_wait = lock_wait - self.parser = self.build_parser(prog) + self.prog = prog def print_error(self, msg, *args): msg = args and msg % args or msg @@ -125,56 +204,37 @@ class Archiver: def print_file_status(self, status, path): if self.output_list and (self.output_filter is None or status in self.output_filter): - logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path)) + if self.log_json: + print(json.dumps({ + 'type': 'file_status', + 'status': status, + 'path': remove_surrogates(path), + }), file=sys.stderr) + else: + logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path)) @staticmethod - def compare_chunk_contents(chunks1, chunks2): - """Compare two chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`)""" - end = object() - alen = ai = 0 - blen = bi = 0 - while True: - if not alen - ai: - a = next(chunks1, end) - if a is end: - return not blen - bi and next(chunks2, end) is end - a = memoryview(a.data) - alen = len(a) - ai = 0 - if not blen - bi: - b = next(chunks2, end) - if b is end: - return not alen - ai and next(chunks1, end) is end - b = memoryview(b.data) - blen = len(b) - bi = 0 - slicelen = min(alen - ai, blen - bi) - if a[ai:ai + slicelen] != b[bi:bi + slicelen]: - return False - ai += slicelen - bi += slicelen - - @staticmethod - def build_matcher(excludes, paths): + def build_matcher(inclexcl_patterns, include_paths): matcher = PatternMatcher() - if excludes: - matcher.add(excludes, False) - include_patterns = [] - if paths: - include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths) - matcher.add(include_patterns, True) - matcher.fallback = not include_patterns - return matcher, include_patterns + matcher.add_inclexcl(inclexcl_patterns) + matcher.add_includepaths(include_paths) + return matcher def do_serve(self, args): - """Start in server mode. This command is usually not used manually. - """ - return RepositoryServer(restrict_to_paths=args.restrict_to_paths, append_only=args.append_only).serve() + """Start in server mode. 
This command is usually not used manually.""" + RepositoryServer( + restrict_to_paths=args.restrict_to_paths, + restrict_to_repositories=args.restrict_to_repositories, + append_only=args.append_only, + storage_quota=args.storage_quota, + ).serve() + return EXIT_SUCCESS @with_repository(create=True, exclusive=True, manifest=False) def do_init(self, args, repository): """Initialize an empty repository""" - logger.info('Initializing repository at "%s"' % args.location.canonical_path()) + path = args.location.canonical_path() + logger.info('Initializing repository at "%s"' % path) try: key = key_creator(repository, args) except (EOFError, KeyboardInterrupt): @@ -186,35 +246,99 @@ class Archiver: repository.commit() with Cache(repository, key, manifest, warn_if_unencrypted=False): pass + if key.tam_required: + tam_file = tam_required_file(repository) + open(tam_file, 'w').close() + logger.warning( + '\n' + 'By default repositories initialized with this version will produce security\n' + 'errors if written to with an older version (up to and including Borg 1.0.8).\n' + '\n' + 'If you want to use these older versions, you can disable the check by running:\n' + 'borg upgrade --disable-tam \'%s\'\n' + '\n' + 'See https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability ' + 'for details about the security implications.', path) return self.exit_code - @with_repository(exclusive='repair', manifest=False) + @with_repository(exclusive=True, manifest=False) def do_check(self, args, repository): """Check repository consistency""" if args.repair: msg = ("'check --repair' is an experimental feature that might result in data loss." + "\n" + "Type 'YES' if you understand this and want to continue: ") - if not yes(msg, false_msg="Aborting.", truish=('YES', ), + if not yes(msg, false_msg="Aborting.", invalid_msg="Invalid answer, aborting.", + truish=('YES', ), retry=False, env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): return EXIT_ERROR - if args.repo_only and args.verify_data: - self.print_error("--repository-only and --verify-data contradict each other. 
Please select one.") + if args.repo_only and any((args.verify_data, args.first, args.last, args.prefix)): + self.print_error("--repository-only contradicts --first, --last, --prefix and --verify-data arguments.") return EXIT_ERROR if not args.archives_only: if not repository.check(repair=args.repair, save_space=args.save_space): return EXIT_WARNING + if args.prefix: + args.glob_archives = args.prefix + '*' if not args.repo_only and not ArchiveChecker().check( repository, repair=args.repair, archive=args.location.archive, - last=args.last, prefix=args.prefix, verify_data=args.verify_data, - save_space=args.save_space): + first=args.first, last=args.last, sort_by=args.sort_by or 'ts', glob=args.glob_archives, + verify_data=args.verify_data, save_space=args.save_space): return EXIT_WARNING return EXIT_SUCCESS - @with_repository() + @with_repository(compatibility=(Manifest.Operation.CHECK,)) def do_change_passphrase(self, args, repository, manifest, key): """Change repository key file passphrase""" + if not hasattr(key, 'change_passphrase'): + print('This repository is not encrypted, cannot change the passphrase.') + return EXIT_ERROR key.change_passphrase() + logger.info('Key updated') + if hasattr(key, 'find_key'): + # print key location to make backing it up easier + logger.info('Key location: %s', key.find_key()) + return EXIT_SUCCESS + + def do_change_passphrase_deprecated(self, args): + logger.warning('"borg change-passphrase" is deprecated and will be removed in Borg 1.2.\n' + 'Use "borg key change-passphrase" instead.') + return self.do_change_passphrase(args) + + @with_repository(lock=False, exclusive=False, manifest=False, cache=False) + def do_key_export(self, args, repository): + """Export the repository key for backup""" + manager = KeyManager(repository) + manager.load_keyblob() + if args.paper: + manager.export_paperkey(args.path) + else: + if not args.path: + self.print_error("output file to export key to expected") + return EXIT_ERROR + if args.qr: + manager.export_qr(args.path) + else: + manager.export(args.path) + return EXIT_SUCCESS + + @with_repository(lock=False, exclusive=False, manifest=False, cache=False) + def do_key_import(self, args, repository): + """Import the repository key from backup""" + manager = KeyManager(repository) + if args.paper: + if args.path: + self.print_error("with --paper import from file is not supported") + return EXIT_ERROR + manager.import_paperkey(args) + else: + if not args.path: + self.print_error("input file to import key from expected") + return EXIT_ERROR + if args.path != '-' and not os.path.exists(args.path): + self.print_error("input file does not exist: " + args.path) + return EXIT_ERROR + manager.import_keyfile(args) return EXIT_SUCCESS @with_repository(manifest=False) @@ -230,16 +354,93 @@ class Archiver: key_new.id_key = key_old.id_key key_new.chunk_seed = key_old.chunk_seed key_new.change_passphrase() # option to change key protection passphrase, save + logger.info('Key updated') return EXIT_SUCCESS - @with_repository(fake='dry_run') + def do_benchmark_crud(self, args): + """Benchmark Create, Read, Update, Delete for archives.""" + def measurement_run(repo, path): + archive = repo + '::borg-benchmark-crud' + compression = '--compression=none' + # measure create perf (without files cache to always have it chunking) + t_start = time.monotonic() + rc = self.do_create(self.parse_args(['create', compression, '--files-cache=disabled', archive + '1', path])) + t_end = time.monotonic() + dt_create = t_end - t_start + assert rc == 0 + # 
now build files cache + rc1 = self.do_create(self.parse_args(['create', compression, archive + '2', path])) + rc2 = self.do_delete(self.parse_args(['delete', archive + '2'])) + assert rc1 == rc2 == 0 + # measure a no-change update (archive1 is still present) + t_start = time.monotonic() + rc1 = self.do_create(self.parse_args(['create', compression, archive + '3', path])) + t_end = time.monotonic() + dt_update = t_end - t_start + rc2 = self.do_delete(self.parse_args(['delete', archive + '3'])) + assert rc1 == rc2 == 0 + # measure extraction (dry-run: without writing result to disk) + t_start = time.monotonic() + rc = self.do_extract(self.parse_args(['extract', '--dry-run', archive + '1'])) + t_end = time.monotonic() + dt_extract = t_end - t_start + assert rc == 0 + # measure archive deletion (of LAST present archive with the data) + t_start = time.monotonic() + rc = self.do_delete(self.parse_args(['delete', archive + '1'])) + t_end = time.monotonic() + dt_delete = t_end - t_start + assert rc == 0 + return dt_create, dt_update, dt_extract, dt_delete + + @contextmanager + def test_files(path, count, size, random): + path = os.path.join(path, 'borg-test-data') + os.makedirs(path) + for i in range(count): + fname = os.path.join(path, 'file_%d' % i) + data = b'\0' * size if not random else os.urandom(size) + with SyncFile(fname, binary=True) as fd: # used for posix_fadvise's sake + fd.write(data) + yield path + shutil.rmtree(path) + + if '_BORG_BENCHMARK_CRUD_TEST' in os.environ: + tests = [ + ('Z-TEST', 1, 1, False), + ('R-TEST', 1, 1, True), + ] + else: + tests = [ + ('Z-BIG', 10, 100000000, False), + ('R-BIG', 10, 100000000, True), + ('Z-MEDIUM', 1000, 1000000, False), + ('R-MEDIUM', 1000, 1000000, True), + ('Z-SMALL', 10000, 10000, False), + ('R-SMALL', 10000, 10000, True), + ] + + for msg, count, size, random in tests: + with test_files(args.path, count, size, random) as path: + dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path) + total_size_MB = count * size / 1e06 + file_size_formatted = format_file_size(size) + content = 'random' if random else 'all-zero' + fmt = '%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)' + print(fmt % ('C', msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create)) + print(fmt % ('R', msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract)) + print(fmt % ('U', msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update)) + print(fmt % ('D', msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete)) + + return 0 + + @with_repository(fake='dry_run', exclusive=True, compatibility=(Manifest.Operation.WRITE,)) def do_create(self, args, repository, manifest=None, key=None): """Create new archive""" matcher = PatternMatcher(fallback=True) - if args.excludes: - matcher.add(args.excludes, False) + matcher.add_inclexcl(args.patterns) - def create_inner(archive, cache): + def create_inner(archive, cache, fso): # Add cache dir to inode_skip list skip_inodes = set() try: @@ -259,7 +460,7 @@ class Archiver: path = 'stdin' if not dry_run: try: - status = archive.process_stdin(path, cache) + status = fso.process_stdin(path, cache) except BackupOSError as e: status = 'E' self.print_warning('%s: %s', path, e) @@ -272,7 +473,7 @@ class Archiver: else: path = os.path.normpath(path) try: - st = os.lstat(path) + st = os.stat(path, follow_symlinks=False) except OSError as e: self.print_warning('%s: %s', path, e) continue @@ -280,129 +481,177 @@ class 
Archiver: restrict_dev = st.st_dev else: restrict_dev = None - self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present, - args.keep_tag_files, skip_inodes, path, restrict_dev, + self._process(fso, cache, matcher, args.exclude_caches, args.exclude_if_present, + args.keep_exclude_tags, skip_inodes, path, restrict_dev, read_special=args.read_special, dry_run=dry_run, st=st) if not dry_run: archive.save(comment=args.comment, timestamp=args.timestamp) if args.progress: archive.stats.show_progress(final=True) + args.stats |= args.json if args.stats: - archive.end = datetime.utcnow() - log_multi(DASHES, - str(archive), - DASHES, - str(archive.stats), - str(cache), - DASHES, logger=logging.getLogger('borg.output.stats')) + if args.json: + json_print(basic_json_data(manifest, cache=cache, extra={ + 'archive': archive, + })) + else: + log_multi(DASHES, + str(archive), + DASHES, + STATS_HEADER, + str(archive.stats), + str(cache), + DASHES, logger=logging.getLogger('borg.output.stats')) self.output_filter = args.output_filter self.output_list = args.output_list self.ignore_inode = args.ignore_inode + self.nobsdflags = args.nobsdflags + self.exclude_nodump = args.exclude_nodump + self.files_cache_mode = args.files_cache_mode dry_run = args.dry_run t0 = datetime.utcnow() + t0_monotonic = time.monotonic() if not dry_run: - with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress, + lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync) as cache: archive = Archive(repository, key, manifest, args.location.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress, - chunker_params=args.chunker_params, start=t0, - compression=args.compression, compression_files=args.compression_files) - create_inner(archive, cache) + numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime, + progress=args.progress, + chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic, + log_json=args.log_json) + metadata_collector = MetadataCollector(noatime=args.noatime, noctime=args.noctime, + nobsdflags=args.nobsdflags, numeric_owner=args.numeric_owner) + cp = ChunksProcessor(cache=cache, key=key, + add_item=archive.add_item, write_checkpoint=archive.write_checkpoint, + checkpoint_interval=args.checkpoint_interval) + fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key, + process_file_chunks=cp.process_file_chunks, add_item=archive.add_item, + chunker_params=args.chunker_params) + create_inner(archive, cache, fso) else: - create_inner(None, None) + create_inner(None, None, None) return self.exit_code - def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, - keep_tag_files, skip_inodes, path, restrict_dev, + def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, + keep_exclude_tags, skip_inodes, path, restrict_dev, read_special=False, dry_run=False, st=None): - if not matcher.match(path): - self.print_file_status('x', path) - return - if st is None: - try: - st = os.lstat(path) - except OSError as e: - self.print_warning('%s: %s', path, e) - return - if (st.st_ino, st.st_dev) in skip_inodes: - return - # Entering a new filesystem? 
- if restrict_dev is not None and st.st_dev != restrict_dev: - return - status = None - # Ignore if nodump flag is set + """ + Process *path* recursively according to the various parameters. + + *st* (if given) is a *os.stat_result* object for *path*. + + This should only raise on critical errors. Per-item errors must be handled within this method. + """ try: - if get_flags(path, st) & stat.UF_NODUMP: - self.print_file_status('x', path) - return - except OSError as e: - self.print_warning('%s: %s', path, e) - return - if stat.S_ISREG(st.st_mode): - if not dry_run: - try: - status = archive.process_file(path, st, cache, self.ignore_inode) - except BackupOSError as e: - status = 'E' - self.print_warning('%s: %s', path, e) - elif stat.S_ISDIR(st.st_mode): - tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) - if tag_paths: - if keep_tag_files and not dry_run: - archive.process_dir(path, st) - for tag_path in tag_paths: - self._process(archive, cache, matcher, exclude_caches, exclude_if_present, - keep_tag_files, skip_inodes, tag_path, restrict_dev, - read_special=read_special, dry_run=dry_run) - return - if not dry_run: - status = archive.process_dir(path, st) - try: - entries = helpers.scandir_inorder(path) - except OSError as e: - status = 'E' - self.print_warning('%s: %s', path, e) + recurse_excluded_dir = False + if matcher.match(path): + if st is None: + with backup_io('stat'): + st = os.stat(path, follow_symlinks=False) else: - for dirent in entries: - normpath = os.path.normpath(dirent.path) - self._process(archive, cache, matcher, exclude_caches, exclude_if_present, - keep_tag_files, skip_inodes, normpath, restrict_dev, - read_special=read_special, dry_run=dry_run) - elif stat.S_ISLNK(st.st_mode): - if not dry_run: - if not read_special: - status = archive.process_symlink(path, st) - else: - st_target = os.stat(path) - if is_special(st_target.st_mode): - status = archive.process_file(path, st_target, cache) + self.print_file_status('x', path) + # get out here as quickly as possible: + # we only need to continue if we shall recurse into an excluded directory. + # if we shall not recurse, then do not even touch (stat()) the item, it + # could trigger an error, e.g. if access is forbidden, see #3209. + if not matcher.recurse_dir: + return + if st is None: + with backup_io('stat'): + st = os.stat(path, follow_symlinks=False) + recurse_excluded_dir = stat.S_ISDIR(st.st_mode) + if not recurse_excluded_dir: + return + + if (st.st_ino, st.st_dev) in skip_inodes: + return + # if restrict_dev is given, we do not want to recurse into a new filesystem, + # but we WILL save the mountpoint directory (or more precise: the root + # directory of the mounted filesystem that shadows the mountpoint dir). 
+ recurse = restrict_dev is None or st.st_dev == restrict_dev + status = None + if self.exclude_nodump: + # Ignore if nodump flag is set + with backup_io('flags'): + if get_flags(path, st) & stat.UF_NODUMP: + self.print_file_status('x', path) + return + if stat.S_ISREG(st.st_mode): + if not dry_run: + status = fso.process_file(path, st, cache, self.ignore_inode, self.files_cache_mode) + elif stat.S_ISDIR(st.st_mode): + if recurse: + tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) + if tag_paths: + if keep_exclude_tags and not dry_run: + fso.process_dir(path, st) + for tag_path in tag_paths: + self._process(fso, cache, matcher, exclude_caches, exclude_if_present, + keep_exclude_tags, skip_inodes, tag_path, restrict_dev, + read_special=read_special, dry_run=dry_run) + self.print_file_status('x', path) + return + if not dry_run: + if not recurse_excluded_dir: + status = fso.process_dir(path, st) + if recurse: + with backup_io('scandir'): + entries = helpers.scandir_inorder(path) + for dirent in entries: + normpath = os.path.normpath(dirent.path) + self._process(fso, cache, matcher, exclude_caches, exclude_if_present, + keep_exclude_tags, skip_inodes, normpath, restrict_dev, + read_special=read_special, dry_run=dry_run) + elif stat.S_ISLNK(st.st_mode): + if not dry_run: + if not read_special: + status = fso.process_symlink(path, st) else: - status = archive.process_symlink(path, st) - elif stat.S_ISFIFO(st.st_mode): - if not dry_run: - if not read_special: - status = archive.process_fifo(path, st) - else: - status = archive.process_file(path, st, cache) - elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): - if not dry_run: - if not read_special: - status = archive.process_dev(path, st) - else: - status = archive.process_file(path, st, cache) - elif stat.S_ISSOCK(st.st_mode): - # Ignore unix sockets - return - elif stat.S_ISDOOR(st.st_mode): - # Ignore Solaris doors - return - elif stat.S_ISPORT(st.st_mode): - # Ignore Solaris event ports - return - else: - self.print_warning('Unknown file type: %s', path) - return + try: + st_target = os.stat(path) + except OSError: + special = False + else: + special = is_special(st_target.st_mode) + if special: + status = fso.process_file(path, st_target, cache) + else: + status = fso.process_symlink(path, st) + elif stat.S_ISFIFO(st.st_mode): + if not dry_run: + if not read_special: + status = fso.process_fifo(path, st) + else: + status = fso.process_file(path, st, cache) + elif stat.S_ISCHR(st.st_mode): + if not dry_run: + if not read_special: + status = fso.process_dev(path, st, 'c') + else: + status = fso.process_file(path, st, cache) + elif stat.S_ISBLK(st.st_mode): + if not dry_run: + if not read_special: + status = fso.process_dev(path, st, 'b') + else: + status = fso.process_file(path, st, cache) + elif stat.S_ISSOCK(st.st_mode): + # Ignore unix sockets + return + elif stat.S_ISDOOR(st.st_mode): + # Ignore Solaris doors + return + elif stat.S_ISPORT(st.st_mode): + # Ignore Solaris event ports + return + else: + self.print_warning('Unknown file type: %s', path) + return + except BackupOSError as e: + self.print_warning('%s: %s', path, e) + status = 'E' # Status output if status is None: if not dry_run: @@ -418,7 +667,24 @@ class Archiver: keep_tag_files, skip_inodes, path + stream[:-6], restrict_dev, read_special, dry_run, st) - @with_repository() + if not recurse_excluded_dir: + self.print_file_status(status, path) + + @staticmethod + def build_filter(matcher, peek_and_store_hardlink_masters, strip_components): + if 
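The restrict_dev comment above describes the one-file-system rule: the mountpoint directory itself is stored, but the walk does not descend into the foreign filesystem. A small generic sketch of that rule (illustrative only, no error handling):

    import os
    import stat

    def walk(path, restrict_dev=None):
        """Yield paths; restrict_dev is the st_dev the walk is confined to (or None)."""
        st = os.stat(path, follow_symlinks=False)
        yield path                                   # the item itself is always taken
        recurse = (stat.S_ISDIR(st.st_mode) and
                   (restrict_dev is None or st.st_dev == restrict_dev))
        if recurse:
            for entry in sorted(os.scandir(path), key=lambda e: e.name):
                yield from walk(entry.path, restrict_dev)

    # comparable to "borg create --one-file-system ... /":
    # root_dev = os.stat('/').st_dev
    # paths = list(walk('/', restrict_dev=root_dev))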
strip_components: + def item_filter(item): + matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]) + peek_and_store_hardlink_masters(item, matched) + return matched + else: + def item_filter(item): + matched = matcher.match(item.path) + peek_and_store_hardlink_masters(item, matched) + return matched + return item_filter + + @with_repository(compatibility=(Manifest.Operation.READ,)) @with_archive def do_extract(self, args, repository, manifest, key, archive): """Extract archive contents""" @@ -428,8 +694,9 @@ class Archiver: if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )): logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8') - matcher, include_patterns = self.build_matcher(args.excludes, args.paths) + matcher = self.build_matcher(args.patterns, args.paths) + progress = args.progress output_list = args.output_list dry_run = args.dry_run stdout = args.stdout @@ -439,21 +706,24 @@ class Archiver: partial_extract = not matcher.empty() or strip_components hardlink_masters = {} if partial_extract else None - def item_is_hardlink_master(item): - return (partial_extract and stat.S_ISREG(item.mode) and - item.get('hardlink_master', True) and 'source' not in item) + def peek_and_store_hardlink_masters(item, matched): + if (partial_extract and not matched and hardlinkable(item.mode) and + item.get('hardlink_master', True) and 'source' not in item): + hardlink_masters[item.get('path')] = (item.get('chunks'), None) - for item in archive.iter_items(preload=True, - filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)): + filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) + if progress: + pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1, msgid='extract') + pi.output('Calculating size') + extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter)) + pi.total = extracted_size + else: + pi = None + + for item in archive.iter_items(filter, preload=True): orig_path = item.path - if item_is_hardlink_master(item): - hardlink_masters[orig_path] = (item.get('chunks'), None) - if not matcher.match(item.path): - continue if strip_components: item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) - if not item.path: - continue if not args.dry_run: while dirs and not item.path.startswith(dirs[-1].path): dir_item = dirs.pop(-1) @@ -465,243 +735,292 @@ class Archiver: logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) try: if dry_run: - archive.extract_item(item, dry_run=True) + archive.extract_item(item, dry_run=True, pi=pi) else: if stat.S_ISDIR(item.mode): dirs.append(item) - archive.extract_item(item, restore_attrs=False) + archive.extract_item(item, stdout=stdout, restore_attrs=False) else: archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters, - original_path=orig_path) + stripped_components=strip_components, original_path=orig_path, pi=pi) except BackupOSError as e: self.print_warning('%s: %s', remove_surrogates(orig_path), e) + if pi: + pi.finish() + if not args.dry_run: + pi = ProgressIndicatorPercent(total=len(dirs), msg='Setting directory permissions %3.0f%%', + msgid='extract.permissions') while dirs: + pi.show() dir_item = dirs.pop(-1) try: - archive.extract_item(dir_item) + archive.extract_item(dir_item, stdout=stdout) except BackupOSError as e: self.print_warning('%s: %s', 
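build_filter() above combines pattern matching with --strip-components; the path-handling part is simply "drop the first N path elements and skip the item if nothing is left". Sketch (POSIX paths assumed):

    import os

    def strip_components(path, n):
        """Return the shortened path, or '' if the whole path was consumed."""
        return os.sep.join(path.split(os.sep)[n:])

    assert strip_components('home/user/file.txt', 1) == 'user/file.txt'
    assert strip_components('home/user/file.txt', 3) == ''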
remove_surrogates(dir_item.path), e) - for pattern in include_patterns: - if pattern.match_count == 0: - self.print_warning("Include pattern '%s' never matched.", pattern) + for pattern in matcher.get_unmatched_include_patterns(): + self.print_warning("Include pattern '%s' never matched.", pattern) + if pi: + # clear progress output + pi.finish() return self.exit_code - @with_repository() + @with_repository(compatibility=(Manifest.Operation.READ,)) + @with_archive + def do_export_tar(self, args, repository, manifest, key, archive): + """Export archive contents as a tarball""" + self.output_list = args.output_list + + # A quick note about the general design of tar_filter and tarfile; + # The tarfile module of Python can provide some compression mechanisms + # by itself, using the builtin gzip, bz2 and lzma modules (and "tarmodes" + # such as "w:xz"). + # + # Doing so would have three major drawbacks: + # For one the compressor runs on the same thread as the program using the + # tarfile, stealing valuable CPU time from Borg and thus reducing throughput. + # Then this limits the available options - what about lz4? Brotli? zstd? + # The third issue is that systems can ship more optimized versions than those + # built into Python, e.g. pigz or pxz, which can use more than one thread for + # compression. + # + # Therefore we externalize compression by using a filter program, which has + # none of these drawbacks. The only issue of using an external filter is + # that it has to be installed -- hardly a problem, considering that + # the decompressor must be installed as well to make use of the exported tarball! + + filter = None + if args.tar_filter == 'auto': + # Note that filter remains None if tarfile is '-'. + if args.tarfile.endswith('.tar.gz'): + filter = 'gzip' + elif args.tarfile.endswith('.tar.bz2'): + filter = 'bzip2' + elif args.tarfile.endswith('.tar.xz'): + filter = 'xz' + logger.debug('Automatically determined tar filter: %s', filter) + else: + filter = args.tar_filter + + tarstream = dash_open(args.tarfile, 'wb') + tarstream_close = args.tarfile != '-' + + if filter: + # When we put a filter between us and the final destination, + # the selected output (tarstream until now) becomes the output of the filter (=filterout). + # The decision whether to close that or not remains the same. + filterout = tarstream + filterout_close = tarstream_close + env = prepare_subprocess_env(system=True) + # There is no deadlock potential here (the subprocess docs warn about this), because + # communication with the process is a one-way road, i.e. the process can never block + # for us to do something while we block on the process for something different. + filterproc = popen_with_error_handling(filter, stdin=subprocess.PIPE, stdout=filterout, + log_prefix='--tar-filter: ', env=env) + if not filterproc: + return EXIT_ERROR + # Always close the pipe, otherwise the filter process would not notice when we are done. + tarstream = filterproc.stdin + tarstream_close = True + + # The | (pipe) symbol instructs tarfile to use a streaming mode of operation + # where it never seeks on the passed fileobj. + tar = tarfile.open(fileobj=tarstream, mode='w|') + + self._export_tar(args, archive, tar) + + # This does not close the fileobj (tarstream) we passed to it -- a side effect of the | mode. 
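The design note in do_export_tar() above (externalize compression into a filter program, stream with tar mode 'w|', close the pipe but not the final output file) reduces to the following self-contained sketch. It is illustrative only and omits the error handling and the '-' special case of the real command:

    import subprocess
    import tarfile

    def export_tar_via_filter(paths, out_path, filter_cmd=('gzip',)):
        with open(out_path, 'wb') as out:
            proc = subprocess.Popen(list(filter_cmd), stdin=subprocess.PIPE, stdout=out)
            # 'w|' = streaming mode: tarfile never seeks on the fileobj (here: the filter's stdin)
            tar = tarfile.open(fileobj=proc.stdin, mode='w|')
            for path in paths:
                tar.add(path)
            tar.close()              # does not close proc.stdin (side effect of the '|' mode)
            proc.stdin.close()       # let the filter see EOF so it can finish and exit
            return proc.wait()       # non-zero means the output file is likely unusable

    # export_tar_via_filter(['some/directory'], 'backup.tar.gz')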
+ tar.close() + + if tarstream_close: + tarstream.close() + + if filter: + logger.debug('Done creating tar, waiting for filter to die...') + rc = filterproc.wait() + if rc: + logger.error('--tar-filter exited with code %d, output file is likely unusable!', rc) + self.exit_code = EXIT_ERROR + else: + logger.debug('filter exited with code %d', rc) + + if filterout_close: + filterout.close() + + return self.exit_code + + def _export_tar(self, args, archive, tar): + matcher = self.build_matcher(args.patterns, args.paths) + + progress = args.progress + output_list = args.output_list + strip_components = args.strip_components + partial_extract = not matcher.empty() or strip_components + hardlink_masters = {} if partial_extract else None + + def peek_and_store_hardlink_masters(item, matched): + if (partial_extract and not matched and hardlinkable(item.mode) and + item.get('hardlink_master', True) and 'source' not in item): + hardlink_masters[item.get('path')] = (item.get('chunks'), None) + + filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) + + if progress: + pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract') + pi.output('Calculating size') + extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter)) + pi.total = extracted_size + else: + pi = None + + def item_content_stream(item): + """ + Return a file-like object that reads from the chunks of *item*. + """ + chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks]) + if pi: + info = [remove_surrogates(item.path)] + return ChunkIteratorFileWrapper(chunk_iterator, + lambda read_bytes: pi.show(increase=len(read_bytes), info=info)) + else: + return ChunkIteratorFileWrapper(chunk_iterator) + + def item_to_tarinfo(item, original_path): + """ + Transform a Borg *item* into a tarfile.TarInfo object. + + Return a tuple (tarinfo, stream), where stream may be a file-like object that represents + the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item* + cannot be represented as a TarInfo object and should be skipped. + """ + + # If we would use the PAX (POSIX) format (which we currently don't), + # we can support most things that aren't possible with classic tar + # formats, including GNU tar, such as: + # atime, ctime, possibly Linux capabilities (security.* xattrs) + # and various additions supported by GNU tar in POSIX mode. + + stream = None + tarinfo = tarfile.TarInfo() + tarinfo.name = item.path + tarinfo.mtime = item.mtime / 1e9 + tarinfo.mode = stat.S_IMODE(item.mode) + tarinfo.uid = item.uid + tarinfo.gid = item.gid + tarinfo.uname = item.user or '' + tarinfo.gname = item.group or '' + # The linkname in tar has the same dual use the 'source' attribute of Borg items, + # i.e. for symlinks it means the destination, while for hardlinks it refers to the + # file. + # Since hardlinks in tar have a different type code (LNKTYPE) the format might + # support hardlinking arbitrary objects (including symlinks and directories), but + # whether implementations actually support that is a whole different question... 
+ tarinfo.linkname = "" + + modebits = stat.S_IFMT(item.mode) + if modebits == stat.S_IFREG: + tarinfo.type = tarfile.REGTYPE + if 'source' in item: + source = os.sep.join(item.source.split(os.sep)[strip_components:]) + if hardlink_masters is None: + linkname = source + else: + chunks, linkname = hardlink_masters.get(item.source, (None, source)) + if linkname: + # Master was already added to the archive, add a hardlink reference to it. + tarinfo.type = tarfile.LNKTYPE + tarinfo.linkname = linkname + elif chunks is not None: + # The item which has the chunks was not put into the tar, therefore + # we do that now and update hardlink_masters to reflect that. + item.chunks = chunks + tarinfo.size = item.get_size() + stream = item_content_stream(item) + hardlink_masters[item.get('source') or original_path] = (None, item.path) + else: + tarinfo.size = item.get_size() + stream = item_content_stream(item) + elif modebits == stat.S_IFDIR: + tarinfo.type = tarfile.DIRTYPE + elif modebits == stat.S_IFLNK: + tarinfo.type = tarfile.SYMTYPE + tarinfo.linkname = item.source + elif modebits == stat.S_IFBLK: + tarinfo.type = tarfile.BLKTYPE + tarinfo.devmajor = os.major(item.rdev) + tarinfo.devminor = os.minor(item.rdev) + elif modebits == stat.S_IFCHR: + tarinfo.type = tarfile.CHRTYPE + tarinfo.devmajor = os.major(item.rdev) + tarinfo.devminor = os.minor(item.rdev) + elif modebits == stat.S_IFIFO: + tarinfo.type = tarfile.FIFOTYPE + else: + self.print_warning('%s: unsupported file type %o for tar export', remove_surrogates(item.path), modebits) + set_ec(EXIT_WARNING) + return None, stream + return tarinfo, stream + + for item in archive.iter_items(filter, preload=True): + orig_path = item.path + if strip_components: + item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) + tarinfo, stream = item_to_tarinfo(item, orig_path) + if tarinfo: + if output_list: + logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) + tar.addfile(tarinfo, stream) + + if pi: + pi.finish() + + for pattern in matcher.get_unmatched_include_patterns(): + self.print_warning("Include pattern '%s' never matched.", pattern) + return self.exit_code + + @with_repository(compatibility=(Manifest.Operation.READ,)) @with_archive def do_diff(self, args, repository, manifest, key, archive): """Diff contents of two archives""" - def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2): - chunks1 = archive1.pipeline.fetch_many(chunk_ids1) - chunks2 = archive2.pipeline.fetch_many(chunk_ids2) - return self.compare_chunk_contents(chunks1, chunks2) - def sum_chunk_size(item, consider_ids=None): - if item.get('deleted'): - return None - else: - return sum(c.size for c in item.chunks - if consider_ids is None or c.id in consider_ids) - - def get_owner(item): - if args.numeric_owner: - return item.uid, item.gid - else: - return item.user, item.group - - def get_mode(item): - if 'mode' in item: - return stat.filemode(item.mode) - else: - return [None] - - def has_hardlink_master(item, hardlink_masters): - return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters - - def compare_link(item1, item2): - # These are the simple link cases. For special cases, e.g. if a - # regular file is replaced with a link or vice versa, it is - # indicated in compare_mode instead. 
- if item1.get('deleted'): - return 'added link' - elif item2.get('deleted'): - return 'removed link' - elif 'source' in item1 and 'source' in item2 and item1.source != item2.source: - return 'changed link' - - def contents_changed(item1, item2): - if can_compare_chunk_ids: - return item1.chunks != item2.chunks - else: - if sum_chunk_size(item1) != sum_chunk_size(item2): - return True - else: - chunk_ids1 = [c.id for c in item1.chunks] - chunk_ids2 = [c.id for c in item2.chunks] - return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) - - def compare_content(path, item1, item2): - if contents_changed(item1, item2): - if item1.get('deleted'): - return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2)))) - elif item2.get('deleted'): - return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1)))) - else: - chunk_ids1 = {c.id for c in item1.chunks} - chunk_ids2 = {c.id for c in item2.chunks} - added_ids = chunk_ids2 - chunk_ids1 - removed_ids = chunk_ids1 - chunk_ids2 - added = sum_chunk_size(item2, added_ids) - removed = sum_chunk_size(item1, removed_ids) - return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True), - format_file_size(-removed, precision=1, sign=True))) - - def compare_directory(item1, item2): - if item2.get('deleted') and not item1.get('deleted'): - return 'removed directory' - elif item1.get('deleted') and not item2.get('deleted'): - return 'added directory' - - def compare_owner(item1, item2): - user1, group1 = get_owner(item1) - user2, group2 = get_owner(item2) - if user1 != user2 or group1 != group2: - return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2) - - def compare_mode(item1, item2): - if item1.mode != item2.mode: - return '[{} -> {}]'.format(get_mode(item1), get_mode(item2)) - - def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): - """ - Compare two items with identical paths. 
- :param deleted: Whether one of the items has been deleted - """ - changes = [] - - if has_hardlink_master(item1, hardlink_masters): - item1 = hardlink_masters[item1.source][0] - - if has_hardlink_master(item2, hardlink_masters): - item2 = hardlink_masters[item2.source][1] - - if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': - changes.append(compare_link(item1, item2)) - - if 'chunks' in item1 and 'chunks' in item2: - changes.append(compare_content(path, item1, item2)) - - if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': - changes.append(compare_directory(item1, item2)) - - if not deleted: - changes.append(compare_owner(item1, item2)) - changes.append(compare_mode(item1, item2)) - - changes = [x for x in changes if x] - if changes: - output_line = (remove_surrogates(path), ' '.join(changes)) - - if args.sort: - output.append(output_line) - else: - print_output(output_line) - - def print_output(line): - print("{:<19} {}".format(line[1], line[0])) - - def compare_archives(archive1, archive2, matcher): - def hardlink_master_seen(item): - return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters - - def is_hardlink_master(item): - return item.get('hardlink_master', True) and 'source' not in item - - def update_hardlink_masters(item1, item2): - if is_hardlink_master(item1) or is_hardlink_master(item2): - hardlink_masters[item1.path] = (item1, item2) - - def compare_or_defer(item1, item2): - update_hardlink_masters(item1, item2) - if not hardlink_master_seen(item1) or not hardlink_master_seen(item2): - deferred.append((item1, item2)) - else: - compare_items(output, item1.path, item1, item2, hardlink_masters) - - orphans_archive1 = collections.OrderedDict() - orphans_archive2 = collections.OrderedDict() - deferred = [] - hardlink_masters = {} - output = [] - - for item1, item2 in zip_longest( - archive1.iter_items(lambda item: matcher.match(item.path)), - archive2.iter_items(lambda item: matcher.match(item.path)), - ): - if item1 and item2 and item1.path == item2.path: - compare_or_defer(item1, item2) - continue - if item1: - matching_orphan = orphans_archive2.pop(item1.path, None) - if matching_orphan: - compare_or_defer(item1, matching_orphan) - else: - orphans_archive1[item1.path] = item1 - if item2: - matching_orphan = orphans_archive1.pop(item2.path, None) - if matching_orphan: - compare_or_defer(matching_orphan, item2) - else: - orphans_archive2[item2.path] = item2 - # At this point orphans_* contain items that had no matching partner in the other archive - deleted_item = Item( - deleted=True, - chunks=[], - mode=0, - ) - for added in orphans_archive2.values(): - path = added.path - deleted_item.path = path - update_hardlink_masters(deleted_item, added) - compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True) - for deleted in orphans_archive1.values(): - path = deleted.path - deleted_item.path = path - update_hardlink_masters(deleted, deleted_item) - compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True) - for item1, item2 in deferred: - assert hardlink_master_seen(item1) - assert hardlink_master_seen(item2) - compare_items(output, item1.path, item1, item2, hardlink_masters) - - for line in sorted(output): - print_output(line) + def print_output(diff, path): + print("{:<19} {}".format(diff, path)) archive1 = archive - archive2 = Archive(repository, key, manifest, args.archive2) + archive2 = Archive(repository, key, manifest, args.archive2, + 
consider_part_files=args.consider_part_files) - can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get( - b'chunker_params', True) or args.same_chunker_params + can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get( + 'chunker_params', True) or args.same_chunker_params if not can_compare_chunk_ids: self.print_warning('--chunker-params might be different between archives, diff will be slow.\n' 'If you know for certain that they are the same, pass --same-chunker-params ' 'to override this check.') - matcher, include_patterns = self.build_matcher(args.excludes, args.paths) + matcher = self.build_matcher(args.patterns, args.paths) - compare_archives(archive1, archive2, matcher) + diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids) + # Conversion to string and filtering for diff.equal to save memory if sorting + diffs = ((path, str(diff)) for path, diff in diffs if not diff.equal) + + if args.sort: + diffs = sorted(diffs) + + for path, diff in diffs: + print_output(diff, path) + + for pattern in matcher.get_unmatched_include_patterns(): + self.print_warning("Include pattern '%s' never matched.", pattern) - for pattern in include_patterns: - if pattern.match_count == 0: - self.print_warning("Include pattern '%s' never matched.", pattern) return self.exit_code - @with_repository(exclusive=True, cache=True) + @with_repository(exclusive=True, cache=True, compatibility=(Manifest.Operation.CHECK,)) @with_archive def do_rename(self, args, repository, manifest, key, cache, archive): """Rename an existing archive""" - archive.rename(args.name) + name = replace_placeholders(args.name) + archive.rename(name) manifest.write() repository.commit() cache.commit() @@ -709,11 +1028,59 @@ class Archiver: @with_repository(exclusive=True, manifest=False) def do_delete(self, args, repository): - """Delete an existing repository or archive""" - if args.location.archive: - manifest, key = Manifest.load(repository) - with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: - archive = Archive(repository, key, manifest, args.location.archive, cache=cache) + """Delete an existing repository or archives""" + archive_filter_specified = args.first or args.last or args.prefix or args.glob_archives + explicit_archives_specified = args.location.archive or args.archives + if archive_filter_specified and explicit_archives_specified: + self.print_error('Mixing archive filters and explicitly named archives is not supported.') + return self.exit_code + if archive_filter_specified or explicit_archives_specified: + return self._delete_archives(args, repository) + else: + return self._delete_repository(args, repository) + + def _delete_archives(self, args, repository): + """Delete archives""" + manifest, key = Manifest.load(repository, (Manifest.Operation.DELETE,)) + + if args.location.archive or args.archives: + archives = list(args.archives) + if args.location.archive: + archives.insert(0, args.location.archive) + archive_names = tuple(archives) + else: + archive_names = tuple(x.name for x in manifest.archives.list_considering(args)) + if not archive_names: + return self.exit_code + + if args.forced == 2: + deleted = False + for i, archive_name in enumerate(archive_names, 1): + try: + del manifest.archives[archive_name] + except KeyError: + self.exit_code = EXIT_WARNING + logger.warning('Archive {} not found ({}/{}).'.format(archive_name, i, len(archive_names))) + else: + 
deleted = True + logger.info('Deleted {} ({}/{}).'.format(archive_name, i, len(archive_names))) + if deleted: + manifest.write() + # note: might crash in compact() after committing the repo + repository.commit() + logger.info('Done. Run "borg check --repair" to clean up the mess.') + else: + logger.warning('Aborted.') + return self.exit_code + + stats_logger = logging.getLogger('borg.output.stats') + if args.stats: + log_multi(DASHES, STATS_HEADER, logger=stats_logger) + + with Cache(repository, key, manifest, progress=args.progress, lock_wait=self.lock_wait) as cache: + for i, archive_name in enumerate(archive_names, 1): + logger.info('Deleting {} ({}/{}):'.format(archive_name, i, len(archive_names))) + archive = Archive(repository, key, manifest, archive_name, cache=cache) stats = Statistics() archive.delete(stats, progress=args.progress, forced=args.forced) manifest.write() @@ -721,62 +1088,78 @@ class Archiver: cache.commit() logger.info("Archive deleted.") if args.stats: - log_multi(DASHES, - stats.summary.format(label='Deleted data:', stats=stats), - str(cache), - DASHES, logger=logging.getLogger('borg.output.stats')) - else: - if not args.cache_only: - msg = [] - try: - manifest, key = Manifest.load(repository) - except NoManifestError: - msg.append("You requested to completely DELETE the repository *including* all archives it may contain.") - msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.") - else: - msg.append("You requested to completely DELETE the repository *including* all archives it contains:") - for archive_info in manifest.list_archive_infos(sort_by='ts'): - msg.append(format_archive(archive_info)) - msg.append("Type 'YES' if you understand this and want to continue: ") - msg = '\n'.join(msg) - if not yes(msg, false_msg="Aborting.", truish=('YES', ), - env_var_override='BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'): - self.exit_code = EXIT_ERROR - return self.exit_code - repository.destroy() - logger.info("Repository deleted.") - Cache.destroy(repository) - logger.info("Cache deleted.") + log_multi(stats.summary.format(label='Deleted data:', stats=stats), + DASHES, logger=stats_logger) + if args.forced == 0 and self.exit_code: + break + if args.stats: + stats_logger.info(str(cache)) + return self.exit_code - @with_repository() - def do_mount(self, args, repository, manifest, key): + def _delete_repository(self, args, repository): + """Delete a repository""" + if not args.cache_only: + msg = [] + try: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + except NoManifestError: + msg.append("You requested to completely DELETE the repository *including* all archives it may " + "contain.") + msg.append("This repository seems to have no manifest, so we can't tell anything about its " + "contents.") + else: + msg.append("You requested to completely DELETE the repository *including* all archives it " + "contains:") + for archive_info in manifest.archives.list(sort_by=['ts']): + msg.append(format_archive(archive_info)) + msg.append("Type 'YES' if you understand this and want to continue: ") + msg = '\n'.join(msg) + if not yes(msg, false_msg="Aborting.", invalid_msg='Invalid answer, aborting.', truish=('YES',), + retry=False, env_var_override='BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'): + self.exit_code = EXIT_ERROR + return self.exit_code + repository.destroy() + logger.info("Repository deleted.") + Cache.destroy(repository) + logger.info("Cache deleted.") + return self.exit_code + + def do_mount(self, args): """Mount 
archive or an entire repository as a FUSE filesystem""" + # Perform these checks before opening the repository and asking for a passphrase. + try: - from .fuse import FuseOperations + import borg.fuse except ImportError as e: - self.print_error('Loading fuse support failed [ImportError: %s]' % str(e)) + self.print_error('borg mount not available: loading FUSE support failed [ImportError: %s]' % str(e)) return self.exit_code if not os.path.isdir(args.mountpoint) or not os.access(args.mountpoint, os.R_OK | os.W_OK | os.X_OK): self.print_error('%s: Mountpoint must be a writable directory' % args.mountpoint) return self.exit_code - with cache_if_remote(repository) as cached_repo: - if args.location.archive: - archive = Archive(repository, key, manifest, args.location.archive) - else: - archive = None - operations = FuseOperations(key, repository, manifest, archive, cached_repo) + return self._do_mount(args) + + @with_repository(compatibility=(Manifest.Operation.READ,)) + def _do_mount(self, args, repository, manifest, key): + from .fuse import FuseOperations + + with cache_if_remote(repository, decrypted_cache=key) as cached_repo: + operations = FuseOperations(key, repository, manifest, args, cached_repo) logger.info("Mounting filesystem") try: operations.mount(args.mountpoint, args.options, args.foreground) except RuntimeError: - # Relevant error message already printed to stderr by fuse + # Relevant error message already printed to stderr by FUSE self.exit_code = EXIT_ERROR return self.exit_code - @with_repository() + def do_umount(self, args): + """un-mount the FUSE filesystem""" + return umount(args.mountpoint) + + @with_repository(compatibility=(Manifest.Operation.READ,)) def do_list(self, args, repository, manifest, key): """List archive or repository contents""" if not hasattr(sys.stdout, 'buffer'): @@ -788,63 +1171,160 @@ class Archiver: write = sys.stdout.buffer.write if args.location.archive: - matcher, _ = self.build_matcher(args.excludes, args.paths) - with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: - archive = Archive(repository, key, manifest, args.location.archive, cache=cache) - - if args.format: - format = args.format - elif args.short: - format = "{path}{NL}" - else: - if sys.platform == 'win32': - format = "{user:15} {size:8} {isomtime} {path}{extra}{NL}" - else: - format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}" - formatter = ItemFormatter(archive, format) - - for item in archive.iter_items(lambda item: matcher.match(item.path)): - write(safe_encode(formatter.format_item(item))) + if args.json: + self.print_error('The --json option is only valid for listing archives, not archive contents.') + return self.exit_code + return self._list_archive(args, repository, manifest, key, write) else: - if args.format: - format = args.format - elif args.short: - format = "{archive}{NL}" - else: - format = "{archive:<36} {time} [{id}]{NL}" - formatter = ArchiveFormatter(format) + if args.json_lines: + self.print_error('The --json-lines option is only valid for listing archive contents, not archives.') + return self.exit_code + return self._list_repository(args, repository, manifest, key, write) - for archive_info in manifest.list_archive_infos(sort_by='ts'): - if args.prefix and not archive_info.name.startswith(args.prefix): - continue - write(safe_encode(formatter.format_item(archive_info))) + def _list_archive(self, args, repository, manifest, key, write): + matcher = self.build_matcher(args.patterns, args.paths) + if args.format is not 
None: + format = args.format + elif args.short: + format = "{path}{NL}" + else: + if sys.platform != 'win32': + format = "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}" + else: + format = "{user:8} {size:8} {mtime} {path}{extra}{NL}" + + def _list_inner(cache): + archive = Archive(repository, key, manifest, args.location.archive, cache=cache, + consider_part_files=args.consider_part_files) + + formatter = ItemFormatter(archive, format, json_lines=args.json_lines) + for item in archive.iter_items(lambda item: matcher.match(item.path)): + write(safe_encode(formatter.format_item(item))) + + # Only load the cache if it will be used + if ItemFormatter.format_needs_cache(format): + with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: + _list_inner(cache) + else: + _list_inner(cache=None) return self.exit_code - @with_repository(cache=True) - @with_archive - def do_info(self, args, repository, manifest, key, archive, cache): + def _list_repository(self, args, repository, manifest, key, write): + if args.format is not None: + format = args.format + elif args.short: + format = "{archive}{NL}" + else: + format = "{archive:<36} {time} [{id}]{NL}" + formatter = ArchiveFormatter(format, repository, manifest, key, json=args.json) + + output_data = [] + + for archive_info in manifest.archives.list_considering(args): + if args.json: + output_data.append(formatter.get_item_data(archive_info)) + else: + write(safe_encode(formatter.format_item(archive_info))) + + if args.json: + json_print(basic_json_data(manifest, extra={ + 'archives': output_data + })) + + return self.exit_code + + @with_repository(cache=True, compatibility=(Manifest.Operation.READ,)) + def do_info(self, args, repository, manifest, key, cache): """Show archive details such as disk space used""" + if any((args.location.archive, args.first, args.last, args.prefix, args.glob_archives)): + return self._info_archives(args, repository, manifest, key, cache) + else: + return self._info_repository(args, repository, manifest, key, cache) + + def _info_archives(self, args, repository, manifest, key, cache): def format_cmdline(cmdline): return remove_surrogates(' '.join(shlex.quote(x) for x in cmdline)) - stats = archive.calc_stats(cache) - print('Archive name: %s' % archive.name) - print('Archive fingerprint: %s' % archive.fpr) - print('Comment: %s' % archive.metadata.get(b'comment', '')) - print('Hostname: %s' % archive.metadata[b'hostname']) - print('Username: %s' % archive.metadata[b'username']) - print('Time (start): %s' % format_time(to_localtime(archive.ts))) - print('Time (end): %s' % format_time(to_localtime(archive.ts_end))) - print('Duration: %s' % archive.duration_from_meta) - print('Number of files: %d' % stats.nfiles) - print('Command line: %s' % format_cmdline(archive.metadata[b'cmdline'])) - print(DASHES) - print(str(stats)) - print(str(cache)) + if args.location.archive: + archive_names = (args.location.archive,) + else: + archive_names = tuple(x.name for x in manifest.archives.list_considering(args)) + if not archive_names: + return self.exit_code + + output_data = [] + + for i, archive_name in enumerate(archive_names, 1): + archive = Archive(repository, key, manifest, archive_name, cache=cache, + consider_part_files=args.consider_part_files) + info = archive.info() + if args.json: + output_data.append(info) + else: + info['duration'] = format_timedelta(timedelta(seconds=info['duration'])) + info['command_line'] = format_cmdline(info['command_line']) + print(textwrap.dedent(""" + Archive name: 
{name} + Archive fingerprint: {id} + Comment: {comment} + Hostname: {hostname} + Username: {username} + Time (start): {start} + Time (end): {end} + Duration: {duration} + Number of files: {stats[nfiles]} + Command line: {command_line} + Utilization of maximum supported archive size: {limits[max_archive_size]:.0%} + ------------------------------------------------------------------------------ + Original size Compressed size Deduplicated size + This archive: {stats[original_size]:>20s} {stats[compressed_size]:>20s} {stats[deduplicated_size]:>20s} + {cache} + """).strip().format(cache=cache, **info)) + if self.exit_code: + break + if not args.json and len(archive_names) - i: + print() + + if args.json: + json_print(basic_json_data(manifest, cache=cache, extra={ + 'archives': output_data, + })) return self.exit_code - @with_repository() + def _info_repository(self, args, repository, manifest, key, cache): + info = basic_json_data(manifest, cache=cache, extra={ + 'security_dir': cache.security_manager.dir, + }) + + if args.json: + json_print(info) + else: + encryption = 'Encrypted: ' + if key.NAME == 'plaintext': + encryption += 'No' + else: + encryption += 'Yes (%s)' % key.NAME + if key.NAME.startswith('key file'): + encryption += '\nKey file: %s' % key.find_key() + info['encryption'] = encryption + + print(textwrap.dedent(""" + Repository ID: {id} + Location: {location} + {encryption} + Cache: {cache.path} + Security dir: {security_dir} + """).strip().format( + id=bin_to_hex(repository.id), + location=repository._location.canonical_path(), + **info)) + print(DASHES) + print(STATS_HEADER) + print(str(cache)) + return self.exit_code + + @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,)) def do_prune(self, args, repository, manifest, key): """Prune repository archives according to specified rules""" if not any((args.secondly, args.minutely, args.hourly, args.daily, @@ -853,10 +1333,13 @@ class Archiver: '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.') return self.exit_code - archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.prefix: - archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)] - is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search + args.glob_archives = args.prefix + '*' + checkpoint_re = r'\.checkpoint(\.\d+)?' 
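The checkpoint handling in this hunk leans on the single regular expression defined just above: it is reused both as a match_end suffix when listing archives and, anchored with \Z, as a standalone checkpoint detector a few lines further down. A quick sketch of how such a pattern classifies archive names (the names below are invented for illustration):

    import re

    checkpoint_re = r'\.checkpoint(\.\d+)?'
    is_checkpoint = re.compile(r'(%s)\Z' % checkpoint_re).search

    for name in ('host-2017-05-01',
                 'host-2017-05-01.checkpoint',
                 'host-2017-05-01.checkpoint.2'):
        # Only names ending in ".checkpoint" or ".checkpoint.N" match,
        # thanks to the \Z anchor wrapped around the base pattern.
        print(name, '->', bool(is_checkpoint(name)))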
+ archives_checkpoints = manifest.archives.list(glob=args.glob_archives, + match_end=r'(%s)?\Z' % checkpoint_re, + sort_by=['ts'], reverse=True) + is_checkpoint = re.compile(r'(%s)\Z' % checkpoint_re).search checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] # keep the latest checkpoint, if there is no later non-checkpoint archive if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]: @@ -886,8 +1369,12 @@ class Archiver: keep += prune_split(archives, '%Y', args.yearly, keep) to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) stats = Statistics() - with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache: list_logger = logging.getLogger('borg.output.list') + if args.output_list: + # set up counters for the progress display + to_delete_len = len(to_delete) + archives_deleted = 0 for archive in archives_checkpoints: if archive in to_delete: if args.dry_run: @@ -895,8 +1382,11 @@ class Archiver: list_logger.info('Would prune: %s' % format_archive(archive)) else: if args.output_list: - list_logger.info('Pruning archive: %s' % format_archive(archive)) - Archive(repository, key, manifest, archive.name, cache).delete(stats, forced=args.forced) + archives_deleted += 1 + list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive), + archives_deleted, to_delete_len)) + Archive(repository, key, manifest, archive.name, cache, + progress=args.progress).delete(stats, forced=args.forced) else: if args.output_list: list_logger.info('Keeping archive: %s' % format_archive(archive)) @@ -906,98 +1396,165 @@ class Archiver: cache.commit() if args.stats: log_multi(DASHES, + STATS_HEADER, stats.summary.format(label='Deleted data:', stats=stats), str(cache), DASHES, logger=logging.getLogger('borg.output.stats')) return self.exit_code - def do_upgrade(self, args): + @with_repository(fake=('tam', 'disable_tam'), invert_fake=True, manifest=False, exclusive=True) + def do_upgrade(self, args, repository, manifest=None, key=None): """upgrade a repository from a previous version""" - # mainly for upgrades from Attic repositories, - # but also supports borg 0.xx -> 1.0 upgrade. 
+ if args.tam: + manifest, key = Manifest.load(repository, (Manifest.Operation.CHECK,), force_tam_not_required=args.force) - repo = AtticRepositoryUpgrader(args.location.path, create=False) - try: - repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress) - except NotImplementedError as e: - print("warning: %s" % e) - repo = BorgRepositoryUpgrader(args.location.path, create=False) - try: - repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress) - except NotImplementedError as e: - print("warning: %s" % e) + if not hasattr(key, 'change_passphrase'): + print('This repository is not encrypted, cannot enable TAM.') + return EXIT_ERROR + + if not manifest.tam_verified or not manifest.config.get(b'tam_required', False): + # The standard archive listing doesn't include the archive ID like in borg 1.1.x + print('Manifest contents:') + for archive_info in manifest.archives.list(sort_by=['ts']): + print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id)) + manifest.config[b'tam_required'] = True + manifest.write() + repository.commit() + if not key.tam_required: + key.tam_required = True + key.change_passphrase(key._passphrase) + print('Key updated') + if hasattr(key, 'find_key'): + print('Key location:', key.find_key()) + if not tam_required(repository): + tam_file = tam_required_file(repository) + open(tam_file, 'w').close() + print('Updated security database') + elif args.disable_tam: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK, force_tam_not_required=True) + if tam_required(repository): + os.unlink(tam_required_file(repository)) + if key.tam_required: + key.tam_required = False + key.change_passphrase(key._passphrase) + print('Key updated') + if hasattr(key, 'find_key'): + print('Key location:', key.find_key()) + manifest.config[b'tam_required'] = False + manifest.write() + repository.commit() + else: + # mainly for upgrades from Attic repositories, + # but also supports borg 0.xx -> 1.0 upgrade. + + repo = AtticRepositoryUpgrader(args.location.path, create=False) + try: + repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress) + except NotImplementedError as e: + print("warning: %s" % e) + repo = BorgRepositoryUpgrader(args.location.path, create=False) + try: + repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress) + except NotImplementedError as e: + print("warning: %s" % e) return self.exit_code - @with_repository(cache=True, exclusive=True) + @with_repository(cache=True, exclusive=True, compatibility=(Manifest.Operation.CHECK,)) def do_recreate(self, args, repository, manifest, key, cache): """Re-create archives""" - def interrupt(signal_num, stack_frame): - if recreater.interrupt: - print("\nReceived signal, again. 
I'm not deaf.", file=sys.stderr) - else: - print("\nReceived signal, will exit cleanly.", file=sys.stderr) - recreater.interrupt = True - msg = ("recreate is an experimental feature.\n" "Type 'YES' if you understand this and want to continue: ") if not yes(msg, false_msg="Aborting.", truish=('YES',), env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'): return EXIT_ERROR - matcher, include_patterns = self.build_matcher(args.excludes, args.paths) + matcher = self.build_matcher(args.patterns, args.paths) self.output_list = args.output_list self.output_filter = args.output_filter + recompress = args.recompress != 'never' + always_recompress = args.recompress == 'always' recreater = ArchiveRecreater(repository, manifest, key, cache, matcher, exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, - keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params, - compression=args.compression, compression_files=args.compression_files, + keep_exclude_tags=args.keep_exclude_tags, chunker_params=args.chunker_params, + compression=args.compression, recompress=recompress, always_recompress=always_recompress, progress=args.progress, stats=args.stats, file_status_printer=self.print_file_status, + checkpoint_interval=args.checkpoint_interval, dry_run=args.dry_run) - with signal_handler(signal.SIGTERM, interrupt), \ - signal_handler(signal.SIGINT, interrupt): - if args.location.archive: - name = args.location.archive + if args.location.archive: + name = args.location.archive + target = replace_placeholders(args.target) if args.target else None + if recreater.is_temporary_archive(name): + self.print_error('Refusing to work on temporary archive of prior recreate: %s', name) + return self.exit_code + if not recreater.recreate(name, args.comment, target): + self.print_error('Nothing to do. Archive was not processed.\n' + 'Specify at least one pattern, PATH, --comment, re-compression or re-chunking option.') + else: + if args.target is not None: + self.print_error('--target: Need to specify single archive') + return self.exit_code + for archive in manifest.archives.list(sort_by=['ts']): + name = archive.name if recreater.is_temporary_archive(name): - self.print_error('Refusing to work on temporary archive of prior recreate: %s', name) - return self.exit_code - recreater.recreate(name, args.comment) - else: - for archive in manifest.list_archive_infos(sort_by='ts'): - name = archive.name - if recreater.is_temporary_archive(name): - continue - print('Processing', name) - if not recreater.recreate(name, args.comment): - break + continue + print('Processing', name) + if not recreater.recreate(name, args.comment): + logger.info('Skipped archive %s: Nothing to do. Archive was not processed.', name) + if not args.dry_run: manifest.write() repository.commit() cache.commit() - return self.exit_code + return self.exit_code - @with_repository(manifest=False) + @with_repository(manifest=False, exclusive=True) def do_with_lock(self, args, repository): """run a user specified command with the repository lock held""" + # for a new server, this will immediately take an exclusive lock. + # to support old servers, that do not have "exclusive" arg in open() + # RPC API, we also do it the old way: # re-write manifest to start a repository transaction - this causes a # lock upgrade to exclusive for remote (and also for local) repositories. # by using manifest=False in the decorator, we avoid having to require # the encryption key (and can operate just with encrypted data). 
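The body of do_with_lock that follows runs the user-supplied command with subprocess.call() and returns its exit status, committing the repository afterwards. A stripped-down sketch of that run-under-a-held-resource idiom, without borg's repository or locking machinery (function and variable names here are illustrative only):

    import subprocess
    import sys

    def run_while_holding(command, args):
        # The caller is assumed to already hold the lock / resource.
        try:
            # Propagate the child's return code to our caller.
            return subprocess.call([command] + list(args))
        finally:
            # Always release or commit the resource, even if the command failed.
            pass

    if __name__ == '__main__':
        sys.exit(run_while_holding(sys.argv[1], sys.argv[2:]))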
data = repository.get(Manifest.MANIFEST_ID) repository.put(Manifest.MANIFEST_ID, data) + # usually, a 0 byte (open for writing) segment file would be visible in the filesystem here. + # we write and close this file, to rather have a valid segment file on disk, before invoking the subprocess. + # we can only do this for local repositories (with .io), though: + if hasattr(repository, 'io'): + repository.io.close_segment() + env = prepare_subprocess_env(system=True) try: # we exit with the return code we get from the subprocess - return subprocess.call([args.command] + args.args) + return subprocess.call([args.command] + args.args, env=env) finally: - repository.rollback() + # we need to commit the "no change" operation we did to the manifest + # because it created a new segment file in the repository. if we would + # roll back, the same file would be later used otherwise (for other content). + # that would be bad if somebody uses rsync with ignore-existing (or + # any other mechanism relying on existing segment data not changing). + # see issue #1867. + repository.commit() - @with_repository() + def do_debug_info(self, args): + """display system information for debugging / bug reports""" + print(sysinfo()) + + # Additional debug information + print('CRC implementation:', crc32.__name__) + print('Process ID:', get_process_id()) + return EXIT_SUCCESS + + @with_repository(compatibility=Manifest.NO_OPERATION_CHECK) def do_debug_dump_archive_items(self, args, repository, manifest, key): """dump (decrypted, decompressed) archive items metadata (not: data)""" - archive = Archive(repository, key, manifest, args.location.archive) - for i, item_id in enumerate(archive.metadata[b'items']): - _, data = key.decrypt(item_id, repository.get(item_id)) + archive = Archive(repository, key, manifest, args.location.archive, + consider_part_files=args.consider_part_files) + for i, item_id in enumerate(archive.metadata.items): + data = key.decrypt(item_id, repository.get(item_id)) filename = '%06d_%s.items' % (i, bin_to_hex(item_id)) print('Dumping', filename) with open(filename, 'wb') as fd: @@ -1005,6 +1562,90 @@ class Archiver: print('Done.') return EXIT_SUCCESS + @with_repository(compatibility=Manifest.NO_OPERATION_CHECK) + def do_debug_dump_archive(self, args, repository, manifest, key): + """dump decoded archive metadata (not: data)""" + + try: + archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)] + except KeyError: + raise Archive.DoesNotExist(args.location.archive) + + indent = 4 + + def do_indent(d): + return textwrap.indent(json.dumps(d, indent=indent), prefix=' ' * indent) + + def output(fd): + # this outputs megabytes of data for a modest sized archive, so some manual streaming json output + fd.write('{\n') + fd.write(' "_name": ' + json.dumps(args.location.archive) + ",\n") + fd.write(' "_manifest_entry":\n') + fd.write(do_indent(prepare_dump_dict(archive_meta_orig))) + fd.write(',\n') + + data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id'])) + archive_org_dict = msgpack.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape') + + fd.write(' "_meta":\n') + fd.write(do_indent(prepare_dump_dict(archive_org_dict))) + fd.write(',\n') + fd.write(' "_items": [\n') + + unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict) + first = True + for item_id in archive_org_dict[b'items']: + data = key.decrypt(item_id, repository.get(item_id)) + unpacker.feed(data) + for item in unpacker: + item = 
prepare_dump_dict(item) + if first: + first = False + else: + fd.write(',\n') + fd.write(do_indent(item)) + + fd.write('\n') + fd.write(' ]\n}\n') + + with dash_open(args.path, 'w') as fd: + output(fd) + return EXIT_SUCCESS + + @with_repository(compatibility=Manifest.NO_OPERATION_CHECK) + def do_debug_dump_manifest(self, args, repository, manifest, key): + """dump decoded repository manifest""" + + data = key.decrypt(None, repository.get(manifest.MANIFEST_ID)) + + meta = prepare_dump_dict(msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')) + + with dash_open(args.path, 'w') as fd: + json.dump(meta, fd, indent=4) + return EXIT_SUCCESS + + @with_repository(compatibility=Manifest.NO_OPERATION_CHECK) + def do_debug_dump_repo_objs(self, args, repository, manifest, key): + """dump (decrypted, decompressed) repo objects""" + marker = None + i = 0 + while True: + result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker) + if not result: + break + marker = result[-1] + for id in result: + cdata = repository.get(id) + give_id = id if id != Manifest.MANIFEST_ID else None + data = key.decrypt(give_id, cdata) + filename = '%06d_%s.obj' % (i, bin_to_hex(id)) + print('Dumping', filename) + with open(filename, 'wb') as fd: + fd.write(data) + i += 1 + print('Done.') + return EXIT_SUCCESS + @with_repository(manifest=False) def do_debug_get_obj(self, args, repository): """get object contents from the repository and write it into file""" @@ -1016,7 +1657,7 @@ class Archiver: else: try: data = repository.get(id) - except repository.ObjectNotFound: + except Repository.ObjectNotFound: print("object %s not found." % hex_id) else: with open(args.path, "wb") as f: @@ -1024,7 +1665,7 @@ class Archiver: print("object %s fetched." % hex_id) return EXIT_SUCCESS - @with_repository(manifest=False) + @with_repository(manifest=False, exclusive=True) def do_debug_put_obj(self, args, repository): """put file(s) contents into the repository""" for path in args.paths: @@ -1036,7 +1677,7 @@ class Archiver: repository.commit() return EXIT_SUCCESS - @with_repository(manifest=False) + @with_repository(manifest=False, exclusive=True) def do_debug_delete_obj(self, args, repository): """delete the objects with the given IDs from the repo""" modified = False @@ -1050,13 +1691,36 @@ class Archiver: repository.delete(id) modified = True print("object %s deleted." % hex_id) - except repository.ObjectNotFound: + except Repository.ObjectNotFound: print("object %s not found." % hex_id) if modified: repository.commit() print('Done.') return EXIT_SUCCESS + @with_repository(manifest=False, exclusive=True, cache=True, compatibility=Manifest.NO_OPERATION_CHECK) + def do_debug_refcount_obj(self, args, repository, manifest, key, cache): + """display refcounts for the objects with the given IDs""" + for hex_id in args.ids: + try: + id = unhexlify(hex_id) + except ValueError: + print("object id %s is invalid." % hex_id) + else: + try: + refcount = cache.chunks[id][0] + print("object %s has %d referrers [info from chunks cache]." % (hex_id, refcount)) + except KeyError: + print("object %s not found [info from chunks cache]." 
% hex_id) + return EXIT_SUCCESS + + def do_debug_convert_profile(self, args): + """convert Borg profile to Python profile""" + import marshal + with args.output, args.input: + marshal.dump(msgpack.unpack(args.input, use_list=False, encoding='utf-8'), args.output) + return EXIT_SUCCESS + @with_repository(lock=False, manifest=False) def do_break_lock(self, args, repository): """Break the repository lock (e.g. in case it was left by a dead borg.""" @@ -1064,40 +1728,41 @@ class Archiver: Cache.break_lock(repository) return self.exit_code - helptext = {} + helptext = collections.OrderedDict() helptext['patterns'] = textwrap.dedent(''' - Exclusion patterns support four separate styles, fnmatch, shell, regular - expressions and path prefixes. By default, fnmatch is used. If followed - by a colon (':') the first two characters of a pattern are used as a + File patterns support these styles: fnmatch, shell, regular expressions, + path prefixes and path full-matches. By default, fnmatch is used for + ``--exclude`` patterns and shell-style is used for the experimental ``--pattern`` + option. + + If followed by a colon (':') the first two characters of a pattern are used as a style selector. Explicit style selection is necessary when a non-default style is desired or when the desired pattern starts with two alphanumeric characters followed by a colon (i.e. `aa:something/*`). `Fnmatch `_, selector `fm:` - - This is the default style. These patterns use a variant of shell - pattern syntax, with '*' matching any number of characters, '?' - matching any single character, '[...]' matching any single - character specified, including ranges, and '[!...]' matching any - character not specified. For the purpose of these patterns, the - path separator ('\\' for Windows and '/' on other systems) is not + This is the default style for ``--exclude`` and ``--exclude-from``. + These patterns use a variant of shell pattern syntax, with '\*' matching + any number of characters, '?' matching any single character, '[...]' + matching any single character specified, including ranges, and '[!...]' + matching any character not specified. For the purpose of these patterns, + the path separator ('\\' for Windows and '/' on other systems) is not treated specially. Wrap meta-characters in brackets for a literal match (i.e. `[?]` to match the literal character `?`). For a path to match a pattern, it must completely match from start to end, or must match from the start to just before a path separator. Except for the root path, paths will never end in the path separator when matching is attempted. Thus, if a given pattern ends in a path - separator, a '*' is appended before matching is attempted. + separator, a '\*' is appended before matching is attempted. Shell-style patterns, selector `sh:` - + This is the default style for ``--pattern`` and ``--patterns-from``. Like fnmatch patterns these are similar to shell patterns. The difference is that the pattern may include `**/` for matching zero or more directory levels, `*` for matching zero or more arbitrary characters with the exception of any path separator. Regular expressions, selector `re:` - Regular expressions similar to those found in Perl are supported. Unlike shell patterns regular expressions are not required to match the complete path and any substring match is sufficient. It is strongly recommended to @@ -1107,16 +1772,39 @@ class Archiver: regular expression syntax is described in the `Python documentation for the re module `_. 
- Prefix path, selector `pp:` - + Path prefix, selector `pp:` This pattern style is useful to match whole sub-directories. The pattern `pp:/data/bar` matches `/data/bar` and everything therein. - Exclusions can be passed via the command line option `--exclude`. When used + Path full-match, selector `pf:` + This pattern style is useful to match whole paths. + This is kind of a pseudo pattern as it can not have any variable or + unspecified parts - the full, precise path must be given. + `pf:/data/foo.txt` matches `/data/foo.txt` only. + + Implementation note: this is implemented via very time-efficient O(1) + hashtable lookups (this means you can have huge amounts of such patterns + without impacting performance much). + Due to that, this kind of pattern does not respect any context or order. + If you use such a pattern to include a file, it will always be included + (if the directory recursion encounters it). + Other include/exclude patterns that would normally match will be ignored. + Same logic applies for exclude. + + .. note:: + + `re:`, `sh:` and `fm:` patterns are all implemented on top of the Python SRE + engine. It is very easy to formulate patterns for each of these types which + requires an inordinate amount of time to match paths. If untrusted users + are able to supply patterns, ensure they cannot supply `re:` patterns. + Further, ensure that `sh:` and `fm:` patterns only contain a handful of + wildcards at most. + + Exclusions can be passed via the command line option ``--exclude``. When used from within a shell the patterns should be quoted to protect them from expansion. - The `--exclude-from` option permits loading exclusion patterns from a text + The ``--exclude-from`` option permits loading exclusion patterns from a text file with one pattern per line. Lines empty or starting with the number sign ('#') after removing whitespace on both ends are ignored. The optional style selector prefix is also supported for patterns loaded from a file. Due to @@ -1151,45 +1839,154 @@ class Archiver: re:^/home/[^/]\.tmp/ sh:/home/*/.thumbnails EOF - $ borg create --exclude-from exclude.txt backup /\n\n''') + $ borg create --exclude-from exclude.txt backup / + + .. container:: experimental + + A more general and easier to use way to define filename matching patterns exists + with the experimental ``--pattern`` and ``--patterns-from`` options. Using these, you + may specify the backup roots (starting points) and patterns for inclusion/exclusion. + A root path starts with the prefix `R`, followed by a path (a plain path, not a + file pattern). An include rule starts with the prefix +, an exclude rule starts + with the prefix -, an exclude-norecurse rule starts with !, all followed by a pattern. + Inclusion patterns are useful to include paths that are contained in an excluded + path. The first matching pattern is used so if an include pattern matches before + an exclude pattern, the file is backed up. If an exclude-norecurse pattern matches + a directory, it won't recurse into it and won't discover any potential matches for + include rules below that directory. + + Note that the default pattern style for ``--pattern`` and ``--patterns-from`` is + shell style (`sh:`), so those patterns behave similar to rsync include/exclude + patterns. The pattern style can be set via the `P` prefix. + + Patterns (``--pattern``) and excludes (``--exclude``) from the command line are + considered first (in the order of appearance). Then patterns from ``--patterns-from`` + are added. 
Exclusion patterns from ``--exclude-from`` files are appended last. + + An example ``--patterns-from`` file could look like that:: + + # "sh:" pattern style is the default, so the following line is not needed: + P sh + R / + # can be rebuild + - /home/*/.cache + # they're downloads for a reason + - /home/*/Downloads + # susan is a nice person + # include susans home + + /home/susan + # don't backup the other home directories + - /home/*\n\n''') helptext['placeholders'] = textwrap.dedent(''' - Repository (or Archive) URLs and --prefix values support these placeholders: + Repository (or Archive) URLs, ``--prefix`` and ``--remote-path`` values support these + placeholders: {hostname} - The (short) hostname of the machine. {fqdn} - The full name of the machine. {now} - - The current local date and time. + The current local date and time, by default in ISO-8601 format. + You can also supply your own `format string `_, e.g. {now:%Y-%m-%d_%H:%M:%S} {utcnow} - - The current UTC date and time. + The current UTC date and time, by default in ISO-8601 format. + You can also supply your own `format string `_, e.g. {utcnow:%Y-%m-%d_%H:%M:%S} {user} - The user name (or UID, if no name is available) of the user running borg. {pid} - The current process ID. + {borgversion} + The version of borg, e.g.: 1.0.8rc1 + + {borgmajor} + The version of borg, only the major version, e.g.: 1 + + {borgminor} + The version of borg, only major and minor version, e.g.: 1.0 + + {borgpatch} + The version of borg, only major, minor and patch version, e.g.: 1.0.8 + + If literal curly braces need to be used, double them for escaping:: + + borg create /path/to/repo::{{literal_text}} + Examples:: borg create /path/to/repo::{hostname}-{user}-{utcnow} ... borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... - borg prune --prefix '{hostname}-' ...\n\n''') + borg prune --prefix '{hostname}-' ... + + .. note:: + systemd uses a difficult, non-standard syntax for command lines in unit files (refer to + the `systemd.unit(5)` manual page). + + When invoking borg from unit files, pay particular attention to escaping, + especially when using the now/utcnow placeholders, since systemd performs its own + %-based variable replacement even in quoted text. To avoid interference from systemd, + double all percent signs (``{hostname}-{now:%Y-%m-%d_%H:%M:%S}`` + becomes ``{hostname}-{now:%%Y-%%m-%%d_%%H:%%M:%%S}``).\n\n''') + helptext['compression'] = textwrap.dedent(''' + It is no problem to mix different compression methods in one repo, + deduplication is done on the source data chunks (not on the compressed + or encrypted data). + + If some specific chunk was once compressed and stored into the repo, creating + another backup that also uses this chunk will not change the stored chunk. + So if you use different compression specs for the backups, whichever stores a + chunk first determines its compression. See also borg recreate. + + Compression is lz4 by default. If you want something else, you have to specify what you want. + + Valid compression specifiers are: + + none + Do not compress. + + lz4 + Use lz4 compression. High speed, low compression. (default) + + zlib[,L] + Use zlib ("gz") compression. Medium speed, medium compression. + If you do not explicitely give the compression level L (ranging from 0 + to 9), it will use level 6. + Giving level 0 (means "no compression", but still has zlib protocol + overhead) is usually pointless, you better use "none" compression. + + lzma[,L] + Use lzma ("xz") compression. 
Low speed, high compression. + If you do not explicitely give the compression level L (ranging from 0 + to 9), it will use level 6. + Giving levels above 6 is pointless and counterproductive because it does + not compress better due to the buffer size used by borg - but it wastes + lots of CPU cycles and RAM. + + auto,C[,L] + Use a built-in heuristic to decide per chunk whether to compress or not. + The heuristic tries with lz4 whether the data is compressible. + For incompressible data, it will not use compression (uses "none"). + For compressible data, it uses the given C[,L] compression - with C[,L] + being any valid compression specifier. + + Examples:: + + borg create --compression lz4 REPO::ARCHIVE data + borg create --compression zlib REPO::ARCHIVE data + borg create --compression zlib,1 REPO::ARCHIVE data + borg create --compression auto,lzma,6 REPO::ARCHIVE data + borg create --compression auto,lzma ...\n\n''') def do_help(self, parser, commands, args): if not args.topic: parser.print_help() elif args.topic in self.helptext: - print(self.helptext[args.topic]) + print(rst_to_terminal(self.helptext[args.topic])) elif args.topic in commands: if args.epilog_only: print(commands[args.topic].epilog) @@ -1202,57 +1999,298 @@ class Archiver: parser.error('No help available on %s' % (args.topic,)) return self.exit_code + def do_subcommand_help(self, parser, args): + """display infos about subcommand""" + parser.print_help() + return EXIT_SUCCESS + + do_maincommand_help = do_subcommand_help + def preprocess_args(self, args): deprecations = [ - # ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'), + # ('--old', '--new' or None, 'Warning: "--old" has been deprecated. Use "--new" instead.'), ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'), + ('--keep-tag-files', '--keep-exclude-tags', 'Warning: "--keep-tag-files" has been deprecated. Use "--keep-exclude-tags" instead.'), + ('--ignore-inode', None, 'Warning: "--ignore-inode" has been deprecated. Use "--files-cache=ctime,size" or "...=mtime,size" instead.'), + ('--no-files-cache', None, 'Warning: "--no-files-cache" has been deprecated. Use "--files-cache=disabled" instead.'), ] for i, arg in enumerate(args[:]): for old_name, new_name, warning in deprecations: if arg.startswith(old_name): - args[i] = arg.replace(old_name, new_name) - self.print_warning(warning) + if new_name is not None: + args[i] = arg.replace(old_name, new_name) + print(warning, file=sys.stderr) return args - def build_parser(self, prog=None): - common_parser = argparse.ArgumentParser(add_help=False, prog=prog) + class CommonOptions: + """ + Support class to allow specifying common options directly after the top-level command. 
- common_group = common_parser.add_argument_group('Common options') - common_group.add_argument('-h', '--help', action='help', help='show this help message and exit') - common_group.add_argument('--critical', dest='log_level', - action='store_const', const='critical', default='warning', - help='work on log level CRITICAL') - common_group.add_argument('--error', dest='log_level', - action='store_const', const='error', default='warning', - help='work on log level ERROR') - common_group.add_argument('--warning', dest='log_level', - action='store_const', const='warning', default='warning', - help='work on log level WARNING (default)') - common_group.add_argument('--info', '-v', '--verbose', dest='log_level', - action='store_const', const='info', default='warning', - help='work on log level INFO') - common_group.add_argument('--debug', dest='log_level', - action='store_const', const='debug', default='warning', - help='enable debug output, work on log level DEBUG') - common_group.add_argument('--lock-wait', dest='lock_wait', type=int, metavar='N', default=1, - help='wait for the lock, but max. N seconds (default: %(default)d).') - common_group.add_argument('--show-version', dest='show_version', action='store_true', default=False, - help='show/log the borg version') - common_group.add_argument('--show-rc', dest='show_rc', action='store_true', default=False, - help='show/log the return code (rc)') - common_group.add_argument('--no-files-cache', dest='cache_files', action='store_false', - help='do not load/update the file metadata cache used to detect unchanged files') - common_group.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=UMASK_DEFAULT, metavar='M', - help='set umask to M (local and remote, default: %(default)04o)') - common_group.add_argument('--remote-path', dest='remote_path', metavar='PATH', - help='set remote path to executable (default: "borg")') + Normally options can only be specified on the parser defining them, which means + that generally speaking *all* options go after all sub-commands. This is annoying + for common options in scripts, e.g. --remote-path or logging options. - parser = argparse.ArgumentParser(prog=prog, description='Borg - Deduplicated Backups') + This class allows adding the same set of options to both the top-level parser + and the final sub-command parsers (but not intermediary sub-commands, at least for now). + + It does so by giving every option's target name ("dest") a suffix indicating its level + -- no two options in the parser hierarchy can have the same target -- + then, after parsing the command line, multiple definitions are resolved. + + Defaults are handled by only setting them on the top-level parser and setting + a sentinel object in all sub-parsers, which then allows to discern which parser + supplied the option. + """ + + def __init__(self, define_common_options, suffix_precedence): + """ + *define_common_options* should be a callable taking one argument, which + will be a argparse.Parser.add_argument-like function. + + *define_common_options* will be called multiple times, and should call + the passed function to define common options exactly the same way each time. + + *suffix_precedence* should be a tuple of the suffixes that will be used. + It is ordered from lowest precedence to highest precedence: + An option specified on the parser belonging to index 0 is overridden if the + same option is specified on any parser with a higher index. 
+ """ + self.define_common_options = define_common_options + self.suffix_precedence = suffix_precedence + + # Maps suffixes to sets of target names. + # E.g. common_options["_subcommand"] = {..., "log_level", ...} + self.common_options = dict() + # Set of options with the 'append' action. + self.append_options = set() + # This is the sentinel object that replaces all default values in parsers + # below the top-level parser. + self.default_sentinel = object() + + def add_common_group(self, parser, suffix, provide_defaults=False): + """ + Add common options to *parser*. + + *provide_defaults* must only be True exactly once in a parser hierarchy, + at the top level, and False on all lower levels. The default is chosen + accordingly. + + *suffix* indicates the suffix to use internally. It also indicates + which precedence the *parser* has for common options. See *suffix_precedence* + of __init__. + """ + assert suffix in self.suffix_precedence + + def add_argument(*args, **kwargs): + if 'dest' in kwargs: + kwargs.setdefault('action', 'store') + assert kwargs['action'] in ('help', 'store_const', 'store_true', 'store_false', 'store', 'append') + is_append = kwargs['action'] == 'append' + if is_append: + self.append_options.add(kwargs['dest']) + assert kwargs['default'] == [], 'The default is explicitly constructed as an empty list in resolve()' + else: + self.common_options.setdefault(suffix, set()).add(kwargs['dest']) + kwargs['dest'] += suffix + if not provide_defaults: + # Interpolate help now, in case the %(default)d (or so) is mentioned, + # to avoid producing incorrect help output. + # Assumption: Interpolated output can safely be interpolated again, + # which should always be the case. + # Note: We control all inputs. + kwargs['help'] = kwargs['help'] % kwargs + if not is_append: + kwargs['default'] = self.default_sentinel + + common_group.add_argument(*args, **kwargs) + + common_group = parser.add_argument_group('Common options') + self.define_common_options(add_argument) + + def resolve(self, args: argparse.Namespace): # Namespace has "in" but otherwise is not like a dict. + """ + Resolve the multiple definitions of each common option to the final value. + """ + for suffix in self.suffix_precedence: + # From highest level to lowest level, so the "most-specific" option wins, e.g. + # "borg --debug create --info" shall result in --info being effective. + for dest in self.common_options.get(suffix, []): + # map_from is this suffix' option name, e.g. log_level_subcommand + # map_to is the target name, e.g. log_level + map_from = dest + suffix + map_to = dest + # Retrieve value; depending on the action it may not exist, but usually does + # (store_const/store_true/store_false), either because the action implied a default + # or a default is explicitly supplied. + # Note that defaults on lower levels are replaced with default_sentinel. + # Only the top level has defaults. + value = getattr(args, map_from, self.default_sentinel) + if value is not self.default_sentinel: + # value was indeed specified on this level. Transfer value to target, + # and un-clobber the args (for tidiness - you *cannot* use the suffixed + # names for other purposes, obviously). + setattr(args, map_to, value) + try: + delattr(args, map_from) + except AttributeError: + pass + + # Options with an "append" action need some special treatment. Instead of + # overriding values, all specified values are merged together. 
+ for dest in self.append_options: + option_value = [] + for suffix in self.suffix_precedence: + # Find values of this suffix, if any, and add them to the final list + extend_from = dest + suffix + if extend_from in args: + values = getattr(args, extend_from) + delattr(args, extend_from) + option_value.extend(values) + setattr(args, dest, option_value) + + def build_parser(self): + # You can use :ref:`xyz` in the following usage pages. However, for plain-text view, + # e.g. through "borg ... --help", define a substitution for the reference here. + # It will replace the entire :ref:`foo` verbatim. + rst_plain_text_references = { + 'a_status_oddity': '"I am seeing ‘A’ (added) status for a unchanged file!?"', + } + + def process_epilog(epilog): + epilog = textwrap.dedent(epilog).splitlines() + try: + mode = borg.doc_mode + except AttributeError: + mode = 'command-line' + if mode in ('command-line', 'build_usage'): + epilog = [line for line in epilog if not line.startswith('.. man')] + epilog = '\n'.join(epilog) + if mode == 'command-line': + epilog = rst_to_terminal(epilog, rst_plain_text_references) + return epilog + + def define_common_options(add_common_option): + add_common_option('-h', '--help', action='help', help='show this help message and exit') + add_common_option('--critical', dest='log_level', + action='store_const', const='critical', default='warning', + help='work on log level CRITICAL') + add_common_option('--error', dest='log_level', + action='store_const', const='error', default='warning', + help='work on log level ERROR') + add_common_option('--warning', dest='log_level', + action='store_const', const='warning', default='warning', + help='work on log level WARNING (default)') + add_common_option('--info', '-v', '--verbose', dest='log_level', + action='store_const', const='info', default='warning', + help='work on log level INFO') + add_common_option('--debug', dest='log_level', + action='store_const', const='debug', default='warning', + help='enable debug output, work on log level DEBUG') + add_common_option('--debug-topic', metavar='TOPIC', dest='debug_topics', action='append', default=[], + help='enable TOPIC debugging (can be specified multiple times). ' + 'The logger path is borg.debug. 
if TOPIC is not fully qualified.') + add_common_option('-p', '--progress', dest='progress', action='store_true', + help='show progress information') + add_common_option('--log-json', dest='log_json', action='store_true', + help='Output one JSON object per log line instead of formatted text.') + add_common_option('--lock-wait', metavar='SECONDS', dest='lock_wait', type=int, default=1, + help='wait at most SECONDS for acquiring a repository/cache lock (default: %(default)d).') + add_common_option('--show-version', dest='show_version', action='store_true', + help='show/log the borg version') + add_common_option('--show-rc', dest='show_rc', action='store_true', + help='show/log the return code (rc)') + add_common_option('--umask', metavar='M', dest='umask', type=lambda s: int(s, 8), default=UMASK_DEFAULT, + help='set umask to M (local and remote, default: %(default)04o)') + add_common_option('--remote-path', metavar='PATH', dest='remote_path', + help='use PATH as borg executable on the remote (default: "borg")') + add_common_option('--remote-ratelimit', metavar='RATE', dest='remote_ratelimit', type=int, + help='set remote network upload rate limit in kiByte/s (default: 0=unlimited)') + add_common_option('--consider-part-files', dest='consider_part_files', action='store_true', + help='treat part files like normal files (e.g. to list/extract them)') + add_common_option('--debug-profile', metavar='FILE', dest='debug_profile', default=None, + help='Write execution profile in Borg format into FILE. For local use a Python-' + 'compatible file can be generated by suffixing FILE with ".pyprof".') + + def define_exclude_and_patterns(add_option, *, tag_files=False, strip_components=False): + add_option('-e', '--exclude', metavar='PATTERN', dest='patterns', + type=parse_exclude_pattern, action='append', + help='exclude paths matching PATTERN') + add_option('--exclude-from', metavar='EXCLUDEFILE', action=ArgparseExcludeFileAction, + help='read exclude patterns from EXCLUDEFILE, one per line') + add_option('--pattern', metavar='PATTERN', action=ArgparsePatternAction, + help='experimental: include/exclude paths matching PATTERN') + add_option('--patterns-from', metavar='PATTERNFILE', action=ArgparsePatternFileAction, + help='experimental: read include/exclude patterns from PATTERNFILE, one per line') + + if tag_files: + add_option('--exclude-caches', dest='exclude_caches', action='store_true', + help='exclude directories that contain a CACHEDIR.TAG file ' + '(http://www.brynosaurus.com/cachedir/spec.html)') + add_option('--exclude-if-present', metavar='NAME', dest='exclude_if_present', + action='append', type=str, + help='exclude directories that are tagged by containing a filesystem object with ' + 'the given NAME') + add_option('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags', + action='store_true', + help='if tag objects are specified with ``--exclude-if-present``, ' + 'don\'t omit the tag objects themselves from the backup archive') + + if strip_components: + add_option('--strip-components', metavar='NUMBER', dest='strip_components', type=int, default=0, + help='Remove the specified number of leading path elements. 
' + 'Paths with fewer elements will be silently skipped.') + + def define_exclusion_group(subparser, **kwargs): + exclude_group = subparser.add_argument_group('Exclusion options') + define_exclude_and_patterns(exclude_group.add_argument, **kwargs) + return exclude_group + + def define_archive_filters_group(subparser, *, sort_by=True, first_last=True): + filters_group = subparser.add_argument_group('Archive filters', + 'Archive filters can be applied to repository targets.') + group = filters_group.add_mutually_exclusive_group() + group.add_argument('-P', '--prefix', metavar='PREFIX', dest='prefix', type=PrefixSpec, default='', + help='only consider archive names starting with this prefix.') + group.add_argument('-a', '--glob-archives', metavar='GLOB', dest='glob_archives', default=None, + help='only consider archive names matching the glob. ' + 'sh: rules apply, see "borg help patterns". ' + '``--prefix`` and ``--glob-archives`` are mutually exclusive.') + + if sort_by: + sort_by_default = 'timestamp' + filters_group.add_argument('--sort-by', metavar='KEYS', dest='sort_by', + type=SortBySpec, default=sort_by_default, + help='Comma-separated list of sorting keys; valid keys are: {}; default is: {}' + .format(', '.join(AI_HUMAN_SORT_KEYS), sort_by_default)) + + if first_last: + group = filters_group.add_mutually_exclusive_group() + group.add_argument('--first', metavar='N', dest='first', default=0, type=positive_int_validator, + help='consider first N archives after other filters were applied') + group.add_argument('--last', metavar='N', dest='last', default=0, type=positive_int_validator, + help='consider last N archives after other filters were applied') + + parser = argparse.ArgumentParser(prog=self.prog, description='Borg - Deduplicated Backups', + add_help=False) + parser.set_defaults(fallback2_func=functools.partial(self.do_maincommand_help, parser)) + parser.common_options = self.CommonOptions(define_common_options, + suffix_precedence=('_maincommand', '_midcommand', '_subcommand')) parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__, help='show version number and exit') + parser.common_options.add_common_group(parser, '_maincommand', provide_defaults=True) + + common_parser = argparse.ArgumentParser(add_help=False, prog=self.prog) + # some empty defaults for all subparsers + common_parser.set_defaults(paths=[], patterns=[]) + parser.common_options.add_common_group(common_parser, '_subcommand') + + mid_common_parser = argparse.ArgumentParser(add_help=False, prog=self.prog) + mid_common_parser.set_defaults(paths=[], patterns=[]) + parser.common_options.add_common_group(mid_common_parser, '_midcommand') + subparsers = parser.add_subparsers(title='required arguments', metavar='') - serve_epilog = textwrap.dedent(""" + serve_epilog = process_epilog(""" This command starts a repository server process. This command is usually not used manually. """) subparser = subparsers.add_parser('serve', parents=[common_parser], add_help=False, @@ -1260,29 +2298,46 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='start repository server process') subparser.set_defaults(func=self.do_serve) - subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append', - metavar='PATH', help='restrict repository access to PATH') + subparser.add_argument('--restrict-to-path', metavar='PATH', dest='restrict_to_paths', action='append', + help='restrict repository access to PATH. 
' + 'Can be specified multiple times to allow the client access to several directories. ' + 'Access to all sub-directories is granted implicitly; PATH doesn\'t need to directly point to a repository.') + subparser.add_argument('--restrict-to-repository', metavar='PATH', dest='restrict_to_repositories', action='append', + help='restrict repository access. Only the repository located at PATH ' + '(no sub-directories are considered) is accessible. ' + 'Can be specified multiple times to allow the client access to several repositories. ' + 'Unlike ``--restrict-to-path`` sub-directories are not accessible; ' + 'PATH needs to directly point at a repository location. ' + 'PATH may be an empty directory or the last element of PATH may not exist, in which case ' + 'the client may initialize a repository there.') subparser.add_argument('--append-only', dest='append_only', action='store_true', help='only allow appending to repository segment files') - init_epilog = textwrap.dedent(""" + subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', + type=parse_storage_quota, default=None, + help='Override storage quota of the repository (e.g. 5G, 1.5T). ' + 'When a new repository is initialized, sets the storage quota on the new ' + 'repository as well. Default: no quota.') + + init_epilog = process_epilog(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. - Encryption can be enabled at repository init time (the default). + Encryption can be enabled at repository init time. It cannot be changed later. - It is not recommended to disable encryption. Repository encryption protects you - e.g. against the case that an attacker has access to your backup repository. + It is not recommended to work without encryption. Repository encryption protects + you e.g. against the case that an attacker has access to your backup repository. But be careful with the key / the passphrase: - If you want "passphrase-only" security, use the repokey mode. The key will - be stored inside the repository (in its "config" file). In above mentioned - attack scenario, the attacker will have the key (but not the passphrase). + If you want "passphrase-only" security, use one of the repokey modes. The + key will be stored inside the repository (in its "config" file). In above + mentioned attack scenario, the attacker will have the key (but not the + passphrase). - If you want "passphrase and having-the-key" security, use the keyfile mode. - The key will be stored in your home directory (in .config/borg/keys). In - the attack scenario, the attacker who has just access to your repo won't have - the key (and also not the passphrase). + If you want "passphrase and having-the-key" security, use one of the keyfile + modes. The key will be stored in your home directory (in .config/borg/keys). + In the attack scenario, the attacker who has just access to your repo won't + have the key (and also not the passphrase). Make a backup copy of the key file (keyfile mode) or repo config file (repokey mode) and keep it at a safe place, so you still have the key in @@ -1307,8 +2362,64 @@ class Archiver: You can change your passphrase for existing repos at any time, it won't affect the encryption/decryption key or other secrets. - When encrypting, AES-CTR-256 is used for encryption, and HMAC-SHA256 for - authentication. Hardware acceleration will be used automatically. + Encryption modes + ++++++++++++++++ + + .. 
nanorst: inline-fill + + +----------+---------------+------------------------+--------------------------+ + | Hash/MAC | Not encrypted | Not encrypted, | Encrypted (AEAD w/ AES) | + | | no auth | but authenticated | and authenticated | + +----------+---------------+------------------------+--------------------------+ + | SHA-256 | none | `authenticated` | repokey | + | | | | keyfile | + +----------+---------------+------------------------+--------------------------+ + | BLAKE2b | n/a | `authenticated-blake2` | `repokey-blake2` | + | | | | `keyfile-blake2` | + +----------+---------------+------------------------+--------------------------+ + + .. nanorst: inline-replace + + `Marked modes` are new in Borg 1.1 and are not backwards-compatible with Borg 1.0.x. + + On modern Intel/AMD CPUs (except very cheap ones), AES is usually + hardware-accelerated. + BLAKE2b is faster than SHA256 on Intel/AMD 64-bit CPUs + (except AMD Ryzen and future CPUs with SHA extensions), + which makes `authenticated-blake2` faster than `none` and `authenticated`. + + On modern ARM CPUs, NEON provides hardware acceleration for SHA256 making it faster + than BLAKE2b-256 there. NEON accelerates AES as well. + + Hardware acceleration is always used automatically when available. + + `repokey` and `keyfile` use AES-CTR-256 for encryption and HMAC-SHA256 for + authentication in an encrypt-then-MAC (EtM) construction. The chunk ID hash + is HMAC-SHA256 as well (with a separate key). + These modes are compatible with Borg 1.0.x. + + `repokey-blake2` and `keyfile-blake2` are also authenticated encryption modes, + but use BLAKE2b-256 instead of HMAC-SHA256 for authentication. The chunk ID + hash is a keyed BLAKE2b-256 hash. + These modes are new and *not* compatible with Borg 1.0.x. + + `authenticated` mode uses no encryption, but authenticates repository contents + through the same HMAC-SHA256 hash as the `repokey` and `keyfile` modes (it uses it + as the chunk ID hash). The key is stored like `repokey`. + This mode is new and *not* compatible with Borg 1.0.x. + + `authenticated-blake2` is like `authenticated`, but uses the keyed BLAKE2b-256 hash + from the other blake2 modes. + This mode is new and *not* compatible with Borg 1.0.x. + + `none` mode uses no encryption and no authentication. It uses SHA256 as chunk + ID hash. Not recommended, rather consider using an authenticated or + authenticated/encrypted mode. This mode has possible denial-of-service issues + when running ``borg create`` on contents controlled by an attacker. + Use it only for new repositories where no encryption is wanted **and** when compatibility + with 1.0.x is important. If compatibility with 1.0.x is not important, use + `authenticated-blake2` or `authenticated` instead. + This mode is compatible with Borg 1.0.x. 
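        A minimal sketch of the encrypt-then-MAC construction described above, for
        illustration only: it assumes the third-party ``cryptography`` package and
        leaves out Borg's key derivation, nonce management and on-disk layout::

            import hashlib
            import hmac
            import os

            from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

            def etm_encrypt(enc_key, mac_key, plaintext):
                # enc_key must be 32 bytes for AES-256; AES-CTR for confidentiality,
                # then HMAC-SHA256 over nonce + ciphertext for authenticity (EtM)
                nonce = os.urandom(16)
                encryptor = Cipher(algorithms.AES(enc_key), modes.CTR(nonce)).encryptor()
                ciphertext = encryptor.update(plaintext) + encryptor.finalize()
                tag = hmac.new(mac_key, nonce + ciphertext, hashlib.sha256).digest()
                return tag + nonce + ciphertext

            def etm_decrypt(enc_key, mac_key, blob):
                tag, nonce, ciphertext = blob[:32], blob[32:48], blob[48:]
                expected = hmac.new(mac_key, nonce + ciphertext, hashlib.sha256).digest()
                if not hmac.compare_digest(tag, expected):
                    raise ValueError('MAC check failed: data modified or wrong key')
                decryptor = Cipher(algorithms.AES(enc_key), modes.CTR(nonce)).decryptor()
                return decryptor.update(ciphertext) + decryptor.finalize()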
""") subparser = subparsers.add_parser('init', parents=[common_parser], add_help=False, description=self.do_init.__doc__, epilog=init_epilog, @@ -1318,11 +2429,16 @@ class Archiver: subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository to create') - subparser.add_argument('-e', '--encryption', dest='encryption', - choices=('none', 'keyfile', 'repokey'), default='repokey', - help='select encryption key mode (default: "%(default)s")') + subparser.add_argument('-e', '--encryption', metavar='MODE', dest='encryption', required=True, + choices=key_argument_names(), + help='select encryption key mode **(required)**') + subparser.add_argument('--append-only', dest='append_only', action='store_true', + help='create an append-only mode repository') + subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', default=None, + type=parse_storage_quota, + help='Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota.') - check_epilog = textwrap.dedent(""" + check_epilog = process_epilog(""" The check command verifies the consistency of a repository and the corresponding archives. First, the underlying repository data files are checked: @@ -1337,7 +2453,7 @@ class Archiver: stored in the segments. - If you use a remote repo server via ssh:, the repo check is executed on the repo server without causing significant network traffic. - - The repository check can be skipped using the --archives-only option. + - The repository check can be skipped using the ``--archives-only`` option. Second, the consistency and correctness of the archive metadata is verified: @@ -1359,19 +2475,15 @@ class Archiver: decryption and this is always done client-side, because key access will be required). - The archive checks can be time consuming, they can be skipped using the - --repository-only option. + ``--repository-only`` option. - The --verify-data option will perform a full integrity verification (as opposed to + The ``--verify-data`` option will perform a full integrity verification (as opposed to checking the CRC32 of the segment) of data, which means reading the data from the repository, decrypting and decompressing it. This is a cryptographic verification, which will detect (accidental) corruption. For encrypted repositories it is tamper-resistant as well, unless the attacker has access to the keys. It is also very slow. - - --verify-data only verifies data used by the archives specified with --last, - --prefix or an explicitly named archive. If none of these are passed, - all data in the repository is verified. 
""") subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False, description=self.do_check.__doc__, @@ -1383,35 +2495,92 @@ class Archiver: type=location_validator(), help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', - default=False, help='only perform repository checks') subparser.add_argument('--archives-only', dest='archives_only', action='store_true', - default=False, help='only perform archives checks') subparser.add_argument('--verify-data', dest='verify_data', action='store_true', - default=False, help='perform cryptographic archive data integrity verification ' - '(conflicts with --repository-only)') + '(conflicts with ``--repository-only``)') subparser.add_argument('--repair', dest='repair', action='store_true', - default=False, help='attempt to repair any inconsistencies found') subparser.add_argument('--save-space', dest='save_space', action='store_true', - default=False, help='work slower, but using less space') - subparser.add_argument('--last', dest='last', - type=int, default=None, metavar='N', - help='only check last N archives (Default: all)') - subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec, - help='only consider archive names starting with this prefix') - subparser.add_argument('-p', '--progress', dest='progress', - action='store_true', default=False, - help="""show progress display while checking""") + define_archive_filters_group(subparser) - change_passphrase_epilog = textwrap.dedent(""" + subparser = subparsers.add_parser('key', parents=[mid_common_parser], add_help=False, + description="Manage a keyfile or repokey of a repository", + epilog="", + formatter_class=argparse.RawDescriptionHelpFormatter, + help='manage repository key') + + key_parsers = subparser.add_subparsers(title='required arguments', metavar='') + subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser)) + + key_export_epilog = process_epilog(""" + If repository encryption is used, the repository is inaccessible + without the key. This command allows to backup this essential key. + + There are two backup formats. The normal backup format is suitable for + digital storage as a file. The ``--paper`` backup format is optimized + for printing and typing in while importing, with per line checks to + reduce problems with manual input. + + For repositories using keyfile encryption the key is saved locally + on the system that is capable of doing backups. To guard against loss + of this key, the key needs to be backed up independently of the main + data backup. + + For repositories using the repokey encryption the key is saved in the + repository in the config file. A backup is thus not strictly needed, + but guards against the repository becoming inaccessible if the file + is damaged for some reason. 
+ """) + subparser = key_parsers.add_parser('export', parents=[common_parser], add_help=False, + description=self.do_key_export.__doc__, + epilog=key_export_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='export repository key for backup') + subparser.set_defaults(func=self.do_key_export) + subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False)) + subparser.add_argument('path', metavar='PATH', nargs='?', type=str, + help='where to store the backup') + subparser.add_argument('--paper', dest='paper', action='store_true', + help='Create an export suitable for printing and later type-in') + subparser.add_argument('--qr-html', dest='qr', action='store_true', + help='Create an html file suitable for printing and later type-in or qr scan') + + key_import_epilog = process_epilog(""" + This command allows to restore a key previously backed up with the + export command. + + If the ``--paper`` option is given, the import will be an interactive + process in which each line is checked for plausibility before + proceeding to the next line. For this format PATH must not be given. + """) + subparser = key_parsers.add_parser('import', parents=[common_parser], add_help=False, + description=self.do_key_import.__doc__, + epilog=key_import_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='import repository key from backup') + subparser.set_defaults(func=self.do_key_import) + subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False)) + subparser.add_argument('path', metavar='PATH', nargs='?', type=str, + help='path to the backup (\'-\' to read from stdin)') + subparser.add_argument('--paper', dest='paper', action='store_true', + help='interactively import from a backup done with ``--paper``') + + change_passphrase_epilog = process_epilog(""" The key files used for repository encryption are optionally passphrase protected. This command can be used to change this passphrase. + + Please note that this command only changes the passphrase, but not any + secret protected by it (like e.g. encryption/MAC keys or chunker seed). + Thus, changing the passphrase after passphrase and borg key got compromised + does not protect future (nor past) backups to the same repository. """) - subparser = subparsers.add_parser('change-passphrase', parents=[common_parser], add_help=False, + subparser = key_parsers.add_parser('change-passphrase', parents=[common_parser], add_help=False, description=self.do_change_passphrase.__doc__, epilog=change_passphrase_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -1420,9 +2589,19 @@ class Archiver: subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False)) - migrate_to_repokey_epilog = textwrap.dedent(""" - This command migrates a repository from passphrase mode (not supported any - more) to repokey mode. 
+ # Borg 1.0 alias for change passphrase (without the "key" subcommand) + subparser = subparsers.add_parser('change-passphrase', parents=[common_parser], add_help=False, + description=self.do_change_passphrase.__doc__, + epilog=change_passphrase_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='change repository passphrase') + subparser.set_defaults(func=self.do_change_passphrase_deprecated) + subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False)) + + migrate_to_repokey_epilog = process_epilog(""" + This command migrates a repository from passphrase mode (removed in Borg 1.0) + to repokey mode. You will be first asked for the repository passphrase (to open it in passphrase mode). This is the same passphrase as you used to use for this repo before 1.0. @@ -1438,7 +2617,7 @@ class Archiver: But please note: the secrets will always stay the same and they could always be derived from your (old) passphrase-mode passphrase. """) - subparser = subparsers.add_parser('migrate-to-repokey', parents=[common_parser], add_help=False, + subparser = key_parsers.add_parser('migrate-to-repokey', parents=[common_parser], add_help=False, description=self.do_migrate_to_repokey.__doc__, epilog=migrate_to_repokey_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -1447,25 +2626,129 @@ class Archiver: subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False)) - create_epilog = textwrap.dedent(""" + create_epilog = process_epilog(""" This command creates a backup archive containing all files found while recursively - traversing all paths specified. The archive will consume almost no disk space for - files or parts of files that have already been stored in other archives. + traversing all paths specified. Paths are added to the archive as they are given, + that means if relative paths are desired, the command has to be run from the correct + directory. + + When giving '-' as path, borg will read data from standard input and create a + file 'stdin' in the created archive from that data. + + The archive will consume almost no disk space for files or parts of files that + have already been stored in other archives. The archive name needs to be unique. It must not end in '.checkpoint' or '.checkpoint.N' (with N being a number), because these names are used for checkpoints and treated in special ways. - In the archive name, you may use the following format tags: - {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4} + In the archive name, you may use the following placeholders: + {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others. - To speed up pulling backups over sshfs and similar network file systems which do - not provide correct inode information the --ignore-inode flag can be used. This - potentially decreases reliability of change detection, while avoiding always reading - all files on these file systems. + Backup speed is increased by not reprocessing files that are already part of + existing archives and weren't modified. The detection of unmodified files is + done by comparing multiple file metadata values with previous values kept in + the files cache. 
+ + This comparison can operate in different modes as given by ``--files-cache``: + + - ctime,size,inode (default) + - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4) + - ctime,size (ignore the inode number) + - mtime,size (ignore the inode number) + - rechunk,ctime (all files are considered modified - rechunk, cache ctime) + - rechunk,mtime (all files are considered modified - rechunk, cache mtime) + - disabled (disable the files cache, all files considered modified - rechunk) + + inode number: better safety, but often unstable on network filesystems + + Normally, detecting file modifications will take inode information into + consideration to improve the reliability of file change detection. + This is problematic for files located on sshfs and similar network file + systems which do not provide stable inode numbers, such files will always + be considered modified. You can use modes without `inode` in this case to + improve performance, but reliability of change detection might be reduced. + + ctime vs. mtime: safety vs. speed + + - ctime is a rather safe way to detect changes to a file (metadata and contents) + as it can not be set from userspace. But, a metadata-only change will already + update the ctime, so there might be some unnecessary chunking/hashing even + without content changes. Some filesystems do not support ctime (change time). + - mtime usually works and only updates if file contents were changed. But mtime + can be arbitrarily set from userspace, e.g. to set mtime back to the same value + it had before a content change happened. This can be used maliciously as well as + well-meant, but in both cases mtime based cache modes can be problematic. + + The mount points of filesystems or filesystem snapshots should be the same for every + creation of a new archive to ensure fast operation. This is because the file cache that + is used to determine changed files quickly uses absolute filenames. + If this is not possible, consider creating a bind mount to a stable location. + + The ``--progress`` option shows (from left to right) Original, Compressed and Deduplicated + (O, C and D, respectively), then the Number of files (N) processed so far, followed by + the currently processed path. See the output of the "borg help patterns" command for more help on exclude patterns. See the output of the "borg help placeholders" command for more help on placeholders. + + .. man NOTES + + The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will + exclude foo/.bundler/gems. In borg it will not, you need to use ``--exclude`` + '\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for + more information. + + In addition to using ``--exclude`` patterns, it is possible to use + ``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file + or folder name) which, when contained within another folder, will prevent the + containing folder from being backed up. By default, the containing folder and + all of its contents will be omitted from the backup. If, however, you wish to + only include the objects specified by ``--exclude-if-present`` in your backup, + and not include any other contents of the containing folder, this can be enabled + through using the ``--keep-exclude-tags`` option. + + Borg respects the nodump flag. Files flagged nodump will be marked as excluded (x) + in ``--list`` output. 
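        A hypothetical sketch of the ``--files-cache`` comparison modes described
        earlier in this section (illustration only; the cache entry field names are
        assumptions and this is not Borg's actual implementation)::

            def unchanged(cached, st, mode=('ctime', 'size', 'inode')):
                # cached: previous values kept in the files cache for this path
                # st: current os.stat() result for the file
                checks = {
                    'ctime': cached.ctime == st.st_ctime_ns,
                    'mtime': cached.mtime == st.st_mtime_ns,
                    'size': cached.size == st.st_size,
                    'inode': cached.inode == st.st_ino,
                }
                # e.g. mode=('mtime', 'size') ignores the inode number, which helps
                # on sshfs and similar filesystems without stable inode numbers
                return all(checks[key] for key in mode)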
+ + Item flags + ++++++++++ + + ``--list`` outputs a list of all files, directories and other + file system items it considered (no matter whether they had content changes + or not). For each item, it prefixes a single-letter flag that indicates type + and/or status of the item. + + If you are interested only in a subset of that output, you can give e.g. + ``--filter=AME`` and it will only show regular files with A, M or E status (see + below). + + A uppercase character represents the status of a regular file relative to the + "files" cache (not relative to the repo -- this is an issue if the files cache + is not used). Metadata is stored in any case and for 'A' and 'M' also new data + chunks are stored. For 'U' all data chunks refer to already existing chunks. + + - 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ) + - 'M' = regular file, modified + - 'U' = regular file, unchanged + - 'E' = regular file, an error happened while accessing/reading *this* file + + A lowercase character means a file type other than a regular file, + borg usually just stores their metadata: + + - 'd' = directory + - 'b' = block device + - 'c' = char device + - 'h' = regular file, hardlink (to already seen inodes) + - 's' = symlink + - 'f' = fifo + + Other flags used include: + + - 'i' = backup data was read from standard input (stdin) + - '-' = dry run, item was *not* backed up + - 'x' = excluded, item was *not* backed up + - '?' = missing status code (if you see this, please file a bug report!) """) subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False, @@ -1475,94 +2758,71 @@ class Archiver: help='create backup') subparser.set_defaults(func=self.do_create) - subparser.add_argument('-n', '--dry-run', dest='dry_run', - action='store_true', default=False, + subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', help='do not create a backup archive') - - subparser.add_argument('-s', '--stats', dest='stats', - action='store_true', default=False, + subparser.add_argument('-s', '--stats', dest='stats', action='store_true', help='print statistics for the created archive') - subparser.add_argument('-p', '--progress', dest='progress', - action='store_true', default=False, - help='show progress display while creating the archive, showing Original, ' - 'Compressed and Deduplicated sizes, followed by the Number of files seen ' - 'and the path being processed, default: %(default)s') - subparser.add_argument('--list', dest='output_list', - action='store_true', default=False, + subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of items (files, dirs, ...)') - subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS', - help='only display items with the given status characters') + subparser.add_argument('--filter', metavar='STATUSCHARS', dest='output_filter', + help='only display items with the given status characters (see description)') + subparser.add_argument('--json', action='store_true', + help='output stats as JSON. Implies ``--stats``.') + subparser.add_argument('--no-cache-sync', dest='no_cache_sync', action='store_true', + help='experimental: do not synchronize the cache. 
Implies not using the files cache.') + subparser.add_argument('--no-files-cache', dest='cache_files', action='store_false', + help='do not load/update the file metadata cache used to detect unchanged files') - exclude_group = subparser.add_argument_group('Exclusion options') - exclude_group.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - exclude_group.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') - exclude_group.add_argument('--exclude-caches', dest='exclude_caches', - action='store_true', default=False, - help='exclude directories that contain a CACHEDIR.TAG file (' - 'http://www.brynosaurus.com/cachedir/spec.html)') - exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present', - metavar='FILENAME', action='append', type=str, - help='exclude directories that contain the specified file') - exclude_group.add_argument('--keep-tag-files', dest='keep_tag_files', - action='store_true', default=False, - help='keep tag files of excluded caches/directories') + exclude_group = define_exclusion_group(subparser, tag_files=True) + exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true', + help='exclude files flagged NODUMP') fs_group = subparser.add_argument_group('Filesystem options') - fs_group.add_argument('-x', '--one-file-system', dest='one_file_system', - action='store_true', default=False, - help='stay in same file system, do not cross mount points') - fs_group.add_argument('--numeric-owner', dest='numeric_owner', - action='store_true', default=False, + fs_group.add_argument('-x', '--one-file-system', dest='one_file_system', action='store_true', + help='stay in the same file system and do not store mount points of other file systems') + fs_group.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', help='only store numeric user and group identifiers') - fs_group.add_argument('--ignore-inode', dest='ignore_inode', - action='store_true', default=False, + fs_group.add_argument('--noatime', dest='noatime', action='store_true', + help='do not store atime into archive') + fs_group.add_argument('--noctime', dest='noctime', action='store_true', + help='do not store ctime into archive') + fs_group.add_argument('--nobsdflags', dest='nobsdflags', action='store_true', + help='do not read and store bsdflags (e.g. NODUMP, IMMUTABLE) into archive') + fs_group.add_argument('--ignore-inode', dest='ignore_inode', action='store_true', help='ignore inode data in the file metadata cache used to detect unchanged files.') - fs_group.add_argument('--read-special', dest='read_special', - action='store_true', default=False, + fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode', + type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI, + help='operate files cache in MODE. default: %s' % DEFAULT_FILES_CACHE_MODE_UI) + fs_group.add_argument('--read-special', dest='read_special', action='store_true', help='open and read block and char device files as well as FIFOs as if they were ' 'regular files. 
Also follows symlinks pointing to these kinds of files.') archive_group = subparser.add_argument_group('Archive options') archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='', help='add a comment text to the archive') - archive_group.add_argument('--timestamp', dest='timestamp', + archive_group.add_argument('--timestamp', metavar='TIMESTAMP', dest='timestamp', type=timestamp, default=None, - metavar='yyyy-mm-ddThh:mm:ss', - help='manually specify the archive creation date/time (UTC). ' - 'alternatively, give a reference file/directory.') - archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', - type=int, default=1800, metavar='SECONDS', + help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). ' + 'Alternatively, give a reference file/directory.') + archive_group.add_argument('-c', '--checkpoint-interval', metavar='SECONDS', dest='checkpoint_interval', + type=int, default=1800, help='write checkpoint every SECONDS seconds (Default: 1800)') - archive_group.add_argument('--chunker-params', dest='chunker_params', + archive_group.add_argument('--chunker-params', metavar='PARAMS', dest='chunker_params', type=ChunkerParams, default=CHUNKER_PARAMS, - metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', - help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) - archive_group.add_argument('-C', '--compression', dest='compression', - type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION', - help='select compression algorithm (and level):\n' - 'none == no compression (default),\n' - 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n' - ' being any valid compression algorithm (and optional level),\n' - 'lz4 == lz4,\n' - 'zlib == zlib (default level 6),\n' - 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' - 'lzma == lzma (default level 6),\n' - 'lzma,0 .. lzma,9 == lzma (with level 0..9).') - archive_group.add_argument('--compression-from', dest='compression_files', - type=argparse.FileType('r'), action='append', - metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') + help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, ' + 'HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %d,%d,%d,%d' % CHUNKER_PARAMS) + archive_group.add_argument('-C', '--compression', metavar='COMPRESSION', dest='compression', + type=CompressionSpec, default=CompressionSpec('lz4'), + help='select compression algorithm, see the output of the ' + '"borg help compression" command for details.') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), help='name of archive to create (must be also a valid directory name)') - subparser.add_argument('paths', metavar='PATH', nargs='+', type=str, + subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to archive') - extract_epilog = textwrap.dedent(""" + extract_epilog = process_epilog(""" This command extracts the contents of an archive. By default the entire archive is extracted but a subset of files and directories can be selected by passing a list of ``PATHs`` as arguments. The file selection can further @@ -1573,6 +2833,14 @@ class Archiver: By using ``--dry-run``, you can do all extraction steps except actually writing the output data: reading metadata and data chunks from the repo, checking the hash/hmac, decrypting, decompressing. 
+ + ``--progress`` can be slower than no progress display, since it makes one additional + pass over the archive metadata. + + .. note:: + + Currently, extract always writes into the current working directory ("."), + so make sure you ``cd`` to the right place before calling ``borg extract``. """) subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False, description=self.do_extract.__doc__, @@ -1580,41 +2848,85 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='extract archive contents') subparser.set_defaults(func=self.do_extract) - subparser.add_argument('--list', dest='output_list', - action='store_true', default=False, + subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of items (files, dirs, ...)') - subparser.add_argument('-n', '--dry-run', dest='dry_run', - default=False, action='store_true', + subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', help='do not actually change any files') - subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') - subparser.add_argument('--numeric-owner', dest='numeric_owner', - action='store_true', default=False, + subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', help='only obey numeric user and group identifiers') - subparser.add_argument('--strip-components', dest='strip_components', - type=int, default=0, metavar='NUMBER', - help='Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped.') - subparser.add_argument('--stdout', dest='stdout', - action='store_true', default=False, + subparser.add_argument('--nobsdflags', dest='nobsdflags', action='store_true', + help='do not extract/set bsdflags (e.g. NODUMP, IMMUTABLE)') + subparser.add_argument('--stdout', dest='stdout', action='store_true', help='write all extracted data to stdout') - subparser.add_argument('--sparse', dest='sparse', - action='store_true', default=False, + subparser.add_argument('--sparse', dest='sparse', action='store_true', help='create holes in output sparse file from all-zero chunks') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to extract') subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to extract; patterns are supported') + define_exclusion_group(subparser, strip_components=True) - diff_epilog = textwrap.dedent(""" - This command finds differences in files (contents, user, group, mode) between archives. + export_tar_epilog = process_epilog(""" + This command creates a tarball from an archive. - Both archives need to be in the same repository, and a repository location may only - be specified for ARCHIVE1. + When giving '-' as the output FILE, Borg will write a tar stream to standard output. + + By default (``--tar-filter=auto``) Borg will detect whether the FILE should be compressed + based on its file extension and pipe the tarball through an appropriate filter + before writing it to FILE: + + - .tar.gz: gzip + - .tar.bz2: bzip2 + - .tar.xz: xz + + Alternatively a ``--tar-filter`` program may be explicitly specified. 
It should + read the uncompressed tar stream from stdin and write a compressed/filtered + tar stream to stdout. + + The generated tarball uses the GNU tar format. + + export-tar is a lossy conversion: + BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. + Timestamp resolution is limited to whole seconds, not the nanosecond resolution + otherwise supported by Borg. + + A ``--sparse`` option (as found in borg extract) is not supported. + + By default the entire archive is extracted but a subset of files and directories + can be selected by passing a list of ``PATHs`` as arguments. + The file selection can further be restricted by using the ``--exclude`` option. + + See the output of the "borg help patterns" command for more help on exclude patterns. + + ``--progress`` can be slower than no progress display, since it makes one additional + pass over the archive metadata. + """) + subparser = subparsers.add_parser('export-tar', parents=[common_parser], add_help=False, + description=self.do_export_tar.__doc__, + epilog=export_tar_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='create tarball from archive') + subparser.set_defaults(func=self.do_export_tar) + subparser.add_argument('--tar-filter', dest='tar_filter', default='auto', + help='filter program to pipe data through') + subparser.add_argument('--list', dest='output_list', action='store_true', + help='output verbose list of items (files, dirs, ...)') + subparser.add_argument('location', metavar='ARCHIVE', + type=location_validator(archive=True), + help='archive to export') + subparser.add_argument('tarfile', metavar='FILE', + help='output tar file. "-" to write to stdout instead.') + subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, + help='paths to extract; patterns are supported') + define_exclusion_group(subparser, strip_components=True) + + diff_epilog = process_epilog(""" + This command finds differences (file contents, user/group/mode) between archives. + + A repository location and an archive name must be specified for REPO_ARCHIVE1. + ARCHIVE2 is just another archive name in same repository (no repository location + allowed). For archives created with Borg 1.1 or newer diff automatically detects whether the archives are created with the same chunker params. If so, only chunk IDs @@ -1622,7 +2934,7 @@ class Archiver: For archives prior to Borg 1.1 chunk contents are compared by default. If you did not create the archives with different chunker params, - pass --same-chunker-params. + pass ``--same-chunker-params``. Note that the chunker params changed from Borg 0.xx to 1.0. See the output of the "borg help patterns" command for more help on exclude patterns. 
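A minimal sketch of the ``--tar-filter=auto`` detection described in the export-tar
epilog above (illustration only, not Borg's actual implementation)::

    def detect_tar_filter(filename):
        # map the output file extension to a compression filter program
        if filename.endswith('.tar.gz'):
            return 'gzip'
        if filename.endswith('.tar.bz2'):
            return 'bzip2'
        if filename.endswith('.tar.xz'):
            return 'xz'
        return None  # plain tar, write the stream unfiltered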
@@ -1633,31 +2945,23 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='find differences in archive contents') subparser.set_defaults(func=self.do_diff) - subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') - subparser.add_argument('--numeric-owner', dest='numeric_owner', - action='store_true', default=False, + subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', help='only consider numeric user and group identifiers') - subparser.add_argument('--same-chunker-params', dest='same_chunker_params', - action='store_true', default=False, + subparser.add_argument('--same-chunker-params', dest='same_chunker_params', action='store_true', help='Override check of chunker parameters.') - subparser.add_argument('--sort', dest='sort', - action='store_true', default=False, + subparser.add_argument('--sort', dest='sort', action='store_true', help='Sort the output lines by file path.') - subparser.add_argument('location', metavar='ARCHIVE1', + subparser.add_argument('location', metavar='REPO_ARCHIVE1', type=location_validator(archive=True), - help='archive') + help='repository location and ARCHIVE1 name') subparser.add_argument('archive2', metavar='ARCHIVE2', type=archivename_validator(), - help='archive to compare with ARCHIVE1 (no repository location)') + help='ARCHIVE2 name (no repository location allowed)') subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, - help='paths to compare; patterns are supported') + help='paths of items inside the archives to compare; patterns are supported') + define_exclusion_group(subparser, tag_files=True) - rename_epilog = textwrap.dedent(""" + rename_epilog = process_epilog(""" This command renames an archive in the repository. This results in a different archive ID. @@ -1675,7 +2979,7 @@ class Archiver: type=archivename_validator(), help='the new archive name to use') - delete_epilog = textwrap.dedent(""" + delete_epilog = process_epilog(""" This command deletes an archive from the repository or the complete repository. Disk space is reclaimed accordingly. If you delete the complete repository, the local cache for it (if any) is also deleted. 
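Typical invocations of the delete command described above (repository and archive
names are hypothetical)::

    borg delete /path/to/repo::old-archive     # delete a single archive
    borg delete --cache-only /path/to/repo     # only delete the local cache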
@@ -1686,37 +2990,41 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='delete archive') subparser.set_defaults(func=self.do_delete) - subparser.add_argument('-p', '--progress', dest='progress', - action='store_true', default=False, - help="""show progress display while deleting a single archive""") - subparser.add_argument('-s', '--stats', dest='stats', - action='store_true', default=False, + subparser.add_argument('-s', '--stats', dest='stats', action='store_true', help='print statistics for the deleted archive') - subparser.add_argument('-c', '--cache-only', dest='cache_only', - action='store_true', default=False, + subparser.add_argument('--cache-only', dest='cache_only', action='store_true', help='delete only the local cache for the given repository') subparser.add_argument('--force', dest='forced', - action='store_true', default=False, - help='force deletion of corrupted archives') + action='count', default=0, + help='force deletion of corrupted archives, ' + 'use ``--force --force`` in case ``--force`` does not work.') subparser.add_argument('--save-space', dest='save_space', action='store_true', - default=False, help='work slower, but using less space') subparser.add_argument('location', metavar='TARGET', nargs='?', default='', type=location_validator(), help='archive or repository to delete') + subparser.add_argument('archives', metavar='ARCHIVE', nargs='*', + help='archives to delete') + define_archive_filters_group(subparser) - list_epilog = textwrap.dedent(""" + list_epilog = process_epilog(""" This command lists the contents of a repository or an archive. See the "borg help patterns" command for more help on exclude patterns. - The following keys are available for --format: + .. man NOTES + + The following keys are available for ``--format``: + + """) + BaseFormatter.keys_help() + textwrap.dedent(""" - -- Keys for listing repository archives: + Keys for listing repository archives: + """) + ArchiveFormatter.keys_help() + textwrap.dedent(""" - -- Keys for listing archive files: + Keys for listing archive files: + """) + ItemFormatter.keys_help() subparser = subparsers.add_parser('list', parents=[common_parser], add_help=False, description=self.do_list.__doc__, @@ -1724,27 +3032,32 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='list archive or repository contents') subparser.set_defaults(func=self.do_list) - subparser.add_argument('--short', dest='short', - action='store_true', default=False, + subparser.add_argument('--short', dest='short', action='store_true', help='only print file/directory names, nothing else') - subparser.add_argument('--format', '--list-format', dest='format', type=str, - help="""specify format for file listing - (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""") - subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec, - help='only consider archive names starting with this prefix') - subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') + subparser.add_argument('--format', '--list-format', metavar='FORMAT', dest='format', + help='specify format for file listing ' + '(default: "{mode} {user:6} {group:6} {size:8d} {mtime} {path}{extra}{NL}")') + 
subparser.add_argument('--json', action='store_true', + help='Only valid for listing repository contents. Format output as JSON. ' + 'The form of ``--format`` is ignored, ' + 'but keys used in it are added to the JSON output. ' + 'Some keys are always present. Note: JSON can only represent text. ' + 'A "barchive" key is therefore not available.') + subparser.add_argument('--json-lines', action='store_true', + help='Only valid for listing archive contents. Format output as JSON Lines. ' + 'The form of ``--format`` is ignored, ' + 'but keys used in it are added to the JSON output. ' + 'Some keys are always present. Note: JSON can only represent text. ' + 'A "bpath" key is therefore not available.') subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository/archive to list contents of') subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to list; patterns are supported') + define_archive_filters_group(subparser) + define_exclusion_group(subparser, tag_files=True) - mount_epilog = textwrap.dedent(""" + mount_epilog = process_epilog(""" This command mounts an archive as a FUSE filesystem. This can be useful for browsing an archive or restoring individual files. Unless the ``--foreground`` option is given the command will run in the background until the filesystem @@ -1760,14 +3073,23 @@ class Archiver: For mount options, see the fuse(8) manual page. Additional mount options supported by borg: + - versions: when used with a repository mount, this gives a merged, versioned + view of the files in the archives. EXPERIMENTAL, layout may change in future. - allow_damaged_files: by default damaged files (where missing chunks were - replaced with runs of zeros by borg check --repair) are not readable and + replaced with runs of zeros by borg check ``--repair``) are not readable and return EIO (I/O error). Set this option to read such files. The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users to tweak the performance. It sets the number of cached data chunks; additional memory usage can be up to ~8 MiB times this number. The default is the number of CPU cores. + + When the daemonized process receives a signal or crashes, it does not unmount. + Unmounting in these cases could cause an active rsync or similar process + to unintentionally delete data. + + When running in the foreground ^C/SIGINT unmounts cleanly, but other + signals or crashes do not. """) subparser = subparsers.add_parser('mount', parents=[common_parser], add_help=False, description=self.do_mount.__doc__, @@ -1780,29 +3102,58 @@ class Archiver: subparser.add_argument('mountpoint', metavar='MOUNTPOINT', type=str, help='where to mount filesystem') subparser.add_argument('-f', '--foreground', dest='foreground', - action='store_true', default=False, + action='store_true', help='stay in foreground, do not daemonize') subparser.add_argument('-o', dest='options', type=str, help='Extra mount options') + define_archive_filters_group(subparser) - info_epilog = textwrap.dedent(""" - This command displays some detailed information about the specified archive. + umount_epilog = process_epilog(""" + This command un-mounts a FUSE filesystem that was mounted with ``borg mount``. - The "This archive" line refers exclusively to this archive: - "Deduplicated size" is the size of the unique chunks stored only for this - archive. Non-unique / common chunks show up under "All archives". 
+ This is a convenience wrapper that just calls the platform-specific shell + command - usually this is either umount or fusermount -u. + """) + subparser = subparsers.add_parser('umount', parents=[common_parser], add_help=False, + description=self.do_umount.__doc__, + epilog=umount_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='umount repository') + subparser.set_defaults(func=self.do_umount) + subparser.add_argument('mountpoint', metavar='MOUNTPOINT', type=str, + help='mountpoint of the filesystem to umount') + + info_epilog = process_epilog(""" + This command displays detailed information about the specified archive or repository. + + Please note that the deduplicated sizes of the individual archives do not add + up to the deduplicated size of the repository ("all archives"), because the two + are meaning different things: + + This archive / deduplicated size = amount of data stored ONLY for this archive + = unique chunks of this archive. + All archives / deduplicated size = amount of data stored in the repo + = all chunks in the repository. + + Borg archives can only contain a limited amount of file metadata. + The size of an archive relative to this limit depends on a number of factors, + mainly the number of files, the lengths of paths and other metadata stored for files. + This is shown as *utilization of maximum supported archive size*. """) subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False, description=self.do_info.__doc__, epilog=info_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, - help='show archive information') + help='show repository or archive information') subparser.set_defaults(func=self.do_info) - subparser.add_argument('location', metavar='ARCHIVE', - type=location_validator(archive=True), - help='archive to display information about') + subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', + type=location_validator(), + help='archive or repository to display information about') + subparser.add_argument('--json', action='store_true', + help='format output as JSON') + define_archive_filters_group(subparser) - break_lock_epilog = textwrap.dedent(""" + break_lock_epilog = process_epilog(""" This command breaks the repository and cache locks. Please use carefully and only while no borg process (on any machine) is trying to access the Cache or the Repository. @@ -1813,11 +3164,11 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='break repository and cache locks') subparser.set_defaults(func=self.do_break_lock) - subparser.add_argument('location', metavar='REPOSITORY', + subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository for which to break the locks') - prune_epilog = textwrap.dedent(""" + prune_epilog = process_epilog(""" The prune command prunes a repository by deleting all archives not matching any of the specified retention options. This command is normally used by automated backup scripts wanting to keep a certain number of historic backups. @@ -1825,25 +3176,27 @@ class Archiver: Also, prune automatically removes checkpoint archives (incomplete archives left behind by interrupted backup runs) except if the checkpoint is the latest archive (and thus still needed). Checkpoint archives are not considered when - comparing archive counts against the retention limits (--keep-*). + comparing archive counts against the retention limits (``--keep-X``). 
If a prefix is set with -P, then only archives that start with the prefix are considered for deletion and only those archives count towards the totals specified by the rules. Otherwise, *all* archives in the repository are candidates for deletion! + There is no automatic distinction between archives representing different + contents. These need to be distinguished by specifying matching prefixes. If you have multiple sequences of archives with different data sets (e.g. from different machines) in one shared repository, use one prune call per data set that matches only the respective archives using the -P option. - The "--keep-within" option takes an argument of the form "", - where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means + The ``--keep-within`` option takes an argument of the form "", + where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means to keep all archives that were created within the past 48 hours. "1m" is taken to mean "31d". The archives kept with this option do not count towards the totals specified by any other options. A good procedure is to thin out more and more the older your backups get. - As an example, "--keep-daily 7" means to keep the latest backup on each day, + As an example, ``--keep-daily 7`` means to keep the latest backup on each day, up to 7 most recent days with backups (days without backups do not count). The rules are applied from secondly to yearly, and backups selected by previous rules do not count towards those of later rules. The time that each backup @@ -1851,7 +3204,7 @@ class Archiver: the local timezone, and weeks go from Monday to Sunday. Specifying a negative number of archives to keep means that there is no limit. - The "--keep-last N" option is doing the same as "--keep-secondly N" (and it will + The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will keep the last N archives under the assumption that you do not create more than one backup archive in the same second). 
""") @@ -1861,19 +3214,15 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='prune archives') subparser.set_defaults(func=self.do_prune) - subparser.add_argument('-n', '--dry-run', dest='dry_run', - default=False, action='store_true', + subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', help='do not change repository') - subparser.add_argument('--force', dest='forced', - action='store_true', default=False, + subparser.add_argument('--force', dest='forced', action='store_true', help='force pruning of corrupted archives') - subparser.add_argument('-s', '--stats', dest='stats', - action='store_true', default=False, + subparser.add_argument('-s', '--stats', dest='stats', action='store_true', help='print statistics for the deleted archive') - subparser.add_argument('--list', dest='output_list', - action='store_true', default=False, + subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of archives it keeps/prunes') - subparser.add_argument('--keep-within', dest='within', type=str, metavar='WITHIN', + subparser.add_argument('--keep-within', metavar='INTERVAL', dest='within', type=interval, help='keep all archives within this time interval') subparser.add_argument('--keep-last', '--keep-secondly', dest='secondly', type=int, default=0, help='number of secondly archives to keep') @@ -1889,24 +3238,66 @@ class Archiver: help='number of monthly archives to keep') subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0, help='number of yearly archives to keep') - subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec, - help='only consider archive names starting with this prefix') + define_archive_filters_group(subparser, sort_by=False, first_last=False) subparser.add_argument('--save-space', dest='save_space', action='store_true', - default=False, help='work slower, but using less space') subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository to prune') - upgrade_epilog = textwrap.dedent(""" - Upgrade an existing Borg repository. + upgrade_epilog = process_epilog(""" + Upgrade an existing, local Borg repository. + + When you do not need borg upgrade + +++++++++++++++++++++++++++++++++ + + Not every change requires that you run ``borg upgrade``. + + You do **not** need to run it when: + + - moving your repository to a different place + - upgrading to another point release (like 1.0.x to 1.0.y), + except when noted otherwise in the changelog + - upgrading from 1.0.x to 1.1.x, + except when noted otherwise in the changelog + + Borg 1.x.y upgrades + +++++++++++++++++++ + + Use ``borg upgrade --tam REPO`` to require manifest authentication + introduced with Borg 1.0.9 to address security issues. This means + that modifying the repository after doing this with a version prior + to 1.0.9 will raise a validation error, so only perform this upgrade + after updating all clients using the repository to 1.0.9 or newer. + + This upgrade should be done on each client for safety reasons. + + If a repository is accidentally modified with a pre-1.0.9 client after + this upgrade, use ``borg upgrade --tam --force REPO`` to remedy it. + + If you routinely do this you might not want to enable this upgrade + (which will leave you exposed to the security issue). You can + reverse the upgrade by issuing ``borg upgrade --disable-tam REPO``. 
+ + See + https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability + for details. + + Attic and Borg 0.xx to Borg 1.x + +++++++++++++++++++++++++++++++ + This currently supports converting an Attic repository to Borg and also helps with converting Borg 0.xx to 1.0. Currently, only LOCAL repositories can be upgraded (issue #465). - It will change the magic strings in the repository's segments - to match the new Borg magic strings. The keyfiles found in + Please note that ``borg create`` (since 1.0.0) uses bigger chunks by + default than old borg or attic did, so the new chunks won't deduplicate + with the old chunks in the upgraded repository. + See ``--chunker-params`` option of ``borg create`` and ``borg recreate``. + + ``borg upgrade`` will change the magic strings in the repository's + segments to match the new Borg magic strings. The keyfiles found in $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.config/borg/keys. @@ -1925,7 +3316,7 @@ class Archiver: Unless ``--inplace`` is specified, the upgrade process first creates a backup copy of the repository, in - REPOSITORY.upgrade-DATETIME, using hardlinks. This takes + REPOSITORY.before-upgrade-DATETIME, using hardlinks. This takes longer than in place upgrades, but is much safer and gives progress information (as opposed to ``cp -al``). Once you are satisfied with the conversion, you can safely destroy the @@ -1942,59 +3333,58 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='upgrade repository format') subparser.set_defaults(func=self.do_upgrade) - subparser.add_argument('-p', '--progress', dest='progress', - action='store_true', default=False, - help="""show progress display while upgrading the repository""") - subparser.add_argument('-n', '--dry-run', dest='dry_run', - default=False, action='store_true', + subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', help='do not change repository') - subparser.add_argument('-i', '--inplace', dest='inplace', - default=False, action='store_true', - help="""rewrite repository in place, with no chance of going back to older - versions of the repository.""") + subparser.add_argument('--inplace', dest='inplace', action='store_true', + help='rewrite repository in place, with no chance of going back ' + 'to older versions of the repository.') + subparser.add_argument('--force', dest='force', action='store_true', + help='Force upgrade') + subparser.add_argument('--tam', dest='tam', action='store_true', + help='Enable manifest authentication (in key and cache) (Borg 1.0.9 and later).') + subparser.add_argument('--disable-tam', dest='disable_tam', action='store_true', + help='Disable manifest authentication (in key and cache).') subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='path to the repository to be upgraded') - recreate_epilog = textwrap.dedent(""" + recreate_epilog = process_epilog(""" Recreate the contents of existing archives. - --exclude, --exclude-from and PATH have the exact same semantics - as in "borg create". If PATHs are specified the resulting archive - will only contain files from these PATHs. + This is an *experimental* feature. Do *not* use this on your only backup. - --compression: all chunks seen will be stored using the given method. 
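The upgrade epilog above mentions that, without ``--inplace``, a backup copy of the repository is first created using hardlinks (comparable to ``cp -al``). As a rough illustration of why such a copy is fast and space-cheap, a hardlink tree copy can be sketched as follows; this is illustrative only, not Borg's actual upgrade code::

    import os

    def hardlink_copy(src, dst):
        """Recreate the directory tree of src under dst, hardlinking regular files
        instead of copying their data (illustrative sketch, similar in spirit to cp -al)."""
        os.makedirs(dst)
        for root, dirs, files in os.walk(src):
            rel = os.path.relpath(root, src)
            target_root = dst if rel == '.' else os.path.join(dst, rel)
            for d in dirs:
                os.makedirs(os.path.join(target_root, d), exist_ok=True)
            for f in files:
                os.link(os.path.join(root, f), os.path.join(target_root, f))

    # e.g. hardlink_copy('repo', 'repo.before-upgrade-2017-01-01T12:00:00')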
+ ``--exclude``, ``--exclude-from``, ``--exclude-if-present``, ``--keep-exclude-tags``, and PATH + have the exact same semantics as in "borg create". If PATHs are specified the + resulting archive will only contain files from these PATHs. + + Note that all paths in an archive are relative, therefore absolute patterns/paths + will *not* match (``--exclude``, ``--exclude-from``, PATHs). + + ``--recompress`` allows to change the compression of existing data in archives. Due to how Borg stores compressed size information this might display incorrect information for archives that were not recreated at the same time. There is no risk of data loss by this. - --chunker-params will re-chunk all files in the archive, this can be + ``--chunker-params`` will re-chunk all files in the archive, this can be used to have upgraded Borg 0.xx or Attic archives deduplicate with Borg 1.x archives. - borg recreate is signal safe. Send either SIGINT (Ctrl-C on most terminals) or - SIGTERM to request termination. - - Use the *exact same* command line to resume the operation later - changing excludes - or paths will lead to inconsistencies (changed excludes will only apply to newly - processed files/dirs). Changing compression leads to incorrect size information - (which does not cause any data loss, but can be misleading). - Changing chunker params between invocations might lead to data loss. - - USE WITH CAUTION. + **USE WITH CAUTION.** Depending on the PATHs and patterns given, recreate can be used to permanently delete files from archives. - When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are + When in doubt, use ``--dry-run --verbose --list`` to see how patterns/PATHS are interpreted. The archive being recreated is only removed after the operation completes. The archive that is built during the operation exists at the same time at ".recreate". The new archive will have a different archive ID. + With ``--target`` the original archive is not replaced, instead a new archive is created. + When rechunking space usage can be substantial, expect at least the entire deduplicated size of the archives using the previous chunker params. - When recompressing approximately 1 % of the repository size or 512 MB - (whichever is greater) of additional space is used. + When recompressing expect approx. (throughput / checkpoint-interval) in space usage, + assuming all chunks are recompressed. 
""") subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False, description=self.do_recreate.__doc__, @@ -2002,65 +3392,46 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help=self.do_recreate.__doc__) subparser.set_defaults(func=self.do_recreate) - subparser.add_argument('--list', dest='output_list', - action='store_true', default=False, + subparser.add_argument('--list', dest='output_list', action='store_true', help='output verbose list of items (files, dirs, ...)') - subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS', - help='only display items with the given status characters') - subparser.add_argument('-p', '--progress', dest='progress', - action='store_true', default=False, - help='show progress display while recreating archives') - subparser.add_argument('-n', '--dry-run', dest='dry_run', - action='store_true', default=False, + subparser.add_argument('--filter', metavar='STATUSCHARS', dest='output_filter', + help='only display items with the given status characters (listed in borg create --help)') + subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', help='do not change anything') - subparser.add_argument('-s', '--stats', dest='stats', - action='store_true', default=False, + subparser.add_argument('-s', '--stats', dest='stats', action='store_true', help='print statistics at end') - exclude_group = subparser.add_argument_group('Exclusion options') - exclude_group.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - exclude_group.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') - exclude_group.add_argument('--exclude-caches', dest='exclude_caches', - action='store_true', default=False, - help='exclude directories that contain a CACHEDIR.TAG file (' - 'http://www.brynosaurus.com/cachedir/spec.html)') - exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present', - metavar='FILENAME', action='append', type=str, - help='exclude directories that contain the specified file') - exclude_group.add_argument('--keep-tag-files', dest='keep_tag_files', - action='store_true', default=False, - help='keep tag files of excluded caches/directories') + define_exclusion_group(subparser, tag_files=True) archive_group = subparser.add_argument_group('Archive options') + archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None, + type=archivename_validator(), + help='create a new archive with the name ARCHIVE, do not replace existing archive ' + '(only applies for a single archive)') + archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', + type=int, default=1800, metavar='SECONDS', + help='write checkpoint every SECONDS seconds (Default: 1800)') archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default=None, help='add a comment text to the archive') - archive_group.add_argument('--timestamp', dest='timestamp', + archive_group.add_argument('--timestamp', metavar='TIMESTAMP', dest='timestamp', type=timestamp, default=None, - metavar='yyyy-mm-ddThh:mm:ss', - help='manually specify the archive creation date/time (UTC). ' + help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). 
' 'alternatively, give a reference file/directory.') - archive_group.add_argument('-C', '--compression', dest='compression', - type=CompressionSpec, default=None, metavar='COMPRESSION', - help='select compression algorithm (and level):\n' - 'none == no compression (default),\n' - 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n' - ' being any valid compression algorithm (and optional level),\n' - 'lz4 == lz4,\n' - 'zlib == zlib (default level 6),\n' - 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' - 'lzma == lzma (default level 6),\n' - 'lzma,0 .. lzma,9 == lzma (with level 0..9).') - archive_group.add_argument('--compression-from', dest='compression_files', - type=argparse.FileType('r'), action='append', - metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') - archive_group.add_argument('--chunker-params', dest='chunker_params', - type=ChunkerParams, default=None, - metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', - help='specify the chunker parameters (or "default").') + archive_group.add_argument('-C', '--compression', metavar='COMPRESSION', dest='compression', + type=CompressionSpec, default=CompressionSpec('lz4'), + help='select compression algorithm, see the output of the ' + '"borg help compression" command for details.') + archive_group.add_argument('--recompress', dest='recompress', nargs='?', default='never', const='if-different', + choices=('never', 'if-different', 'always'), + help='recompress data chunks according to ``--compression`` if `if-different`. ' + 'When `always`, chunks that are already compressed that way are not skipped, ' + 'but compressed again. Only the algorithm is considered for `if-different`, ' + 'not the compression level (if any).') + archive_group.add_argument('--chunker-params', metavar='PARAMS', dest='chunker_params', + type=ChunkerParams, default=CHUNKER_PARAMS, + help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, ' + 'HASH_MASK_BITS, HASH_WINDOW_SIZE) or `default` to use the current defaults. ' + 'default: %d,%d,%d,%d' % CHUNKER_PARAMS) subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), @@ -2068,7 +3439,7 @@ class Archiver: subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to recreate; patterns are supported') - with_lock_epilog = textwrap.dedent(""" + with_lock_epilog = process_epilog(""" This command runs a user-specified command while the repository lock is held. It will first try to acquire the lock (make sure that no other operation is @@ -2076,9 +3447,12 @@ class Archiver: for its termination, release the lock and return the user command's return code as borg's return code. - Note: if you copy a repository with the lock held, the lock will be present in - the copy, obviously. Thus, before using borg on the copy, you need to - use "borg break-lock" on it. + .. note:: + + If you copy a repository with the lock held, the lock will be present in + the copy. Thus, before using borg on the copy from a different host, + you need to use "borg break-lock" on the copied repository, because + Borg is cautious and does not automatically remove stale locks made by a different host. 
""") subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False, description=self.do_with_lock.__doc__, @@ -2096,18 +3470,45 @@ class Archiver: subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False, description='Extra help') - subparser.add_argument('--epilog-only', dest='epilog_only', - action='store_true', default=False) - subparser.add_argument('--usage-only', dest='usage_only', - action='store_true', default=False) + subparser.add_argument('--epilog-only', dest='epilog_only', action='store_true') + subparser.add_argument('--usage-only', dest='usage_only', action='store_true') subparser.set_defaults(func=functools.partial(self.do_help, parser, subparsers.choices)) subparser.add_argument('topic', metavar='TOPIC', type=str, nargs='?', help='additional help on TOPIC') - debug_dump_archive_items_epilog = textwrap.dedent(""" + debug_epilog = process_epilog(""" + These commands are not intended for normal use and potentially very + dangerous if used incorrectly. + + They exist to improve debugging capabilities without direct system access, e.g. + in case you ever run into some severe malfunction. Use them only if you know + what you are doing or if a trusted developer tells you what to do.""") + + subparser = subparsers.add_parser('debug', parents=[mid_common_parser], add_help=False, + description='debugging command (not intended for normal use)', + epilog=debug_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='debugging command (not intended for normal use)') + + debug_parsers = subparser.add_subparsers(title='required arguments', metavar='') + subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser)) + + debug_info_epilog = process_epilog(""" + This command displays some system information that might be useful for bug + reports and debugging problems. If a traceback happens, this information is + already appended at the end of the traceback. + """) + subparser = debug_parsers.add_parser('info', parents=[common_parser], add_help=False, + description=self.do_debug_info.__doc__, + epilog=debug_info_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='show system infos for debugging / bug reports (debug)') + subparser.set_defaults(func=self.do_debug_info) + + debug_dump_archive_items_epilog = process_epilog(""" This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files. """) - subparser = subparsers.add_parser('debug-dump-archive-items', parents=[common_parser], add_help=False, + subparser = debug_parsers.add_parser('dump-archive-items', parents=[common_parser], add_help=False, description=self.do_debug_dump_archive_items.__doc__, epilog=debug_dump_archive_items_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -2117,10 +3518,53 @@ class Archiver: type=location_validator(archive=True), help='archive to dump') - debug_get_obj_epilog = textwrap.dedent(""" + debug_dump_archive_epilog = process_epilog(""" + This command dumps all metadata of an archive in a decoded form to a file. 
+ """) + subparser = debug_parsers.add_parser('dump-archive', parents=[common_parser], add_help=False, + description=self.do_debug_dump_archive.__doc__, + epilog=debug_dump_archive_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='dump decoded archive metadata (debug)') + subparser.set_defaults(func=self.do_debug_dump_archive) + subparser.add_argument('location', metavar='ARCHIVE', + type=location_validator(archive=True), + help='archive to dump') + subparser.add_argument('path', metavar='PATH', type=str, + help='file to dump data into') + + debug_dump_manifest_epilog = process_epilog(""" + This command dumps manifest metadata of a repository in a decoded form to a file. + """) + subparser = debug_parsers.add_parser('dump-manifest', parents=[common_parser], add_help=False, + description=self.do_debug_dump_manifest.__doc__, + epilog=debug_dump_manifest_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='dump decoded repository metadata (debug)') + subparser.set_defaults(func=self.do_debug_dump_manifest) + subparser.add_argument('location', metavar='REPOSITORY', + type=location_validator(archive=False), + help='repository to dump') + subparser.add_argument('path', metavar='PATH', type=str, + help='file to dump data into') + + debug_dump_repo_objs_epilog = process_epilog(""" + This command dumps raw (but decrypted and decompressed) repo objects to files. + """) + subparser = debug_parsers.add_parser('dump-repo-objs', parents=[common_parser], add_help=False, + description=self.do_debug_dump_repo_objs.__doc__, + epilog=debug_dump_repo_objs_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='dump repo objects (debug)') + subparser.set_defaults(func=self.do_debug_dump_repo_objs) + subparser.add_argument('location', metavar='REPOSITORY', + type=location_validator(archive=False), + help='repo to dump') + + debug_get_obj_epilog = process_epilog(""" This command gets an object from the repository. """) - subparser = subparsers.add_parser('debug-get-obj', parents=[common_parser], add_help=False, + subparser = debug_parsers.add_parser('get-obj', parents=[common_parser], add_help=False, description=self.do_debug_get_obj.__doc__, epilog=debug_get_obj_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -2134,10 +3578,10 @@ class Archiver: subparser.add_argument('path', metavar='PATH', type=str, help='file to write object data into') - debug_put_obj_epilog = textwrap.dedent(""" + debug_put_obj_epilog = process_epilog(""" This command puts objects into the repository. """) - subparser = subparsers.add_parser('debug-put-obj', parents=[common_parser], add_help=False, + subparser = debug_parsers.add_parser('put-obj', parents=[common_parser], add_help=False, description=self.do_debug_put_obj.__doc__, epilog=debug_put_obj_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -2149,10 +3593,10 @@ class Archiver: subparser.add_argument('paths', metavar='PATH', nargs='+', type=str, help='file(s) to read and create object(s) from') - debug_delete_obj_epilog = textwrap.dedent(""" + debug_delete_obj_epilog = process_epilog(""" This command deletes objects from the repository. 
""") - subparser = subparsers.add_parser('debug-delete-obj', parents=[common_parser], add_help=False, + subparser = debug_parsers.add_parser('delete-obj', parents=[common_parser], add_help=False, description=self.do_debug_delete_obj.__doc__, epilog=debug_delete_obj_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, @@ -2163,6 +3607,99 @@ class Archiver: help='repository to use') subparser.add_argument('ids', metavar='IDs', nargs='+', type=str, help='hex object ID(s) to delete from the repo') + + debug_refcount_obj_epilog = process_epilog(""" + This command displays the reference count for objects from the repository. + """) + subparser = debug_parsers.add_parser('refcount-obj', parents=[common_parser], add_help=False, + description=self.do_debug_refcount_obj.__doc__, + epilog=debug_refcount_obj_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='show refcount for object from repository (debug)') + subparser.set_defaults(func=self.do_debug_refcount_obj) + subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False), + help='repository to use') + subparser.add_argument('ids', metavar='IDs', nargs='+', type=str, + help='hex object ID(s) to show refcounts for') + + debug_convert_profile_epilog = process_epilog(""" + Convert a Borg profile to a Python cProfile compatible profile. + """) + subparser = debug_parsers.add_parser('convert-profile', parents=[common_parser], add_help=False, + description=self.do_debug_convert_profile.__doc__, + epilog=debug_convert_profile_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='convert Borg profile to Python profile (debug)') + subparser.set_defaults(func=self.do_debug_convert_profile) + subparser.add_argument('input', metavar='INPUT', type=argparse.FileType('rb'), + help='Borg profile') + subparser.add_argument('output', metavar='OUTPUT', type=argparse.FileType('wb'), + help='Output file') + + benchmark_epilog = process_epilog("These commands do various benchmarks.") + + subparser = subparsers.add_parser('benchmark', parents=[mid_common_parser], add_help=False, + description='benchmark command', + epilog=benchmark_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='benchmark command') + + benchmark_parsers = subparser.add_subparsers(title='required arguments', metavar='') + subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser)) + + bench_crud_epilog = process_epilog(""" + This command benchmarks borg CRUD (create, read, update, delete) operations. + + It creates input data below the given PATH and backups this data into the given REPO. + The REPO must already exist (it could be a fresh empty repo or an existing repo, the + command will create / read / update / delete some archives named borg-test-data\* there. + + Make sure you have free space there, you'll need about 1GB each (+ overhead). + + If your repository is encrypted and borg needs a passphrase to unlock the key, use: + + BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH + + Measurements are done with different input file sizes and counts. + The file contents are very artificial (either all zero or all random), + thus the measurement results do not necessarily reflect performance with real data. + Also, due to the kind of content used, no compression is used in these benchmarks. + + C- == borg create (1st archive creation, no compression, do not use files cache) + C-Z- == all-zero files. 
full dedup, this is primarily measuring reader/chunker/hasher. + C-R- == random files. no dedup, measuring throughput through all processing stages. + + R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk) + R-Z- == all zero files. Measuring heavily duplicated files. + R-R- == random files. No duplication here, measuring throughput through all processing + stages, except writing to disk. + + U- == borg create (2nd archive creation of unchanged input files, measure files cache speed) + The throughput value is kind of virtual here, it does not actually read the file. + U-Z- == needs to check the 2 all-zero chunks' existence in the repo. + U-R- == needs to check existence of a lot of different chunks in the repo. + + D- == borg delete archive (delete last remaining archive, measure deletion + compaction) + D-Z- == few chunks to delete / few segments to compact/remove. + D-R- == many chunks to delete / many segments to compact/remove. + + Please note that there might be quite some variance in these measurements. + Try multiple measurements and having a otherwise idle machine (and network, if you use it). + """) + subparser = benchmark_parsers.add_parser('crud', parents=[common_parser], add_help=False, + description=self.do_benchmark_crud.__doc__, + epilog=bench_crud_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='benchmarks borg CRUD (create, extract, update, delete).') + subparser.set_defaults(func=self.do_benchmark_crud) + + subparser.add_argument('location', metavar='REPO', + type=location_validator(archive=False), + help='repo to use for benchmark (must exist)') + + subparser.add_argument('path', metavar='PATH', help='path were to create benchmark input data') + return parser def get_args(self, argv, cmd): @@ -2171,6 +3708,9 @@ class Archiver: if cmd is not None and result.func == self.do_serve: forced_result = result argv = shlex.split(cmd) + # Drop environment variables (do *not* interpret them) before trying to parse + # the borg command line. + argv = list(itertools.dropwhile(lambda arg: '=' in arg, argv)) result = self.parse_args(argv[1:]) if result.func != forced_result.func: # someone is trying to execute a different borg subcommand, don't do that! 
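The ``itertools.dropwhile`` call added to ``get_args`` above strips leading ``NAME=value`` words from the forced command (taken from ``SSH_ORIGINAL_COMMAND``) before it is parsed, so environment-variable assignments are dropped rather than interpreted as borg arguments. A small standalone illustration with a made-up command string::

    import itertools
    import shlex

    cmd = 'SOME_VAR=1 borg serve --restrict-to-path /backups'   # hypothetical forced command
    argv = shlex.split(cmd)
    # drop leading words containing '=' so they are not parsed as borg arguments
    argv = list(itertools.dropwhile(lambda arg: '=' in arg, argv))
    print(argv)       # ['borg', 'serve', '--restrict-to-path', '/backups']
    print(argv[1:])   # what is actually parsed: ['serve', '--restrict-to-path', '/backups']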
@@ -2184,11 +3724,17 @@ class Archiver: # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: args = self.preprocess_args(args) - args = self.parser.parse_args(args or ['-h']) - update_excludes(args) + parser = self.build_parser() + args = parser.parse_args(args or ['-h']) + parser.common_options.resolve(args) + func = get_func(args) + if func == self.do_create and not args.paths: + # need at least 1 path but args.paths may also be populated from patterns + parser.error('Need at least one PATH argument.') return args def prerun_checks(self, logger): + check_python() check_extension_modules() if sys.platform != 'win32': selftest(logger) @@ -2197,69 +3743,99 @@ class Archiver: """ turn on INFO level logging for args that imply that they will produce output """ # map of option name to name of logger for that option option_logger = { - 'output_list': 'borg.output.list', - 'show_version': 'borg.output.show-version', - 'show_rc': 'borg.output.show-rc', - 'stats': 'borg.output.stats', - 'progress': 'borg.output.progress', - } + 'output_list': 'borg.output.list', + 'show_version': 'borg.output.show-version', + 'show_rc': 'borg.output.show-rc', + 'stats': 'borg.output.stats', + 'progress': 'borg.output.progress', + } for option, logger_name in option_logger.items(): - if args.get(option, False): - logging.getLogger(logger_name).setLevel('INFO') + option_set = args.get(option, False) + logging.getLogger(logger_name).setLevel('INFO' if option_set else 'WARN') + + def _setup_topic_debugging(self, args): + """Turn on DEBUG level logging for specified --debug-topics.""" + for topic in args.debug_topics: + if '.' not in topic: + topic = 'borg.debug.' + topic + logger.debug('Enabling debug topic %s', topic) + logging.getLogger(topic).setLevel('DEBUG') def run(self, args): os.umask(args.umask) # early, before opening files self.lock_wait = args.lock_wait - setup_logging(level=args.log_level, is_serve=args.func == self.do_serve) # do not use loggers before this! + func = get_func(args) + # do not use loggers before this! + is_serve = func == self.do_serve + setup_logging(level=args.log_level, is_serve=is_serve, json=args.log_json) + self.log_json = args.log_json + args.progress |= is_serve self._setup_implied_logging(vars(args)) + self._setup_topic_debugging(args) if args.show_version: logging.getLogger('borg.output.show-version').info('borgbackup version %s' % __version__) self.prerun_checks(logger) if is_slow_msgpack(): logger.warning("Using a pure-python msgpack! This will result in lower performance.") - return args.func(args) + if args.debug_profile: + # Import only when needed - avoids a further increase in startup time + import cProfile + import marshal + logger.debug('Writing execution profile to %s', args.debug_profile) + # Open the file early, before running the main program, to avoid + # a very late crash in case the specified path is invalid. + with open(args.debug_profile, 'wb') as fd: + profiler = cProfile.Profile() + variables = dict(locals()) + profiler.enable() + try: + return set_ec(func(args)) + finally: + profiler.disable() + profiler.snapshot_stats() + if args.debug_profile.endswith('.pyprof'): + marshal.dump(profiler.stats, fd) + else: + # We use msgpack here instead of the marshal module used by cProfile itself, + # because the latter is insecure. Since these files may be shared over the + # internet we don't want a format that is impossible to interpret outside + # an insecure implementation. 
+ # See scripts/msgpack2marshal.py for a small script that turns a msgpack file + # into a marshal file that can be read by e.g. pyprof2calltree. + # For local use it's unnecessary hassle, though, that's why .pyprof makes + # it compatible (see above). + msgpack.pack(profiler.stats, fd, use_bin_type=True) + else: + return set_ec(func(args)) -def sig_info_handler(signum, stack): # pragma: no cover +def sig_info_handler(sig_no, stack): # pragma: no cover """search the stack for infos about the currently processed file and print them""" - for frame in inspect.getouterframes(stack): - func, loc = frame[3], frame[0].f_locals - if func in ('process_file', '_process', ): # create op - path = loc['path'] - try: - pos = loc['fd'].tell() - total = loc['st'].st_size - except Exception: - pos, total = 0, 0 - logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) - break - if func in ('extract_item', ): # extract op - path = loc['item'].path - try: - pos = loc['fd'].tell() - except Exception: - pos = 0 - logger.info("{0} {1}/???".format(path, format_file_size(pos))) - break + with signal_handler(sig_no, signal.SIG_IGN): + for frame in inspect.getouterframes(stack): + func, loc = frame[3], frame[0].f_locals + if func in ('process_file', '_process', ): # create op + path = loc['path'] + try: + pos = loc['fd'].tell() + total = loc['st'].st_size + except Exception: + pos, total = 0, 0 + logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) + break + if func in ('extract_item', ): # extract op + path = loc['item'].path + try: + pos = loc['fd'].tell() + except Exception: + pos = 0 + logger.info("{0} {1}/???".format(path, format_file_size(pos))) + break -class SIGTERMReceived(BaseException): - pass - - -def sig_term_handler(signum, stack): - raise SIGTERMReceived - - -def setup_signal_handlers(): # pragma: no cover - sigs = [] - if hasattr(signal, 'SIGUSR1'): - sigs.append(signal.SIGUSR1) # kill -USR1 pid - if hasattr(signal, 'SIGINFO'): - sigs.append(signal.SIGINFO) # kill -INFO pid (or ctrl-t) - for sig in sigs: - signal.signal(sig, sig_info_handler) - signal.signal(signal.SIGTERM, sig_term_handler) +def sig_trace_handler(sig_no, stack): # pragma: no cover + print('\nReceived SIGUSR2 at %s, dumping trace...' 
% datetime.now().replace(microsecond=0), file=sys.stderr) + faulthandler.dump_traceback() def main(): # pragma: no cover @@ -2269,53 +3845,94 @@ def main(): # pragma: no cover # Make sure stdout and stderr have errors='replace' to avoid unicode # issues when print()-ing unicode file names - sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) - sys.stderr = io.TextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True) - setup_signal_handlers() - archiver = Archiver() - msg = None - try: - args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND')) - except Error as e: - msg = e.get_message() - if e.traceback: - msg += "\n%s\n%s" % (traceback.format_exc(), sysinfo()) - # we might not have logging setup yet, so get out quickly - print(msg, file=sys.stderr) - sys.exit(e.exit_code) - try: - exit_code = archiver.run(args) - except Error as e: - msg = e.get_message() - if e.traceback: - msg += "\n%s\n%s" % (traceback.format_exc(), sysinfo()) - exit_code = e.exit_code - except RemoteRepository.RPCError as e: - msg = '%s\n%s' % (str(e), sysinfo()) - exit_code = EXIT_ERROR - except Exception: - msg = 'Local Exception.\n%s\n%s' % (traceback.format_exc(), sysinfo()) - exit_code = EXIT_ERROR - except KeyboardInterrupt: - msg = 'Keyboard interrupt.\n%s\n%s' % (traceback.format_exc(), sysinfo()) - exit_code = EXIT_ERROR - except SIGTERMReceived: - msg = 'Received SIGTERM.' - exit_code = EXIT_ERROR - if msg: - logger.error(msg) - if args.show_rc: - rc_logger = logging.getLogger('borg.output.show-rc') - exit_msg = 'terminating with %s status, rc %d' - if exit_code == EXIT_SUCCESS: - rc_logger.info(exit_msg % ('success', exit_code)) - elif exit_code == EXIT_WARNING: - rc_logger.warning(exit_msg % ('warning', exit_code)) - elif exit_code == EXIT_ERROR: - rc_logger.error(exit_msg % ('error', exit_code)) - else: - rc_logger.error(exit_msg % ('abnormal', exit_code or 666)) - sys.exit(exit_code) + sys.stdout = ErrorIgnoringTextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) + sys.stderr = ErrorIgnoringTextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True) + + # If we receive SIGINT (ctrl-c), SIGTERM (kill) or SIGHUP (kill -HUP), + # catch them and raise a proper exception that can be handled for an + # orderly exit. + # SIGHUP is important especially for systemd systems, where logind + # sends it when a session exits, in addition to any traditional use. + # Output some info if we receive SIGUSR1 or SIGINFO (ctrl-t). + + # Register fault handler for SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL. 
+ faulthandler.enable() + with signal_handler('SIGINT', raising_signal_handler(KeyboardInterrupt)), \ + signal_handler('SIGHUP', raising_signal_handler(SigHup)), \ + signal_handler('SIGTERM', raising_signal_handler(SigTerm)), \ + signal_handler('SIGUSR1', sig_info_handler), \ + signal_handler('SIGUSR2', sig_trace_handler), \ + signal_handler('SIGINFO', sig_info_handler): + archiver = Archiver() + msg = msgid = tb = None + tb_log_level = logging.ERROR + try: + args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND')) + except Error as e: + msg = e.get_message() + tb_log_level = logging.ERROR if e.traceback else logging.DEBUG + tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) + # we might not have logging setup yet, so get out quickly + print(msg, file=sys.stderr) + if tb_log_level == logging.ERROR: + print(tb, file=sys.stderr) + sys.exit(e.exit_code) + try: + exit_code = archiver.run(args) + except Error as e: + msg = e.get_message() + msgid = type(e).__qualname__ + tb_log_level = logging.ERROR if e.traceback else logging.DEBUG + tb = "%s\n%s" % (traceback.format_exc(), sysinfo()) + exit_code = e.exit_code + except RemoteRepository.RPCError as e: + important = e.exception_class not in ('LockTimeout', ) and e.traceback + msgid = e.exception_class + tb_log_level = logging.ERROR if important else logging.DEBUG + if important: + msg = e.exception_full + else: + msg = e.get_message() + tb = '\n'.join('Borg server: ' + l for l in e.sysinfo.splitlines()) + tb += "\n" + sysinfo() + exit_code = EXIT_ERROR + except Exception: + msg = 'Local Exception' + msgid = 'Exception' + tb_log_level = logging.ERROR + tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) + exit_code = EXIT_ERROR + except KeyboardInterrupt: + msg = 'Keyboard interrupt' + tb_log_level = logging.DEBUG + tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) + exit_code = EXIT_ERROR + except SigTerm: + msg = 'Received SIGTERM' + msgid = 'Signal.SIGTERM' + tb_log_level = logging.DEBUG + tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) + exit_code = EXIT_ERROR + except SigHup: + msg = 'Received SIGHUP.' 
+ msgid = 'Signal.SIGHUP' + exit_code = EXIT_ERROR + if msg: + logger.error(msg, msgid=msgid) + if tb: + logger.log(tb_log_level, tb) + if args.show_rc: + rc_logger = logging.getLogger('borg.output.show-rc') + exit_msg = 'terminating with %s status, rc %d' + if exit_code == EXIT_SUCCESS: + rc_logger.info(exit_msg % ('success', exit_code)) + elif exit_code == EXIT_WARNING: + rc_logger.warning(exit_msg % ('warning', exit_code)) + elif exit_code == EXIT_ERROR: + rc_logger.error(exit_msg % ('error', exit_code)) + else: + rc_logger.error(exit_msg % ('abnormal', exit_code or 666)) + sys.exit(exit_code) if __name__ == '__main__': diff --git a/src/borg/cache.py b/src/borg/cache.py index 4dc4c218..e26e183a 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -1,35 +1,332 @@ import configparser import os -import stat import shutil +import stat from binascii import unhexlify from collections import namedtuple +from time import perf_counter import msgpack from .logger import create_logger + logger = create_logger() -from .hashindex import ChunkIndex, ChunkIndexEntry +from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE +from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer +from .helpers import Location from .helpers import Error -from .helpers import get_cache_dir -from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex +from .helpers import Manifest +from .helpers import get_cache_dir, get_security_dir +from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list from .helpers import format_file_size -from .helpers import yes -from .item import Item -from .key import PlaintextKey -from .locking import UpgradableLock +from .helpers import safe_ns +from .helpers import yes, hostname_is_unique +from .helpers import remove_surrogates +from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage +from .helpers import set_ec, EXIT_WARNING +from .helpers import truncate_and_unlink +from .item import ArchiveItem, ChunkListEntry +from .crypto.key import PlaintextKey +from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError +from .locking import Lock +from .platform import SaveFile from .remote import cache_if_remote +from .repository import LIST_SCAN_LIMIT -ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize') -FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids') +# note: cmtime might me either a ctime or a mtime timestamp +FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size cmtime chunk_ids') + + +class SecurityManager: + """ + Tracks repositories. Ensures that nothing bad happens (repository swaps, + replay attacks, unknown repositories etc.). + + This is complicated by the Cache being initially used for this, while + only some commands actually use the Cache, which meant that other commands + did not perform these checks. + + Further complications were created by the Cache being a cache, so it + could be legitimately deleted, which is annoying because Borg didn't + recognize repositories after that. + + Therefore a second location, the security database (see get_security_dir), + was introduced which stores this information. However, this means that + the code has to deal with a cache existing but no security DB entry, + or inconsistencies between the security DB and the cache which have to + be reconciled, and also with no cache existing but a security DB entry. 
+ """ + + def __init__(self, repository): + self.repository = repository + self.dir = get_security_dir(repository.id_str) + self.cache_dir = cache_dir(repository) + self.key_type_file = os.path.join(self.dir, 'key-type') + self.location_file = os.path.join(self.dir, 'location') + self.manifest_ts_file = os.path.join(self.dir, 'manifest-timestamp') + + def known(self): + return os.path.exists(self.key_type_file) + + def key_matches(self, key): + if not self.known(): + return False + try: + with open(self.key_type_file, 'r') as fd: + type = fd.read() + return type == str(key.TYPE) + except OSError as exc: + logger.warning('Could not read/parse key type file: %s', exc) + + def save(self, manifest, key): + logger.debug('security: saving state for %s to %s', self.repository.id_str, self.dir) + current_location = self.repository._location.canonical_path() + logger.debug('security: current location %s', current_location) + logger.debug('security: key type %s', str(key.TYPE)) + logger.debug('security: manifest timestamp %s', manifest.timestamp) + with SaveFile(self.location_file) as fd: + fd.write(current_location) + with SaveFile(self.key_type_file) as fd: + fd.write(str(key.TYPE)) + with SaveFile(self.manifest_ts_file) as fd: + fd.write(manifest.timestamp) + + def assert_location_matches(self, cache_config=None): + # Warn user before sending data to a relocated repository + try: + with open(self.location_file) as fd: + previous_location = fd.read() + logger.debug('security: read previous location %r', previous_location) + except FileNotFoundError: + logger.debug('security: previous location file %s not found', self.location_file) + previous_location = None + except OSError as exc: + logger.warning('Could not read previous location file: %s', exc) + previous_location = None + if cache_config and cache_config.previous_location and previous_location != cache_config.previous_location: + # Reconcile cache and security dir; we take the cache location. + previous_location = cache_config.previous_location + logger.debug('security: using previous_location of cache: %r', previous_location) + + repository_location = self.repository._location.canonical_path() + if previous_location and previous_location != repository_location: + msg = ("Warning: The repository at location {} was previously located at {}\n".format( + repository_location, previous_location) + + "Do you want to continue? 
[yN] ") + if not yes(msg, false_msg="Aborting.", invalid_msg="Invalid answer, aborting.", + retry=False, env_var_override='BORG_RELOCATED_REPO_ACCESS_IS_OK'): + raise Cache.RepositoryAccessAborted() + # adapt on-disk config immediately if the new location was accepted + logger.debug('security: updating location stored in cache and security dir') + with SaveFile(self.location_file) as fd: + fd.write(repository_location) + if cache_config: + cache_config.save() + + def assert_no_manifest_replay(self, manifest, key, cache_config=None): + try: + with open(self.manifest_ts_file) as fd: + timestamp = fd.read() + logger.debug('security: read manifest timestamp %r', timestamp) + except FileNotFoundError: + logger.debug('security: manifest timestamp file %s not found', self.manifest_ts_file) + timestamp = '' + except OSError as exc: + logger.warning('Could not read previous location file: %s', exc) + timestamp = '' + if cache_config: + timestamp = max(timestamp, cache_config.timestamp or '') + logger.debug('security: determined newest manifest timestamp as %s', timestamp) + # If repository is older than the cache or security dir something fishy is going on + if timestamp and timestamp > manifest.timestamp: + if isinstance(key, PlaintextKey): + raise Cache.RepositoryIDNotUnique() + else: + raise Cache.RepositoryReplay() + + def assert_key_type(self, key, cache_config=None): + # Make sure an encrypted repository has not been swapped for an unencrypted repository + if cache_config and cache_config.key_type is not None and cache_config.key_type != str(key.TYPE): + raise Cache.EncryptionMethodMismatch() + if self.known() and not self.key_matches(key): + raise Cache.EncryptionMethodMismatch() + + def assert_secure(self, manifest, key, *, cache_config=None, warn_if_unencrypted=True): + # warn_if_unencrypted=False is only used for initializing a new repository. + # Thus, avoiding asking about a repository that's currently initializing. + self.assert_access_unknown(warn_if_unencrypted, manifest, key) + if cache_config: + self._assert_secure(manifest, key, cache_config) + else: + cache_config = CacheConfig(self.repository) + if cache_config.exists(): + with cache_config: + self._assert_secure(manifest, key, cache_config) + else: + self._assert_secure(manifest, key) + logger.debug('security: repository checks ok, allowing access') + + def _assert_secure(self, manifest, key, cache_config=None): + self.assert_location_matches(cache_config) + self.assert_key_type(key, cache_config) + self.assert_no_manifest_replay(manifest, key, cache_config) + if not self.known(): + logger.debug('security: remembering previously unknown repository') + self.save(manifest, key) + + def assert_access_unknown(self, warn_if_unencrypted, manifest, key): + # warn_if_unencrypted=False is only used for initializing a new repository. + # Thus, avoiding asking about a repository that's currently initializing. + if not key.logically_encrypted and not self.known(): + msg = ("Warning: Attempting to access a previously unknown unencrypted repository!\n" + + "Do you want to continue? 
[yN] ") + allow_access = not warn_if_unencrypted or yes(msg, false_msg="Aborting.", + invalid_msg="Invalid answer, aborting.", + retry=False, env_var_override='BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK') + if allow_access: + if warn_if_unencrypted: + logger.debug('security: remembering unknown unencrypted repository (explicitly allowed)') + else: + logger.debug('security: initializing unencrypted repository') + self.save(manifest, key) + else: + raise Cache.CacheInitAbortedError() + + +def assert_secure(repository, manifest): + sm = SecurityManager(repository) + sm.assert_secure(manifest, manifest.key) + + +def recanonicalize_relative_location(cache_location, repository): + # borg < 1.0.8rc1 had different canonicalization for the repo location (see #1655 and #1741). + repo_location = repository._location.canonical_path() + rl = Location(repo_location) + cl = Location(cache_location) + if cl.proto == rl.proto and cl.user == rl.user and cl.host == rl.host and cl.port == rl.port \ + and \ + cl.path and rl.path and \ + cl.path.startswith('/~/') and rl.path.startswith('/./') and cl.path[3:] == rl.path[3:]: + # everything is same except the expected change in relative path canonicalization, + # update previous_location to avoid warning / user query about changed location: + return repo_location + else: + return cache_location + + +def cache_dir(repository, path=None): + return path or os.path.join(get_cache_dir(), repository.id_str) + + +class CacheConfig: + def __init__(self, repository, path=None, lock_wait=None): + self.repository = repository + self.path = cache_dir(repository, path) + self.config_path = os.path.join(self.path, 'config') + self.lock = None + self.lock_wait = lock_wait + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def exists(self): + return os.path.exists(self.config_path) + + def create(self): + assert not self.exists() + config = configparser.ConfigParser(interpolation=None) + config.add_section('cache') + config.set('cache', 'version', '1') + config.set('cache', 'repository', self.repository.id_str) + config.set('cache', 'manifest', '') + config.add_section('integrity') + config.set('integrity', 'manifest', '') + with SaveFile(self.config_path) as fd: + config.write(fd) + + def open(self): + self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait, + kill_stale_locks=hostname_is_unique()).acquire() + self.load() + + def load(self): + self._config = configparser.ConfigParser(interpolation=None) + self._config.read(self.config_path) + self._check_upgrade(self.config_path) + self.id = self._config.get('cache', 'repository') + self.manifest_id = unhexlify(self._config.get('cache', 'manifest')) + self.timestamp = self._config.get('cache', 'timestamp', fallback=None) + self.key_type = self._config.get('cache', 'key_type', fallback=None) + self.ignored_features = set(parse_stringified_list(self._config.get('cache', 'ignored_features', fallback=''))) + self.mandatory_features = set(parse_stringified_list(self._config.get('cache', 'mandatory_features', fallback=''))) + try: + self.integrity = dict(self._config.items('integrity')) + if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'): + # The cache config file is updated (parsed with ConfigParser, the state of the ConfigParser + # is modified and then written out.), not re-created. + # Thus, older versions will leave our [integrity] section alone, making the section's data invalid. 
+ # Therefore, we also add the manifest ID to this section and + # can discern whether an older version interfered by comparing the manifest IDs of this section + # and the main [cache] section. + self.integrity = {} + logger.warning('Cache integrity data not available: old Borg version modified the cache.') + except configparser.NoSectionError: + logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.') + self.integrity = {} + previous_location = self._config.get('cache', 'previous_location', fallback=None) + if previous_location: + self.previous_location = recanonicalize_relative_location(previous_location, self.repository) + else: + self.previous_location = None + + def save(self, manifest=None, key=None): + if manifest: + self._config.set('cache', 'manifest', manifest.id_str) + self._config.set('cache', 'timestamp', manifest.timestamp) + self._config.set('cache', 'ignored_features', ','.join(self.ignored_features)) + self._config.set('cache', 'mandatory_features', ','.join(self.mandatory_features)) + if not self._config.has_section('integrity'): + self._config.add_section('integrity') + for file, integrity_data in self.integrity.items(): + self._config.set('integrity', file, integrity_data) + self._config.set('integrity', 'manifest', manifest.id_str) + if key: + self._config.set('cache', 'key_type', str(key.TYPE)) + self._config.set('cache', 'previous_location', self.repository._location.canonical_path()) + with SaveFile(self.config_path) as fd: + self._config.write(fd) + + def close(self): + if self.lock is not None: + self.lock.release() + self.lock = None + + def _check_upgrade(self, config_path): + try: + cache_version = self._config.getint('cache', 'version') + wanted_version = 1 + if cache_version != wanted_version: + self.close() + raise Exception('%s has unexpected cache version %d (wanted: %d).' % + (config_path, cache_version, wanted_version)) + except configparser.NoSectionError: + self.close() + raise Exception('%s does not look like a Borg cache.' 
% config_path) from None class Cache: """Client Side cache """ + class RepositoryIDNotUnique(Error): + """Cache is newer than repository - do you have multiple, independently updated repos with same ID?""" + class RepositoryReplay(Error): - """Cache is newer than repository, refusing to continue""" + """Cache is newer than repository - this is either an attack or unsafe (multiple repos with same ID)""" class CacheInitAbortedError(Error): """Cache initialization aborted""" @@ -42,8 +339,8 @@ class Cache: @staticmethod def break_lock(repository, path=None): - path = path or os.path.join(get_cache_dir(), repository.id_str) - UpgradableLock(os.path.join(path, 'lock'), exclusive=True).break_lock() + path = cache_dir(repository, path) + Lock(os.path.join(path, 'lock'), exclusive=True).break_lock() @staticmethod def destroy(repository, path=None): @@ -54,49 +351,104 @@ class Cache: os.remove(config) # kill config first shutil.rmtree(path) + def __new__(cls, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True, + progress=False, lock_wait=None, permit_adhoc_cache=False): + def local(): + return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync, + do_files=do_files, warn_if_unencrypted=warn_if_unencrypted, progress=progress, + lock_wait=lock_wait) + + def adhoc(): + return AdHocCache(repository=repository, key=key, manifest=manifest) + + if not permit_adhoc_cache: + return local() + + # ad-hoc cache may be permitted, but if the local cache is in sync it'd be stupid to invalidate + # it by needlessly using the ad-hoc cache. + # Check if the local cache exists and is in sync. + + cache_config = CacheConfig(repository, path, lock_wait) + if cache_config.exists(): + with cache_config: + cache_in_sync = cache_config.manifest_id == manifest.id + # Don't nest cache locks + if cache_in_sync: + # Local cache is in sync, use it + logger.debug('Cache: choosing local cache (in sync)') + return local() + logger.debug('Cache: choosing ad-hoc cache (local cache does not exist or is not in sync)') + return adhoc() + + +class CacheStatsMixin: + str_format = """\ +All archives: {0.total_size:>20s} {0.total_csize:>20s} {0.unique_csize:>20s} + + Unique chunks Total chunks +Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" + + def __str__(self): + return self.str_format.format(self.format_tuple()) + + Summary = namedtuple('Summary', ['total_size', 'total_csize', 'unique_size', 'unique_csize', 'total_unique_chunks', + 'total_chunks']) + + def stats(self): + # XXX: this should really be moved down to `hashindex.pyx` + stats = self.Summary(*self.chunks.summarize())._asdict() + return stats + + def format_tuple(self): + stats = self.stats() + for field in ['total_size', 'total_csize', 'unique_csize']: + stats[field] = format_file_size(stats[field]) + return self.Summary(**stats) + + def chunks_stored_size(self): + return self.stats()['unique_csize'] + + +class LocalCache(CacheStatsMixin): + """ + Persistent, local (client-side) cache. 
+ """ + def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True, - lock_wait=None): + progress=False, lock_wait=None): """ :param do_files: use file metadata cache :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository :param lock_wait: timeout for lock acquisition (None: return immediately if lock unavailable) :param sync: do :meth:`.sync` """ - self.lock = None - self.timestamp = None - self.lock = None - self.txn_active = False self.repository = repository self.key = key self.manifest = manifest - self.path = path or os.path.join(get_cache_dir(), repository.id_str) + self.progress = progress self.do_files = do_files + self.timestamp = None + self.txn_active = False + + self.path = cache_dir(repository, path) + self.security_manager = SecurityManager(repository) + self.cache_config = CacheConfig(self.repository, self.path, lock_wait) + # Warn user before sending data to a never seen before unencrypted repository if not os.path.exists(self.path): - if warn_if_unencrypted and isinstance(key, PlaintextKey): - msg = ("Warning: Attempting to access a previously unknown unencrypted repository!" + - "\n" + - "Do you want to continue? [yN] ") - if not yes(msg, false_msg="Aborting.", env_var_override='BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'): - raise self.CacheInitAbortedError() + self.security_manager.assert_access_unknown(warn_if_unencrypted, manifest, key) self.create() - self.open(lock_wait=lock_wait) - try: - # Warn user before sending data to a relocated repository - if self.previous_location and self.previous_location != repository._location.canonical_path(): - msg = ("Warning: The repository at location {} was previously located at {}".format(repository._location.canonical_path(), self.previous_location) + - "\n" + - "Do you want to continue? 
[yN] ") - if not yes(msg, false_msg="Aborting.", env_var_override='BORG_RELOCATED_REPO_ACCESS_IS_OK'): - raise self.RepositoryAccessAborted() - if sync and self.manifest.id != self.manifest_id: - # If repository is older than the cache something fishy is going on - if self.timestamp and self.timestamp > manifest.timestamp: - raise self.RepositoryReplay() - # Make sure an encrypted repository has not been swapped for an unencrypted repository - if self.key_type is not None and self.key_type != str(key.TYPE): - raise self.EncryptionMethodMismatch() + self.open() + try: + self.security_manager.assert_secure(manifest, key, cache_config=self.cache_config) + + if not self.check_cache_compatibility(): + self.wipe_cache() + + self.update_compatibility() + + if sync and self.manifest.id != self.cache_config.manifest_id: self.sync() self.commit() except: @@ -109,127 +461,110 @@ class Cache: def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def __str__(self): - fmt = """\ -All archives: {0.total_size:>20s} {0.total_csize:>20s} {0.unique_csize:>20s} - - Unique chunks Total chunks -Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" - return fmt.format(self.format_tuple()) - - def format_tuple(self): - # XXX: this should really be moved down to `hashindex.pyx` - Summary = namedtuple('Summary', ['total_size', 'total_csize', 'unique_size', 'unique_csize', 'total_unique_chunks', 'total_chunks']) - stats = Summary(*self.chunks.summarize())._asdict() - for field in ['total_size', 'total_csize', 'unique_csize']: - stats[field] = format_file_size(stats[field]) - return Summary(**stats) - - def chunks_stored_size(self): - Summary = namedtuple('Summary', ['total_size', 'total_csize', 'unique_size', 'unique_csize', 'total_unique_chunks', 'total_chunks']) - stats = Summary(*self.chunks.summarize()) - return stats.unique_csize - def create(self): """Create a new empty cache at `self.path` """ os.makedirs(self.path) with open(os.path.join(self.path, 'README'), 'w') as fd: - fd.write('This is a Borg cache') - config = configparser.ConfigParser(interpolation=None) - config.add_section('cache') - config.set('cache', 'version', '1') - config.set('cache', 'repository', self.repository.id_str) - config.set('cache', 'manifest', '') - with open(os.path.join(self.path, 'config'), 'w') as fd: - config.write(fd) - ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) + fd.write(CACHE_README) + self.cache_config.create() + ChunkIndex().write(os.path.join(self.path, 'chunks')) os.makedirs(os.path.join(self.path, 'chunks.archive.d')) - with open(os.path.join(self.path, 'files'), 'wb') as fd: + with SaveFile(os.path.join(self.path, 'files'), binary=True): pass # empty file def _do_open(self): - self.config = configparser.ConfigParser(interpolation=None) - config_path = os.path.join(self.path, 'config') - self.config.read(config_path) - try: - cache_version = self.config.getint('cache', 'version') - wanted_version = 1 - if cache_version != wanted_version: - raise Exception('%s has unexpected cache version %d (wanted: %d).' % ( - config_path, cache_version, wanted_version)) - except configparser.NoSectionError: - raise Exception('%s does not look like a Borg cache.' 
% config_path) from None - self.id = self.config.get('cache', 'repository') - self.manifest_id = unhexlify(self.config.get('cache', 'manifest')) - self.timestamp = self.config.get('cache', 'timestamp', fallback=None) - self.key_type = self.config.get('cache', 'key_type', fallback=None) - self.previous_location = self.config.get('cache', 'previous_location', fallback=None) - self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8')) + self.cache_config.load() + with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False, + integrity_data=self.cache_config.integrity.get('chunks')) as fd: + self.chunks = ChunkIndex.read(fd) self.files = None - def open(self, lock_wait=None): + def open(self): if not os.path.isdir(self.path): raise Exception('%s Does not look like a Borg cache' % self.path) - self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True, timeout=lock_wait).acquire() + self.cache_config.open() self.rollback() def close(self): - if self.lock is not None: - self.lock.release() - self.lock = None + if self.cache_config is not None: + self.cache_config.close() + self.cache_config = None def _read_files(self): self.files = {} - self._newest_mtime = 0 + self._newest_cmtime = None logger.debug('Reading files cache ...') - with open(os.path.join(self.path, 'files'), 'rb') as fd: + + with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False, + integrity_data=self.cache_config.integrity.get('files')) as fd: u = msgpack.Unpacker(use_list=True) while True: data = fd.read(64 * 1024) if not data: break u.feed(data) - for path_hash, item in u: - entry = FileCacheEntry(*item) - # in the end, this takes about 240 Bytes per file - self.files[path_hash] = msgpack.packb(entry._replace(age=entry.age + 1)) + try: + for path_hash, item in u: + entry = FileCacheEntry(*item) + # in the end, this takes about 240 Bytes per file + self.files[path_hash] = msgpack.packb(entry._replace(age=entry.age + 1)) + except (TypeError, ValueError) as exc: + logger.warning('The files cache seems corrupt, ignoring it. ' + 'Expect lower performance. 
[%s]' % str(exc)) + self.files = {} + return def begin_txn(self): # Initialize transaction snapshot + pi = ProgressIndicatorMessage(msgid='cache.begin_transaction') txn_dir = os.path.join(self.path, 'txn.tmp') os.mkdir(txn_dir) + pi.output('Initializing cache transaction: Reading config') shutil.copy(os.path.join(self.path, 'config'), txn_dir) + pi.output('Initializing cache transaction: Reading chunks') shutil.copy(os.path.join(self.path, 'chunks'), txn_dir) + pi.output('Initializing cache transaction: Reading files') shutil.copy(os.path.join(self.path, 'files'), txn_dir) os.rename(os.path.join(self.path, 'txn.tmp'), os.path.join(self.path, 'txn.active')) self.txn_active = True + pi.finish() def commit(self): """Commit transaction """ if not self.txn_active: return + self.security_manager.save(self.manifest, self.key) + pi = ProgressIndicatorMessage(msgid='cache.commit') if self.files is not None: - with open(os.path.join(self.path, 'files'), 'wb') as fd: + if self._newest_cmtime is None: + # was never set because no files were modified/added + self._newest_cmtime = 2 ** 63 - 1 # nanoseconds, good until y2262 + ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20)) + pi.output('Saving files cache') + with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd: for path_hash, item in self.files.items(): - # Discard cached files with the newest mtime to avoid - # issues with filesystem snapshots and mtime precision + # Only keep files seen in this backup that are older than newest cmtime seen in this backup - + # this is to avoid issues with filesystem snapshots and cmtime granularity. + # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet. entry = FileCacheEntry(*msgpack.unpackb(item)) - if entry.age < 10 and bigint_to_int(entry.mtime) < self._newest_mtime: + if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \ + entry.age > 0 and entry.age < ttl: msgpack.pack((path_hash, entry), fd) - self.config.set('cache', 'manifest', self.manifest.id_str) - self.config.set('cache', 'timestamp', self.manifest.timestamp) - self.config.set('cache', 'key_type', str(self.key.TYPE)) - self.config.set('cache', 'previous_location', self.repository._location.canonical_path()) - with open(os.path.join(self.path, 'config'), 'w') as fd: - self.config.write(fd) - self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8')) + self.cache_config.integrity['files'] = fd.integrity_data + pi.output('Saving chunks cache') + with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd: + self.chunks.write(fd) + self.cache_config.integrity['chunks'] = fd.integrity_data + pi.output('Saving cache config') + self.cache_config.save(self.manifest, self.key) os.rename(os.path.join(self.path, 'txn.active'), os.path.join(self.path, 'txn.tmp')) shutil.rmtree(os.path.join(self.path, 'txn.tmp')) self.txn_active = False + pi.finish() def rollback(self): """Roll back partial and aborted transactions @@ -260,94 +595,205 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" archive indexes. 
""" archive_path = os.path.join(self.path, 'chunks.archive.d') + # An index of chunks whose size had to be fetched + chunks_fetched_size_index = ChunkIndex() + # Instrumentation + processed_item_metadata_bytes = 0 + processed_item_metadata_chunks = 0 + compact_chunks_archive_saved_space = 0 + fetched_chunks_for_csize = 0 + fetched_bytes_for_csize = 0 def mkpath(id, suffix=''): id_hex = bin_to_hex(id) path = os.path.join(archive_path, id_hex + suffix) - return path.encode('utf-8') + return path def cached_archives(): if self.do_cache: fns = os.listdir(archive_path) - # filenames with 64 hex digits == 256bit - return set(unhexlify(fn) for fn in fns if len(fn) == 64) + # filenames with 64 hex digits == 256bit, + # or compact indices which are 64 hex digits + ".compact" + return set(unhexlify(fn) for fn in fns if len(fn) == 64) | \ + set(unhexlify(fn[:64]) for fn in fns if len(fn) == 72 and fn.endswith('.compact')) else: return set() def repo_archives(): - return set(info[b'id'] for info in self.manifest.archives.values()) + return set(info.id for info in self.manifest.archives.list()) def cleanup_outdated(ids): for id in ids: + cleanup_cached_archive(id) + + def cleanup_cached_archive(id, cleanup_compact=True): + try: os.unlink(mkpath(id)) + os.unlink(mkpath(id) + '.integrity') + except FileNotFoundError: + pass + if not cleanup_compact: + return + try: + os.unlink(mkpath(id, suffix='.compact')) + os.unlink(mkpath(id, suffix='.compact') + '.integrity') + except FileNotFoundError: + pass - def fetch_and_build_idx(archive_id, repository, key): - chunk_idx = ChunkIndex() - cdata = repository.get(archive_id) - _, data = key.decrypt(archive_id, cdata) - chunk_idx.add(archive_id, 1, len(data), len(cdata)) - archive = msgpack.unpackb(data) - if archive[b'version'] != 1: + def fetch_missing_csize(chunk_idx): + """ + Archives created with AdHocCache will have csize=0 in all chunk list entries whose + chunks were already in the repository. + + Scan *chunk_idx* for entries where csize=0 and fill in the correct information. + """ + nonlocal fetched_chunks_for_csize + nonlocal fetched_bytes_for_csize + + all_missing_ids = chunk_idx.zero_csize_ids() + fetch_ids = [] + if len(chunks_fetched_size_index): + for id_ in all_missing_ids: + already_fetched_entry = chunks_fetched_size_index.get(id_) + if already_fetched_entry: + entry = chunk_idx[id_]._replace(csize=already_fetched_entry.csize) + assert entry.size == already_fetched_entry.size, 'Chunk size mismatch' + chunk_idx[id_] = entry + else: + fetch_ids.append(id_) + else: + fetch_ids = all_missing_ids + + # This is potentially a rather expensive operation, but it's hard to tell at this point + # if it's a problem in practice (hence the experimental status of --no-cache-sync). 
+ for id_, data in zip(fetch_ids, decrypted_repository.repository.get_many(fetch_ids)): + entry = chunk_idx[id_]._replace(csize=len(data)) + chunk_idx[id_] = entry + chunks_fetched_size_index[id_] = entry + fetched_chunks_for_csize += 1 + fetched_bytes_for_csize += len(data) + + def fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx): + nonlocal processed_item_metadata_bytes + nonlocal processed_item_metadata_chunks + csize, data = decrypted_repository.get(archive_id) + chunk_idx.add(archive_id, 1, len(data), csize) + archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if archive.version != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, (b'name',)) - unpacker = msgpack.Unpacker() - for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): - _, data = key.decrypt(item_id, chunk) - chunk_idx.add(item_id, 1, len(data), len(chunk)) - unpacker.feed(data) - for item in unpacker: - if not isinstance(item, dict): - logger.error('Error: Did not get expected metadata dict - archive corrupted!') - continue - item = Item(internal_dict=item) - if 'chunks' in item: - for chunk_id, size, csize in item.chunks: - chunk_idx.add(chunk_id, 1, size, csize) + sync = CacheSynchronizer(chunk_idx) + for item_id, (csize, data) in zip(archive.items, decrypted_repository.get_many(archive.items)): + chunk_idx.add(item_id, 1, len(data), csize) + processed_item_metadata_bytes += len(data) + processed_item_metadata_chunks += 1 + sync.feed(data) if self.do_cache: - fn = mkpath(archive_id) - fn_tmp = mkpath(archive_id, suffix='.tmp') - try: - chunk_idx.write(fn_tmp) - except Exception: - os.unlink(fn_tmp) - else: - os.rename(fn_tmp, fn) - return chunk_idx + fetch_missing_csize(chunk_idx) + write_archive_index(archive_id, chunk_idx) - def lookup_name(archive_id): - for name, info in self.manifest.archives.items(): - if info[b'id'] == archive_id: - return name + def write_archive_index(archive_id, chunk_idx): + nonlocal compact_chunks_archive_saved_space + compact_chunks_archive_saved_space += chunk_idx.compact() + fn = mkpath(archive_id, suffix='.compact') + fn_tmp = mkpath(archive_id, suffix='.tmp') + try: + with DetachedIntegrityCheckedFile(path=fn_tmp, write=True, + filename=bin_to_hex(archive_id) + '.compact') as fd: + chunk_idx.write(fd) + except Exception: + truncate_and_unlink(fn_tmp) + else: + os.rename(fn_tmp, fn) + + def read_archive_index(archive_id, archive_name): + archive_chunk_idx_path = mkpath(archive_id) + logger.info("Reading cached archive chunk index for %s ...", archive_name) + try: + try: + # Attempt to load compact index first + with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path + '.compact', write=False) as fd: + archive_chunk_idx = ChunkIndex.read(fd, permit_compact=True) + # In case a non-compact index exists, delete it. + cleanup_cached_archive(archive_id, cleanup_compact=False) + # Compact index read - return index, no conversion necessary (below). + return archive_chunk_idx + except FileNotFoundError: + # No compact index found, load non-compact index, and convert below. + with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd: + archive_chunk_idx = ChunkIndex.read(fd) + except FileIntegrityError as fie: + logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie) + # Delete corrupted index, set warning. A new index must be build. + cleanup_cached_archive(archive_id) + set_ec(EXIT_WARNING) + return None + + # Convert to compact index. 
Delete the existing index first. + logger.debug('Found non-compact index for %s, converting to compact.', archive_name) + cleanup_cached_archive(archive_id) + write_archive_index(archive_id, archive_chunk_idx) + return archive_chunk_idx + + def get_archive_ids_to_names(archive_ids): + # Pass once over all archives and build a mapping from ids to names. + # The easier approach, doing a similar loop for each archive, has + # square complexity and does about a dozen million functions calls + # with 1100 archives (which takes 30s CPU seconds _alone_). + archive_names = {} + for info in self.manifest.archives.list(): + if info.id in archive_ids: + archive_names[info.id] = info.name + assert len(archive_names) == len(archive_ids) + return archive_names def create_master_idx(chunk_idx): logger.info('Synchronizing chunks cache...') cached_ids = cached_archives() archive_ids = repo_archives() - logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % ( + logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.', len(archive_ids), len(cached_ids), - len(cached_ids - archive_ids), len(archive_ids - cached_ids), )) + len(cached_ids - archive_ids), len(archive_ids - cached_ids)) # deallocates old hashindex, creates empty hashindex: chunk_idx.clear() cleanup_outdated(cached_ids - archive_ids) + # Explicitly set the initial hash table capacity to avoid performance issues + # due to hash table "resonance". + master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR) if archive_ids: - chunk_idx = None - for archive_id in archive_ids: - archive_name = lookup_name(archive_id) - if archive_id in cached_ids: - archive_chunk_idx_path = mkpath(archive_id) - logger.info("Reading cached archive chunk index for %s ..." % archive_name) - archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) - else: - logger.info('Fetching and building archive index for %s ...' % archive_name) - archive_chunk_idx = fetch_and_build_idx(archive_id, repository, self.key) - logger.info("Merging into master chunks index ...") - if chunk_idx is None: - # we just use the first archive's idx as starting point, - # to avoid growing the hash table from 0 size and also - # to save 1 merge call. - chunk_idx = archive_chunk_idx - else: + chunk_idx = None if not self.do_cache else ChunkIndex(master_index_capacity) + pi = ProgressIndicatorPercent(total=len(archive_ids), step=0.1, + msg='%3.0f%% Syncing chunks cache. Processing archive %s', + msgid='cache.sync') + archive_ids_to_names = get_archive_ids_to_names(archive_ids) + for archive_id, archive_name in archive_ids_to_names.items(): + pi.show(info=[remove_surrogates(archive_name)]) + if self.do_cache: + if archive_id in cached_ids: + archive_chunk_idx = read_archive_index(archive_id, archive_name) + if archive_chunk_idx is None: + cached_ids.remove(archive_id) + if archive_id not in cached_ids: + # Do not make this an else branch; the FileIntegrityError exception handler + # above can remove *archive_id* from *cached_ids*. 
+ logger.info('Fetching and building archive index for %s ...', archive_name) + archive_chunk_idx = ChunkIndex() + fetch_and_build_idx(archive_id, decrypted_repository, archive_chunk_idx) + logger.info("Merging into master chunks index ...") chunk_idx.merge(archive_chunk_idx) + else: + chunk_idx = chunk_idx or ChunkIndex(master_index_capacity) + logger.info('Fetching archive index for %s ...', archive_name) + fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx) + if not self.do_cache: + fetch_missing_csize(chunk_idx) + pi.finish() + logger.debug('Cache sync: had to fetch %s (%d chunks) because no archive had a csize set for them ' + '(due to --no-cache-sync)', + format_file_size(fetched_bytes_for_csize), fetched_chunks_for_csize) + logger.debug('Cache sync: processed %s (%d chunks) of metadata', + format_file_size(processed_item_metadata_bytes), processed_item_metadata_chunks) + logger.debug('Cache sync: compact chunks.archive.d storage saved %s bytes', + format_file_size(compact_chunks_archive_saved_space)) logger.info('Done.') return chunk_idx @@ -366,24 +812,66 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" except: pass + # The cache can be used by a command that e.g. only checks against Manifest.Operation.WRITE, + # which does not have to include all flags from Manifest.Operation.READ. + # Since the sync will attempt to read archives, check compatibility with Manifest.Operation.READ. + self.manifest.check_repository_compatibility((Manifest.Operation.READ, )) + self.begin_txn() - with cache_if_remote(self.repository) as repository: + with cache_if_remote(self.repository, decrypted_cache=self.key) as decrypted_repository: legacy_cleanup() # TEMPORARY HACK: to avoid archive index caching, create a FILE named ~/.cache/borg/REPOID/chunks.archive.d - # this is only recommended if you have a fast, low latency connection to your repo (e.g. if repo is local disk) self.do_cache = os.path.isdir(archive_path) self.chunks = create_master_idx(self.chunks) - def add_chunk(self, id, chunk, stats, overwrite=False): + def check_cache_compatibility(self): + my_features = Manifest.SUPPORTED_REPO_FEATURES + if self.cache_config.ignored_features & my_features: + # The cache might not contain references of chunks that need a feature that is mandatory for some operation + # and which this version supports. To avoid corruption while executing that operation force rebuild. + return False + if not self.cache_config.mandatory_features <= my_features: + # The cache was build with consideration to at least one feature that this version does not understand. + # This client might misinterpret the cache. Thus force a rebuild. 
+ return False + return True + + def wipe_cache(self): + logger.warning("Discarding incompatible cache and forcing a cache rebuild") + archive_path = os.path.join(self.path, 'chunks.archive.d') + if os.path.isdir(archive_path): + shutil.rmtree(os.path.join(self.path, 'chunks.archive.d')) + os.makedirs(os.path.join(self.path, 'chunks.archive.d')) + self.chunks = ChunkIndex() + with SaveFile(os.path.join(self.path, 'files'), binary=True): + pass # empty file + self.cache_config.manifest_id = '' + self.cache_config._config.set('cache', 'manifest', '') + + self.cache_config.ignored_features = set() + self.cache_config.mandatory_features = set() + + def update_compatibility(self): + operation_to_features_map = self.manifest.get_all_mandatory_features() + my_features = Manifest.SUPPORTED_REPO_FEATURES + repo_features = set() + for operation, features in operation_to_features_map.items(): + repo_features.update(features) + + self.cache_config.ignored_features.update(repo_features - my_features) + self.cache_config.mandatory_features.update(repo_features & my_features) + + def add_chunk(self, id, chunk, stats, overwrite=False, wait=True): if not self.txn_active: self.begin_txn() - size = len(chunk.data) + size = len(chunk) refcount = self.seen_chunk(id, size) if refcount and not overwrite: return self.chunk_incref(id, stats) data = self.key.encrypt(chunk) csize = len(data) - self.repository.put(id, data, wait=False) + self.repository.put(id, data, wait=wait) self.chunks.add(id, 1, size, csize) stats.update(size, csize, not refcount) return ChunkListEntry(id, size, csize) @@ -397,43 +885,201 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" id, stored_size, size)) return refcount - def chunk_incref(self, id, stats): + def chunk_incref(self, id, stats, size=None): if not self.txn_active: self.begin_txn() - count, size, csize = self.chunks.incref(id) - stats.update(size, csize, False) - return ChunkListEntry(id, size, csize) + count, _size, csize = self.chunks.incref(id) + stats.update(_size, csize, False) + return ChunkListEntry(id, _size, csize) - def chunk_decref(self, id, stats): + def chunk_decref(self, id, stats, wait=True): if not self.txn_active: self.begin_txn() count, size, csize = self.chunks.decref(id) if count == 0: del self.chunks[id] - self.repository.delete(id, wait=False) + self.repository.delete(id, wait=wait) stats.update(-size, -csize, True) else: stats.update(-size, -csize, False) - def file_known_and_unchanged(self, path_hash, st, ignore_inode=False): - if not (self.do_files and stat.S_ISREG(st.st_mode)): + def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): + if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): # d(isabled) return None if self.files is None: self._read_files() + if 'r' in cache_mode: # r(echunk) + return None entry = self.files.get(path_hash) if not entry: return None entry = FileCacheEntry(*msgpack.unpackb(entry)) - if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and - (ignore_inode or entry.inode == st.st_ino)): - self.files[path_hash] = msgpack.packb(entry._replace(age=0)) - return entry.chunk_ids - else: + if 's' in cache_mode and entry.size != st.st_size: return None + if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino: + return None + if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns: + return None + elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns: + return None + # 
we ignored the inode number in the comparison above or it is still same. + # if it is still the same, replacing it in the tuple doesn't change it. + # if we ignored it, a reason for doing that is that files were moved to a new + # disk / new fs (so a one-time change of inode number is expected) and we wanted + # to avoid everything getting chunked again. to be able to re-enable the inode + # number comparison in a future backup run (and avoid chunking everything + # again at that time), we need to update the inode number in the cache with what + # we see in the filesystem. + self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0)) + return entry.chunk_ids - def memorize_file(self, path_hash, st, ids): - if not (self.do_files and stat.S_ISREG(st.st_mode)): + def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): + # note: r(echunk) modes will update the files cache, d(isabled) mode won't + if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode): return - entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids) + if 'c' in cache_mode: + cmtime_ns = safe_ns(st.st_ctime_ns) + elif 'm' in cache_mode: + cmtime_ns = safe_ns(st.st_mtime_ns) + entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids) self.files[path_hash] = msgpack.packb(entry) - self._newest_mtime = max(self._newest_mtime, st.st_mtime_ns) + self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns) + + +class AdHocCache(CacheStatsMixin): + """ + Ad-hoc, non-persistent cache. + + Compared to the standard LocalCache the AdHocCache does not maintain accurate reference count, + nor does it provide a files cache (which would require persistence). Chunks that were not added + during the current AdHocCache lifetime won't have correct size/csize set (0 bytes) and will + have an infinite reference count (MAX_VALUE). + """ + + str_format = """\ +All archives: unknown unknown unknown + + Unique chunks Total chunks +Chunk index: {0.total_unique_chunks:20d} unknown""" + + def __init__(self, repository, key, manifest, warn_if_unencrypted=True): + self.repository = repository + self.key = key + self.manifest = manifest + self._txn_active = False + + self.security_manager = SecurityManager(repository) + self.security_manager.assert_secure(manifest, key) + + logger.warning('Note: --no-cache-sync is an experimental feature.') + + # Public API + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + files = None + do_files = False + + def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE): + return None + + def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE): + pass + + def add_chunk(self, id, chunk, stats, overwrite=False, wait=True): + assert not overwrite, 'AdHocCache does not permit overwrites — trying to use it for recreate?' 
+ if not self._txn_active: + self.begin_txn() + size = len(chunk) + refcount = self.seen_chunk(id, size) + if refcount: + return self.chunk_incref(id, stats, size=size) + data = self.key.encrypt(chunk) + csize = len(data) + self.repository.put(id, data, wait=wait) + self.chunks.add(id, 1, size, csize) + stats.update(size, csize, not refcount) + return ChunkListEntry(id, size, csize) + + def seen_chunk(self, id, size=None): + if not self._txn_active: + self.begin_txn() + entry = self.chunks.get(id, ChunkIndexEntry(0, None, None)) + if entry.refcount and size and not entry.size: + # The LocalCache has existing size information and uses *size* to make an effort at detecting collisions. + # This is of course not possible for the AdHocCache. + # Here *size* is used to update the chunk's size information, which will be zero for existing chunks. + self.chunks[id] = entry._replace(size=size) + return entry.refcount + + def chunk_incref(self, id, stats, size=None): + if not self._txn_active: + self.begin_txn() + count, _size, csize = self.chunks.incref(id) + # When _size is 0 and size is not given, then this chunk has not been locally visited yet (seen_chunk with + # size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to. + size = _size or size + assert size + stats.update(size, csize, False) + return ChunkListEntry(id, size, csize) + + def chunk_decref(self, id, stats, wait=True): + if not self._txn_active: + self.begin_txn() + count, size, csize = self.chunks.decref(id) + if count == 0: + del self.chunks[id] + self.repository.delete(id, wait=wait) + stats.update(-size, -csize, True) + else: + stats.update(-size, -csize, False) + + def commit(self): + if not self._txn_active: + return + self.security_manager.save(self.manifest, self.key) + self._txn_active = False + + def rollback(self): + self._txn_active = False + del self.chunks + + def begin_txn(self): + self._txn_active = True + # Explicitly set the initial hash table capacity to avoid performance issues + # due to hash table "resonance". + # Since we're creating an archive, add 10 % from the start. + num_chunks = len(self.repository) + capacity = int(num_chunks / ChunkIndex.MAX_LOAD_FACTOR * 1.1) + self.chunks = ChunkIndex(capacity) + pi = ProgressIndicatorPercent(total=num_chunks, msg='Downloading chunk list... %3.0f%%', + msgid='cache.download_chunks') + t0 = perf_counter() + num_requests = 0 + marker = None + while True: + result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker) + num_requests += 1 + if not result: + break + pi.show(increase=len(result)) + marker = result[-1] + # All chunks from the repository have a refcount of MAX_VALUE, which is sticky, + # therefore we can't/won't delete them. Chunks we added ourselves in this transaction + # (e.g. checkpoint archives) are tracked correctly. + init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0, csize=0) + for id_ in result: + self.chunks[id_] = init_entry + assert len(self.chunks) == num_chunks + # LocalCache does not contain the manifest, either. + del self.chunks[self.manifest.MANIFEST_ID] + duration = perf_counter() - t0 or 0.01 + pi.finish() + logger.debug('AdHocCache: downloaded %d chunk IDs in %.2f s (%d requests), ~%s/s', + num_chunks, duration, num_requests, format_file_size(num_chunks * 34 / duration)) + # Chunk IDs in a list are encoded in 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes. + # Protocol overhead is neglected in this calculation. 
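The choice between the persistent LocalCache and the non-persistent AdHocCache is made in Cache.__new__ above. A minimal sketch of how a caller (e.g. borg create with --no-cache-sync) could obtain a cache under these rules; the helper name open_backup_cache is illustrative, only Cache() and its parameters come from this patch:

    # Sketch only: repository, key and manifest are assumed to be opened/loaded already.
    from borg.cache import Cache

    def open_backup_cache(repository, key, manifest, no_cache_sync=False, progress=False):
        # permit_adhoc_cache only *allows* the ad-hoc cache; if a local cache exists
        # and its manifest ID matches the current manifest, the LocalCache is still used.
        return Cache(repository, key, manifest,
                     progress=progress,
                     permit_adhoc_cache=no_cache_sync)

    # Both cache types implement the same chunk API, e.g.:
    # with open_backup_cache(repository, key, manifest, no_cache_sync=True) as cache:
    #     entry = cache.add_chunk(chunk_id, chunk, stats)
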
diff --git a/src/borg/cache_sync/cache_sync.c b/src/borg/cache_sync/cache_sync.c new file mode 100644 index 00000000..53b61552 --- /dev/null +++ b/src/borg/cache_sync/cache_sync.c @@ -0,0 +1,131 @@ +/* + * Borg cache synchronizer, + * high level interface. + * + * These routines parse msgpacked item metadata and update a HashIndex + * with all chunks that are referenced from the items. + * + * This file only contains some initialization and buffer management. + * + * The parser is split in two parts, somewhat similar to lexer/parser combinations: + * + * unpack_template.h munches msgpack and calls a specific callback for each object + * encountered (e.g. beginning of a map, an integer, a string, a map item etc.). + * + * unpack.h implements these callbacks and uses another state machine to + * extract chunk references from it. + */ + +#include "unpack.h" + +typedef struct { + unpack_context ctx; + + char *buf; + size_t head; + size_t tail; + size_t size; +} CacheSyncCtx; + +static CacheSyncCtx * +cache_sync_init(HashIndex *chunks) +{ + CacheSyncCtx *ctx; + if (!(ctx = (CacheSyncCtx*)malloc(sizeof(CacheSyncCtx)))) { + return NULL; + } + + unpack_init(&ctx->ctx); + /* needs to be set only once */ + ctx->ctx.user.chunks = chunks; + ctx->ctx.user.num_files = 0; + ctx->buf = NULL; + ctx->head = 0; + ctx->tail = 0; + ctx->size = 0; + + return ctx; +} + +static void +cache_sync_free(CacheSyncCtx *ctx) +{ + if(ctx->buf) { + free(ctx->buf); + } + free(ctx); +} + +static const char * +cache_sync_error(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.last_error; +} + +static uint64_t +cache_sync_num_files(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.num_files; +} + +/** + * feed data to the cache synchronizer + * 0 = abort, 1 = continue + * abort is a regular condition, check cache_sync_error + */ +static int +cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length) +{ + size_t new_size; + int ret; + char *new_buf; + + if(ctx->tail + length > ctx->size) { + if((ctx->tail - ctx->head) + length <= ctx->size) { + /* | XXXXX| -> move data in buffer backwards -> |XXXXX | */ + memmove(ctx->buf, ctx->buf + ctx->head, ctx->tail - ctx->head); + ctx->tail -= ctx->head; + ctx->head = 0; + } else { + /* must expand buffer to fit all data */ + new_size = (ctx->tail - ctx->head) + length; + new_buf = (char*) malloc(new_size); + if(!new_buf) { + ctx->ctx.user.last_error = "cache_sync_feed: unable to allocate buffer"; + return 0; + } + if(ctx->buf) { + memcpy(new_buf, ctx->buf + ctx->head, ctx->tail - ctx->head); + free(ctx->buf); + } + ctx->buf = new_buf; + ctx->tail -= ctx->head; + ctx->head = 0; + ctx->size = new_size; + } + } + + memcpy(ctx->buf + ctx->tail, data, length); + ctx->tail += length; + + while(1) { + if(ctx->head >= ctx->tail) { + return 1; /* request more bytes */ + } + + ret = unpack_execute(&ctx->ctx, ctx->buf, ctx->tail, &ctx->head); + if(ret == 1) { + unpack_init(&ctx->ctx); + continue; + } else if(ret == 0) { + return 1; + } else { + if(!ctx->ctx.user.last_error) { + ctx->ctx.user.last_error = "Unknown error"; + } + return 0; + } + } + /* unreachable */ + return 1; +} diff --git a/src/borg/cache_sync/sysdep.h b/src/borg/cache_sync/sysdep.h new file mode 100644 index 00000000..ed9c1bc0 --- /dev/null +++ b/src/borg/cache_sync/sysdep.h @@ -0,0 +1,194 @@ +/* + * MessagePack system dependencies + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MSGPACK_SYSDEP_H__ +#define MSGPACK_SYSDEP_H__ + +#include +#include +#if defined(_MSC_VER) && _MSC_VER < 1600 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#elif defined(_MSC_VER) // && _MSC_VER >= 1600 +#include +#else +#include +#include +#endif + +#ifdef _WIN32 +#define _msgpack_atomic_counter_header +typedef long _msgpack_atomic_counter_t; +#define _msgpack_sync_decr_and_fetch(ptr) InterlockedDecrement(ptr) +#define _msgpack_sync_incr_and_fetch(ptr) InterlockedIncrement(ptr) +#elif defined(__GNUC__) && ((__GNUC__*10 + __GNUC_MINOR__) < 41) +#define _msgpack_atomic_counter_header "gcc_atomic.h" +#else +typedef unsigned int _msgpack_atomic_counter_t; +#define _msgpack_sync_decr_and_fetch(ptr) __sync_sub_and_fetch(ptr, 1) +#define _msgpack_sync_incr_and_fetch(ptr) __sync_add_and_fetch(ptr, 1) +#endif + +#ifdef _WIN32 + +#ifdef __cplusplus +/* numeric_limits::min,max */ +#ifdef max +#undef max +#endif +#ifdef min +#undef min +#endif +#endif + +#else +#include /* __BYTE_ORDER */ +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define __LITTLE_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN +#define __BIG_ENDIAN__ +#elif _WIN32 +#define __LITTLE_ENDIAN__ +#endif +#endif + + +#ifdef __LITTLE_ENDIAN__ + +#ifdef _WIN32 +# if defined(ntohs) +# define _msgpack_be16(x) ntohs(x) +# elif defined(_byteswap_ushort) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be16(x) ((uint16_t)_byteswap_ushort((unsigned short)x)) +# else +# define _msgpack_be16(x) ( \ + ((((uint16_t)x) << 8) ) | \ + ((((uint16_t)x) >> 8) ) ) +# endif +#else +# define _msgpack_be16(x) ntohs(x) +#endif + +#ifdef _WIN32 +# if defined(ntohl) +# define _msgpack_be32(x) ntohl(x) +# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) +# else +# define _msgpack_be32(x) \ + ( ((((uint32_t)x) << 24) ) | \ + ((((uint32_t)x) << 8) & 0x00ff0000U ) | \ + ((((uint32_t)x) >> 8) & 0x0000ff00U ) | \ + ((((uint32_t)x) >> 24) ) ) +# endif +#else +# define _msgpack_be32(x) ntohl(x) +#endif + +#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be64(x) (_byteswap_uint64(x)) +#elif defined(bswap_64) +# define _msgpack_be64(x) bswap_64(x) +#elif defined(__DARWIN_OSSwapInt64) +# define _msgpack_be64(x) __DARWIN_OSSwapInt64(x) +#else +#define _msgpack_be64(x) \ + ( ((((uint64_t)x) << 56) ) | \ + ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \ + ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \ + ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \ + ((((uint64_t)x) >> 8) & 0x00000000ff000000ULL ) | \ + ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \ + ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \ + ((((uint64_t)x) >> 56) ) ) +#endif + +#define _msgpack_load16(cast, from) ((cast)( \ + 
(((uint16_t)((uint8_t*)(from))[0]) << 8) | \ + (((uint16_t)((uint8_t*)(from))[1]) ) )) + +#define _msgpack_load32(cast, from) ((cast)( \ + (((uint32_t)((uint8_t*)(from))[0]) << 24) | \ + (((uint32_t)((uint8_t*)(from))[1]) << 16) | \ + (((uint32_t)((uint8_t*)(from))[2]) << 8) | \ + (((uint32_t)((uint8_t*)(from))[3]) ) )) + +#define _msgpack_load64(cast, from) ((cast)( \ + (((uint64_t)((uint8_t*)(from))[0]) << 56) | \ + (((uint64_t)((uint8_t*)(from))[1]) << 48) | \ + (((uint64_t)((uint8_t*)(from))[2]) << 40) | \ + (((uint64_t)((uint8_t*)(from))[3]) << 32) | \ + (((uint64_t)((uint8_t*)(from))[4]) << 24) | \ + (((uint64_t)((uint8_t*)(from))[5]) << 16) | \ + (((uint64_t)((uint8_t*)(from))[6]) << 8) | \ + (((uint64_t)((uint8_t*)(from))[7]) ) )) + +#else + +#define _msgpack_be16(x) (x) +#define _msgpack_be32(x) (x) +#define _msgpack_be64(x) (x) + +#define _msgpack_load16(cast, from) ((cast)( \ + (((uint16_t)((uint8_t*)from)[0]) << 8) | \ + (((uint16_t)((uint8_t*)from)[1]) ) )) + +#define _msgpack_load32(cast, from) ((cast)( \ + (((uint32_t)((uint8_t*)from)[0]) << 24) | \ + (((uint32_t)((uint8_t*)from)[1]) << 16) | \ + (((uint32_t)((uint8_t*)from)[2]) << 8) | \ + (((uint32_t)((uint8_t*)from)[3]) ) )) + +#define _msgpack_load64(cast, from) ((cast)( \ + (((uint64_t)((uint8_t*)from)[0]) << 56) | \ + (((uint64_t)((uint8_t*)from)[1]) << 48) | \ + (((uint64_t)((uint8_t*)from)[2]) << 40) | \ + (((uint64_t)((uint8_t*)from)[3]) << 32) | \ + (((uint64_t)((uint8_t*)from)[4]) << 24) | \ + (((uint64_t)((uint8_t*)from)[5]) << 16) | \ + (((uint64_t)((uint8_t*)from)[6]) << 8) | \ + (((uint64_t)((uint8_t*)from)[7]) ) )) +#endif + + +#define _msgpack_store16(to, num) \ + do { uint16_t val = _msgpack_be16(num); memcpy(to, &val, 2); } while(0) +#define _msgpack_store32(to, num) \ + do { uint32_t val = _msgpack_be32(num); memcpy(to, &val, 4); } while(0) +#define _msgpack_store64(to, num) \ + do { uint64_t val = _msgpack_be64(num); memcpy(to, &val, 8); } while(0) + +/* +#define _msgpack_load16(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 2); _msgpack_be16(val); }) +#define _msgpack_load32(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 4); _msgpack_be32(val); }) +#define _msgpack_load64(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 8); _msgpack_be64(val); }) +*/ + + +#endif /* msgpack/sysdep.h */ diff --git a/src/borg/cache_sync/unpack.h b/src/borg/cache_sync/unpack.h new file mode 100644 index 00000000..8332fcff --- /dev/null +++ b/src/borg/cache_sync/unpack.h @@ -0,0 +1,392 @@ +/* + * Borg cache synchronizer, + * based on a MessagePack for Python unpacking routine + * + * Copyright (C) 2009 Naoki INADA + * Copyright (c) 2017 Marian Beermann + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This limits the depth of the structures we can unpack, i.e. how many containers + * are nestable. + */ +#define MSGPACK_EMBED_STACK_SIZE (16) +#include "unpack_define.h" + +// 2**32 - 1025 +#define _MAX_VALUE ( (uint32_t) 4294966271UL ) + +#define MIN(x, y) ((x) < (y) ? 
(x): (y)) + +#ifdef DEBUG +#define SET_LAST_ERROR(msg) \ + fprintf(stderr, "cache_sync parse error: %s\n", (msg)); \ + u->last_error = (msg); +#else +#define SET_LAST_ERROR(msg) \ + u->last_error = (msg); +#endif + +typedef struct unpack_user { + /* Item.chunks is at the top level; we don't care about anything else, + * only need to track the current level to navigate arbitrary and unknown structure. + * To discern keys from everything else on the top level we use expect_map_item_end. + */ + int level; + + const char *last_error; + + HashIndex *chunks; + + uint64_t num_files; + + /* + * We don't care about most stuff. This flag tells us whether we're at the chunks structure, + * meaning: + * {'foo': 'bar', 'chunks': [...], 'stuff': ... } + * ^-HERE-^ + */ + int inside_chunks; + enum { + /* the next thing is a map key at the Item root level, + * and it might be the "chunks" key we're looking for */ + expect_chunks_map_key, + + /* blocking state to expect_chunks_map_key + * { 'stuff': , 'chunks': [ + * ecmk -> emie -> -> -> -> ecmk ecb eeboce + * (nested containers are tracked via level) + * ecmk=expect_chunks_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin, + * eeboce=expect_entry_begin_or_chunks_end + */ + expect_map_item_end, + + /* next thing must be the chunks array (array) */ + expect_chunks_begin, + + /* next thing must either be another CLE (array) or end of Item.chunks (array_end) */ + expect_entry_begin_or_chunks_end, + + /* + * processing ChunkListEntry tuple: + * expect_key, expect_size, expect_csize, expect_entry_end + */ + /* next thing must be the key (raw, l=32) */ + expect_key, + /* next thing must be the size (int) */ + expect_size, + /* next thing must be the csize (int) */ + expect_csize, + /* next thing must be the end of the CLE (array_end) */ + expect_entry_end, + + expect_item_begin + } expect; + + struct { + char key[32]; + uint32_t csize; + uint32_t size; + } current; +} unpack_user; + +struct unpack_context; +typedef struct unpack_context unpack_context; +typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); + +#define UNEXPECTED(what) \ + if(u->inside_chunks || u->expect == expect_chunks_map_key) { \ + SET_LAST_ERROR("Unexpected object: " what); \ + return -1; \ + } + +static inline void unpack_init_user_state(unpack_user *u) +{ + u->last_error = NULL; + u->level = 0; + u->inside_chunks = false; + u->expect = expect_item_begin; +} + +static inline int unpack_callback_uint64(unpack_user* u, int64_t d) +{ + switch(u->expect) { + case expect_size: + u->current.size = d; + u->expect = expect_csize; + break; + case expect_csize: + u->current.csize = d; + u->expect = expect_entry_end; + break; + default: + UNEXPECTED("integer"); + } + return 0; +} + +static inline int unpack_callback_uint32(unpack_user* u, uint32_t d) +{ + return unpack_callback_uint64(u, d); +} + +static inline int unpack_callback_uint16(unpack_user* u, uint16_t d) +{ + return unpack_callback_uint64(u, d); +} + +static inline int unpack_callback_uint8(unpack_user* u, uint8_t d) +{ + return unpack_callback_uint64(u, d); +} + +static inline int unpack_callback_int64(unpack_user* u, uint64_t d) +{ + return unpack_callback_uint64(u, d); +} + +static inline int unpack_callback_int32(unpack_user* u, int32_t d) +{ + return unpack_callback_uint64(u, d); +} + +static inline int unpack_callback_int16(unpack_user* u, int16_t d) +{ + return unpack_callback_uint64(u, d); +} + +static inline int unpack_callback_int8(unpack_user* u, int8_t d) +{ + return 
unpack_callback_uint64(u, d); +} + +/* Ain't got anything to do with those floats */ +static inline int unpack_callback_double(unpack_user* u, double d) +{ + (void)d; + UNEXPECTED("double"); + return 0; +} + +static inline int unpack_callback_float(unpack_user* u, float d) +{ + (void)d; + UNEXPECTED("float"); + return 0; +} + +/* nil/true/false — I/don't/care */ +static inline int unpack_callback_nil(unpack_user* u) +{ + UNEXPECTED("nil"); + return 0; +} + +static inline int unpack_callback_true(unpack_user* u) +{ + UNEXPECTED("true"); + return 0; +} + +static inline int unpack_callback_false(unpack_user* u) +{ + UNEXPECTED("false"); + return 0; +} + +static inline int unpack_callback_array(unpack_user* u, unsigned int n) +{ + switch(u->expect) { + case expect_chunks_begin: + /* b'chunks': [ + * ^ */ + u->expect = expect_entry_begin_or_chunks_end; + break; + case expect_entry_begin_or_chunks_end: + /* b'chunks': [ ( + * ^ */ + if(n != 3) { + SET_LAST_ERROR("Invalid chunk list entry length"); + return -1; + } + u->expect = expect_key; + break; + default: + if(u->inside_chunks) { + SET_LAST_ERROR("Unexpected array start"); + return -1; + } else { + u->level++; + return 0; + } + } + return 0; +} + +static inline int unpack_callback_array_item(unpack_user* u, unsigned int current) +{ + (void)u; (void)current; + return 0; +} + +static inline int unpack_callback_array_end(unpack_user* u) +{ + uint32_t *cache_entry; + uint32_t cache_values[3]; + uint64_t refcount; + + switch(u->expect) { + case expect_entry_end: + /* b'chunks': [ ( b'1234...', 123, 345 ) + * ^ */ + cache_entry = (uint32_t*) hashindex_get(u->chunks, u->current.key); + if(cache_entry) { + refcount = _le32toh(cache_entry[0]); + if(refcount > _MAX_VALUE) { + SET_LAST_ERROR("invalid reference count"); + return -1; + } + refcount += 1; + cache_entry[0] = _htole32(MIN(refcount, _MAX_VALUE)); + } else { + /* refcount, size, csize */ + cache_values[0] = _htole32(1); + cache_values[1] = _htole32(u->current.size); + cache_values[2] = _htole32(u->current.csize); + if(!hashindex_set(u->chunks, u->current.key, cache_values)) { + SET_LAST_ERROR("hashindex_set failed"); + return -1; + } + } + + u->expect = expect_entry_begin_or_chunks_end; + break; + case expect_entry_begin_or_chunks_end: + /* b'chunks': [ ] + * ^ */ + /* end of Item.chunks */ + u->inside_chunks = 0; + u->expect = expect_map_item_end; + break; + default: + if(u->inside_chunks) { + SET_LAST_ERROR("Invalid state transition (unexpected array end)"); + return -1; + } else { + u->level--; + return 0; + } + } + return 0; +} + +static inline int unpack_callback_map(unpack_user* u, unsigned int n) +{ + (void)n; + + if(u->level == 0) { + if(u->expect != expect_item_begin) { + SET_LAST_ERROR("Invalid state transition"); /* unreachable */ + return -1; + } + /* This begins a new Item */ + u->expect = expect_chunks_map_key; + } + + if(u->inside_chunks) { + UNEXPECTED("map"); + } + + u->level++; + + return 0; +} + +static inline int unpack_callback_map_item(unpack_user* u, unsigned int current) +{ + (void)u; (void)current; + + if(u->level == 1) { + switch(u->expect) { + case expect_map_item_end: + u->expect = expect_chunks_map_key; + break; + default: + SET_LAST_ERROR("Unexpected map item"); + return -1; + } + } + return 0; +} + +static inline int unpack_callback_map_end(unpack_user* u) +{ + u->level--; + if(u->inside_chunks) { + SET_LAST_ERROR("Unexpected map end"); + return -1; + } + return 0; +} + +static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned 
int length) +{ + /* raw = what Borg uses for binary stuff and strings as well */ + /* Note: p points to an internal buffer which contains l bytes. */ + (void)b; + + switch(u->expect) { + case expect_key: + if(length != 32) { + SET_LAST_ERROR("Incorrect key length"); + return -1; + } + memcpy(u->current.key, p, 32); + u->expect = expect_size; + break; + case expect_chunks_map_key: + if(length == 6 && !memcmp("chunks", p, 6)) { + u->expect = expect_chunks_begin; + u->inside_chunks = 1; + u->num_files++; + } else { + u->expect = expect_map_item_end; + } + break; + default: + if(u->inside_chunks) { + SET_LAST_ERROR("Unexpected bytes in chunks structure"); + return -1; + } + } + return 0; +} + +static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length) +{ + (void)u; (void)b; (void)p; (void)length; + UNEXPECTED("bin"); + return 0; +} + +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length) +{ + (void)u; (void)base; (void)pos; (void)length; + UNEXPECTED("ext"); + return 0; +} + +#include "unpack_template.h" diff --git a/src/borg/cache_sync/unpack_define.h b/src/borg/cache_sync/unpack_define.h new file mode 100644 index 00000000..d681277b --- /dev/null +++ b/src/borg/cache_sync/unpack_define.h @@ -0,0 +1,95 @@ +/* + * MessagePack unpacking routine template + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MSGPACK_UNPACK_DEFINE_H__ +#define MSGPACK_UNPACK_DEFINE_H__ + +#include "sysdep.h" +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef MSGPACK_EMBED_STACK_SIZE +#define MSGPACK_EMBED_STACK_SIZE 32 +#endif + + +// CS is first byte & 0x1f +typedef enum { + CS_HEADER = 0x00, // nil + + //CS_ = 0x01, + //CS_ = 0x02, // false + //CS_ = 0x03, // true + + CS_BIN_8 = 0x04, + CS_BIN_16 = 0x05, + CS_BIN_32 = 0x06, + + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, + + CS_FLOAT = 0x0a, + CS_DOUBLE = 0x0b, + CS_UINT_8 = 0x0c, + CS_UINT_16 = 0x0d, + CS_UINT_32 = 0x0e, + CS_UINT_64 = 0x0f, + CS_INT_8 = 0x10, + CS_INT_16 = 0x11, + CS_INT_32 = 0x12, + CS_INT_64 = 0x13, + + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, + + CS_RAW_8 = 0x19, + CS_RAW_16 = 0x1a, + CS_RAW_32 = 0x1b, + CS_ARRAY_16 = 0x1c, + CS_ARRAY_32 = 0x1d, + CS_MAP_16 = 0x1e, + CS_MAP_32 = 0x1f, + + ACS_RAW_VALUE, + ACS_BIN_VALUE, + ACS_EXT_VALUE, +} msgpack_unpack_state; + + +typedef enum { + CT_ARRAY_ITEM, + CT_MAP_KEY, + CT_MAP_VALUE, +} msgpack_container_type; + + +#ifdef __cplusplus +} +#endif + +#endif /* msgpack/unpack_define.h */ diff --git a/src/borg/cache_sync/unpack_template.h b/src/borg/cache_sync/unpack_template.h new file mode 100644 index 00000000..9fc1a34d --- /dev/null +++ b/src/borg/cache_sync/unpack_template.h @@ -0,0 +1,365 @@ +/* + * MessagePack unpacking routine template + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * Copyright (c) 2017 Marian Beermann + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + * This has been slightly adapted from the vanilla msgpack-{c, python} version. + * Since cache_sync does not intend to build an output data structure, + * msgpack_unpack_object and all of its uses was removed. 
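+ * It is included at the end of unpack.h, after the unpack_user callbacks have been
+ * defined; it provides unpack_init() and unpack_execute(), which drive those callbacks.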
+ */ + +#ifndef USE_CASE_RANGE +#if !defined(_MSC_VER) +#define USE_CASE_RANGE +#endif +#endif + +typedef struct unpack_stack { + size_t size; + size_t count; + unsigned int ct; +} unpack_stack; + +struct unpack_context { + unpack_user user; + unsigned int cs; + unsigned int trail; + unsigned int top; + unpack_stack stack[MSGPACK_EMBED_STACK_SIZE]; +}; + +static inline void unpack_init(unpack_context* ctx) +{ + ctx->cs = CS_HEADER; + ctx->trail = 0; + ctx->top = 0; + unpack_init_user_state(&ctx->user); +} + +#define construct 1 + +static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off) +{ + const unsigned char* p = (unsigned char*)data + *off; + const unsigned char* const pe = (unsigned char*)data + len; + const void* n = NULL; + + unsigned int trail = ctx->trail; + unsigned int cs = ctx->cs; + unsigned int top = ctx->top; + unpack_stack* stack = ctx->stack; + unpack_user* user = &ctx->user; + + unpack_stack* c = NULL; + + int ret; + + assert(len >= *off); + +#define construct_cb(name) \ + construct && unpack_callback ## name + +#define push_simple_value(func) \ + if(construct_cb(func)(user) < 0) { goto _failed; } \ + goto _push +#define push_fixed_value(func, arg) \ + if(construct_cb(func)(user, arg) < 0) { goto _failed; } \ + goto _push +#define push_variable_value(func, base, pos, len) \ + if(construct_cb(func)(user, \ + (const char*)base, (const char*)pos, len) < 0) { goto _failed; } \ + goto _push + +#define again_fixed_trail(_cs, trail_len) \ + trail = trail_len; \ + cs = _cs; \ + goto _fixed_trail_again +#define again_fixed_trail_if_zero(_cs, trail_len, ifzero) \ + trail = trail_len; \ + if(trail == 0) { goto ifzero; } \ + cs = _cs; \ + goto _fixed_trail_again + +#define start_container(func, count_, ct_) \ + if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ + if(construct_cb(func)(user, count_) < 0) { goto _failed; } \ + if((count_) == 0) { \ + if (construct_cb(func##_end)(user) < 0) { goto _failed; } \ + goto _push; } \ + stack[top].ct = ct_; \ + stack[top].size = count_; \ + stack[top].count = 0; \ + ++top; \ + goto _header_again + +#define NEXT_CS(p) ((unsigned int)*p & 0x1f) + +#ifdef USE_CASE_RANGE +#define SWITCH_RANGE_BEGIN switch(*p) { +#define SWITCH_RANGE(FROM, TO) case FROM ... 
TO: +#define SWITCH_RANGE_DEFAULT default: +#define SWITCH_RANGE_END } +#else +#define SWITCH_RANGE_BEGIN { if(0) { +#define SWITCH_RANGE(FROM, TO) } else if(FROM <= *p && *p <= TO) { +#define SWITCH_RANGE_DEFAULT } else { +#define SWITCH_RANGE_END } } +#endif + + if(p == pe) { goto _out; } + do { + switch(cs) { + case CS_HEADER: + SWITCH_RANGE_BEGIN + SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum + push_fixed_value(_uint8, *(uint8_t*)p); + SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum + push_fixed_value(_int8, *(int8_t*)p); + SWITCH_RANGE(0xc0, 0xdf) // Variable + switch(*p) { + case 0xc0: // nil + push_simple_value(_nil); + //case 0xc1: // never used + case 0xc2: // false + push_simple_value(_false); + case 0xc3: // true + push_simple_value(_true); + case 0xc4: // bin 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc5: // bin 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc6: // bin 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xca: // float + case 0xcb: // double + case 0xcc: // unsigned int 8 + case 0xcd: // unsigned int 16 + case 0xce: // unsigned int 32 + case 0xcf: // unsigned int 64 + case 0xd0: // signed int 8 + case 0xd1: // signed int 16 + case 0xd2: // signed int 32 + case 0xd3: // signed int 64 + again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + case 0xd9: // str 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xda: // raw 16 + case 0xdb: // raw 32 + case 0xdc: // array 16 + case 0xdd: // array 32 + case 0xde: // map 16 + case 0xdf: // map 32 + again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); + default: + goto _failed; + } + SWITCH_RANGE(0xa0, 0xbf) // FixRaw + again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); + SWITCH_RANGE(0x90, 0x9f) // FixArray + start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); + SWITCH_RANGE(0x80, 0x8f) // FixMap + start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); + + SWITCH_RANGE_DEFAULT + goto _failed; + SWITCH_RANGE_END + // end CS_HEADER + + + _fixed_trail_again: + ++p; + + default: + if((size_t)(pe - p) < trail) { goto _out; } + n = p; p += trail - 1; + switch(cs) { + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); + case CS_FLOAT: { + union { uint32_t i; float f; } mem; + mem.i = _msgpack_load32(uint32_t,n); + push_fixed_value(_float, mem.f); } + case CS_DOUBLE: { + union { uint64_t i; double f; } mem; + mem.i = _msgpack_load64(uint64_t,n); +#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi + // https://github.com/msgpack/msgpack-perl/pull/1 + mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); +#endif + push_fixed_value(_double, mem.f); } + case CS_UINT_8: + push_fixed_value(_uint8, *(uint8_t*)n); + case CS_UINT_16: + push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); + case CS_UINT_32: + push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); + case 
CS_UINT_64: + push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); + + case CS_INT_8: + push_fixed_value(_int8, *(int8_t*)n); + case CS_INT_16: + push_fixed_value(_int16, _msgpack_load16(int16_t,n)); + case CS_INT_32: + push_fixed_value(_int32, _msgpack_load32(int32_t,n)); + case CS_INT_64: + push_fixed_value(_int64, _msgpack_load64(int64_t,n)); + + case CS_BIN_8: + again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); + case CS_BIN_16: + again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero); + case CS_BIN_32: + again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load32(uint32_t,n), _bin_zero); + case ACS_BIN_VALUE: + _bin_zero: + push_variable_value(_bin, data, n, trail); + + case CS_RAW_8: + again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero); + case CS_RAW_16: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); + case CS_RAW_32: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); + case ACS_RAW_VALUE: + _raw_zero: + push_variable_value(_raw, data, n, trail); + + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + + case CS_ARRAY_16: + start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); + case CS_ARRAY_32: + /* FIXME security guard */ + start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); + + case CS_MAP_16: + start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); + case CS_MAP_32: + /* FIXME security guard */ + start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); + + default: + goto _failed; + } + } + +_push: + if(top == 0) { goto _finish; } + c = &stack[top-1]; + switch(c->ct) { + case CT_ARRAY_ITEM: + if(construct_cb(_array_item)(user, c->count) < 0) { goto _failed; } + if(++c->count == c->size) { + if (construct_cb(_array_end)(user) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + goto _header_again; + case CT_MAP_KEY: + c->ct = CT_MAP_VALUE; + goto _header_again; + case CT_MAP_VALUE: + if(construct_cb(_map_item)(user, c->count) < 0) { goto _failed; } + if(++c->count == c->size) { + if (construct_cb(_map_end)(user) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + c->ct = CT_MAP_KEY; + goto _header_again; + + default: + goto _failed; + } + +_header_again: + cs = CS_HEADER; + ++p; + } while(p != pe); + goto _out; + + +_finish: + if (!construct) + unpack_callback_nil(user); + ++p; + ret = 1; + /* printf("-- finish --\n"); */ + goto _end; + +_failed: + /* printf("** FAILED **\n"); */ + ret = -1; + goto _end; + +_out: + ret = 0; + goto _end; + +_end: + ctx->cs = cs; + ctx->trail = trail; + ctx->top = top; + *off = p - (const unsigned char*)data; + + return ret; +#undef construct_cb +} + +#undef SWITCH_RANGE_BEGIN +#undef SWITCH_RANGE +#undef SWITCH_RANGE_DEFAULT +#undef SWITCH_RANGE_END +#undef push_simple_value +#undef push_fixed_value +#undef push_variable_value +#undef again_fixed_trail +#undef again_fixed_trail_if_zero +#undef start_container +#undef construct + +#undef NEXT_CS + +/* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx index 560e14c8..d2b44f68 100644 --- a/src/borg/chunker.pyx +++ b/src/borg/chunker.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -API_VERSION = 2 +API_VERSION = '1.1_01' from libc.stdlib cimport free @@ -50,11 +50,11 @@ cdef class Chunker: return chunker_process(self.chunker) -def buzhash(unsigned char *data, unsigned long seed): +def 
buzhash(data, unsigned long seed): cdef uint32_t *table cdef uint32_t sum table = buzhash_init_table(seed & 0xffffffff) - sum = c_buzhash(data, len(data), table) + sum = c_buzhash( data, len(data), table) free(table) return sum diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 3bb88def..e3c34e04 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -1,12 +1,38 @@ +""" +borg.compress +============= + +Compression is applied to chunks after ID hashing (so the ID is a direct function of the +plain chunk, compression is irrelevant to it), and of course before encryption. + +The "auto" mode (e.g. --compression auto,lzma,4) is implemented as a meta Compressor, +meaning that Auto acts like a Compressor, but defers actual work to others (namely +LZ4 as a heuristic whether compression is worth it, and the specified Compressor +for the actual compression). + +Decompression is normally handled through Compressor.decompress which will detect +which compressor has been used to compress the data and dispatch to the correct +decompressor. +""" + import zlib + try: import lzma except ImportError: lzma = None +from .helpers import Buffer, DecompressionError + +API_VERSION = '1.1_03' + cdef extern from "lz4.h": int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil + int LZ4_compressBound(int inputSize) nogil + + +buffer = Buffer(bytearray, size=0) cdef class CompressorBase: @@ -26,11 +52,34 @@ cdef class CompressorBase: def __init__(self, **kwargs): pass + def decide(self, data): + """ + Return which compressor will perform the actual compression for *data*. + + This exists for a very specific case: If borg recreate is instructed to recompress + using Auto compression it needs to determine the _actual_ target compression of a chunk + in order to detect whether it should be recompressed. + + For all Compressors that are not Auto this always returns *self*. + """ + return self + def compress(self, data): + """ + Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor, + which is needed so that the correct decompressor can be used for decompression. + """ # add ID bytes return self.ID + data def decompress(self, data): + """ + Decompress *data* (bytes) and return bytes result. The leading Compressor ID + bytes need to be present. + + Only handles input generated by _this_ Compressor - for a general purpose + decompression method see *Compressor.decompress*. + """ # strip ID bytes return data[2:] @@ -52,40 +101,31 @@ class CNONE(CompressorBase): return data -cdef class LZ4(CompressorBase): +class LZ4(CompressorBase): """ raw LZ4 compression / decompression (liblz4). Features: - lz4 is super fast - wrapper releases CPython's GIL to support multithreaded code - - buffer given by caller, avoiding frequent reallocation and buffer duplication - uses safe lz4 methods that never go beyond the end of the output buffer - - But beware: - - this is not very generic, the given buffer MUST be large enough to - handle all compression or decompression output (or it will fail). - - you must not do method calls to the same LZ4 instance from different - threads at the same time - create one LZ4 instance per thread! 
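The CompressorBase docstrings above describe a two-byte ID envelope: compress() prepends the compressor's ID bytes, decompress() strips them, and a generic dispatcher can pick the right decompressor from the first two bytes of any stored chunk. A minimal pure-Python sketch of that idea (simplified; the real classes also run the actual compression) could look like:

    class FakeLZ4:
        ID = b'\x01\x00'                    # same ID bytes the real LZ4 class declares
        def compress(self, data):
            return self.ID + data           # real code compresses first, then prepends the ID
        def decompress(self, data):
            return data[2:]                 # strip the two ID bytes again

    def detect(data):
        # mirrors Compressor.detect(): dispatch on the first two bytes only
        return {FakeLZ4.ID: FakeLZ4}[bytes(data[:2])]

    blob = FakeLZ4().compress(b'hello')
    assert detect(blob)().decompress(blob) == b'hello'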
""" ID = b'\x01\x00' name = 'lz4' - cdef char *buffer # helper buffer for (de)compression output - cdef int bufsize # size of this buffer - - def __cinit__(self, **kwargs): - buffer = kwargs['buffer'] - self.buffer = buffer - self.bufsize = len(buffer) + def __init__(self, **kwargs): + pass def compress(self, idata): if not isinstance(idata, bytes): idata = bytes(idata) # code below does not work with memoryview cdef int isize = len(idata) - cdef int osize = self.bufsize + cdef int osize cdef char *source = idata - cdef char *dest = self.buffer + cdef char *dest + osize = LZ4_compressBound(isize) + buf = buffer.get(osize) + dest = buf with nogil: osize = LZ4_compress_limitedOutput(source, dest, isize, osize) if not osize: @@ -97,15 +137,29 @@ cdef class LZ4(CompressorBase): idata = bytes(idata) # code below does not work with memoryview idata = super().decompress(idata) cdef int isize = len(idata) - cdef int osize = self.bufsize + cdef int osize + cdef int rsize cdef char *source = idata - cdef char *dest = self.buffer - with nogil: - osize = LZ4_decompress_safe(source, dest, isize, osize) - if osize < 0: - # malformed input data, buffer too small, ... - raise Exception('lz4 decompress failed') - return dest[:osize] + cdef char *dest + # a bit more than 8MB is enough for the usual data sizes yielded by the chunker. + # allocate more if isize * 3 is already bigger, to avoid having to resize often. + osize = max(int(1.1 * 2**23), isize * 3) + while True: + try: + buf = buffer.get(osize) + except MemoryError: + raise DecompressionError('MemoryError') + dest = buf + with nogil: + rsize = LZ4_decompress_safe(source, dest, isize, osize) + if rsize >= 0: + break + if osize > 2 ** 27: # 128MiB (should be enough, considering max. repo obj size and very good compression) + # this is insane, get out of here + raise DecompressionError('lz4 decompress failed') + # likely the buffer was too small, get a bigger one: + osize = int(1.5 * osize) + return dest[:rsize] class LZMA(CompressorBase): @@ -128,7 +182,10 @@ class LZMA(CompressorBase): def decompress(self, data): data = super().decompress(data) - return lzma.decompress(data) + try: + return lzma.decompress(data) + except lzma.LZMAError as e: + raise DecompressionError(str(e)) from None class ZLIB(CompressorBase): @@ -157,15 +214,85 @@ class ZLIB(CompressorBase): def decompress(self, data): # note: for compatibility no super call, do not strip ID bytes - return zlib.decompress(data) + try: + return zlib.decompress(data) + except zlib.error as e: + raise DecompressionError(str(e)) from None +class Auto(CompressorBase): + """ + Meta-Compressor that decides which compression to use based on LZ4's ratio. + + As a meta-Compressor the actual compression is deferred to other Compressors, + therefore this Compressor has no ID, no detect() and no decompress(). + """ + + ID = None + name = 'auto' + + def __init__(self, compressor): + super().__init__() + self.compressor = compressor + self.lz4 = get_compressor('lz4') + self.none = get_compressor('none') + + def _decide(self, data): + """ + Decides what to do with *data*. Returns (compressor, lz4_data). + + *lz4_data* is the LZ4 result if *compressor* is LZ4 as well, otherwise it is None. 
+ """ + lz4_data = self.lz4.compress(data) + ratio = len(lz4_data) / len(data) + if ratio < 0.97: + return self.compressor, lz4_data + elif ratio < 1: + return self.lz4, lz4_data + else: + return self.none, None + + def decide(self, data): + return self._decide(data)[0] + + def compress(self, data): + compressor, lz4_data = self._decide(data) + if compressor is self.lz4: + # we know that trying to compress with expensive compressor is likely pointless, + # but lz4 managed to at least squeeze the data a bit. + return lz4_data + if compressor is self.none: + # we know that trying to compress with expensive compressor is likely pointless + # and also lz4 did not manage to squeeze the data (not even a bit). + uncompressed_data = compressor.compress(data) + return uncompressed_data + # if we get here, the decider decided to try the expensive compressor. + # we also know that lz4_data is smaller than uncompressed data. + exp_compressed_data = compressor.compress(data) + ratio = len(exp_compressed_data) / len(lz4_data) + if ratio < 0.99: + # the expensive compressor managed to squeeze the data significantly better than lz4. + return exp_compressed_data + else: + # otherwise let's just store the lz4 data, which decompresses extremely fast. + return lz4_data + + def decompress(self, data): + raise NotImplementedError + + def detect(cls, data): + raise NotImplementedError + + +# Maps valid compressor names to their class COMPRESSOR_TABLE = { CNONE.name: CNONE, LZ4.name: LZ4, ZLIB.name: ZLIB, LZMA.name: LZMA, + Auto.name: Auto, } +# List of possible compression types. Does not include Auto, since it is a meta-Compressor. COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ] # check fast stuff first def get_compressor(name, **kwargs): @@ -186,14 +313,53 @@ class Compressor: return self.compressor.compress(data) def decompress(self, data): + compressor_cls = self.detect(data) + return compressor_cls(**self.params).decompress(data) + + @staticmethod + def detect(data): hdr = bytes(data[:2]) # detect() does not work with memoryview for cls in COMPRESSOR_LIST: if cls.detect(hdr): - return cls(**self.params).decompress(data) + return cls else: raise ValueError('No decompressor for this data found: %r.', data[:2]) -# a buffer used for (de)compression result, which can be slightly bigger -# than the chunk buffer in the worst (incompressible data) case, add 10%: -COMPR_BUFFER = bytes(int(1.1 * 2 ** 23)) # CHUNK_MAX_EXP == 23 +class CompressionSpec: + def __init__(self, s): + values = s.split(',') + count = len(values) + if count < 1: + raise ValueError + # --compression algo[,level] + self.name = values[0] + if self.name in ('none', 'lz4', ): + return + elif self.name in ('zlib', 'lzma', ): + if count < 2: + level = 6 # default compression level in py stdlib + elif count == 2: + level = int(values[1]) + if not 0 <= level <= 9: + raise ValueError + else: + raise ValueError + self.level = level + elif self.name == 'auto': + if 2 <= count <= 3: + compression = ','.join(values[1:]) + else: + raise ValueError + self.inner = CompressionSpec(compression) + else: + raise ValueError + + @property + def compressor(self): + if self.name in ('none', 'lz4', ): + return get_compressor(self.name) + elif self.name in ('zlib', 'lzma', ): + return get_compressor(self.name, level=self.level) + elif self.name == 'auto': + return get_compressor(self.name, compressor=self.inner.compressor) diff --git a/src/borg/constants.py b/src/borg/constants.py index ba5aa073..05a91339 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ 
-1,8 +1,8 @@ # this set must be kept complete, otherwise the RobustUnpacker might malfunction: ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master', - 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', - 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', 'win_dacl', - 'win_sacl', 'user_sid', ]) + 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size', + 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', + 'part', 'win_dacl', 'win_sacl', 'user_sid']) # this is the set of keys that are always present in items: REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ]) @@ -15,9 +15,7 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us # this is the set of keys that are always present in archives: REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ]) -ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end') - -# default umask, overriden by --umask, defaults to read/write only for owner +# default umask, overridden by --umask, defaults to read/write only for owner UMASK_DEFAULT = 0o077 CACHE_TAG_NAME = 'CACHEDIR.TAG' @@ -29,8 +27,27 @@ CACHE_TAG_CONTENTS = b'Signature: 8a477f597d28d172789f06886806bc55' # bytes. That's why it's 500 MiB instead of 512 MiB. DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024 -# A few hundred files per directory to go easy on filesystems which don't like too many files per dir (NTFS) -DEFAULT_SEGMENTS_PER_DIR = 500 +# 20 MiB minus 41 bytes for a Repository header (because the "size" field in the Repository includes +# the header, and the total size was set to 20 MiB). +MAX_DATA_SIZE = 20971479 + +# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of +# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID. +MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 # see LoggedIO.put_header_fmt.size assertion in repository module +assert MAX_OBJECT_SIZE == 20 * 1024 * 1024 + +# borg.remote read() buffer size +BUFSIZE = 10 * 1024 * 1024 + +# to use a safe, limited unpacker, we need to set a upper limit to the archive count in the manifest. +# this does not mean that you can always really reach that number, because it also needs to be less than +# MAX_DATA_SIZE or it will trigger the check for that. +MAX_ARCHIVES = 400000 + +# repo.list() / .scan() result count limit the borg client uses +LIST_SCAN_LIMIT = 100000 + +DEFAULT_SEGMENTS_PER_DIR = 1000 CHUNK_MIN_EXP = 19 # 2**19 == 512kiB CHUNK_MAX_EXP = 23 # 2**23 == 8MiB @@ -41,7 +58,11 @@ HASH_MASK_BITS = 21 # results in ~2MiB chunks statistically CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) # chunker params for the items metadata stream, finer granularity -ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE) +ITEMS_CHUNKER_PARAMS = (15, 19, 17, HASH_WINDOW_SIZE) + +# operating mode of the files cache (for fast skipping of unchanged files) +DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode' +DEFAULT_FILES_CACHE_MODE = 'cis' # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI) # return codes returned by borg command # when borg is killed by signal N, rc = 128 + N @@ -49,6 +70,21 @@ EXIT_SUCCESS = 0 # everything done, no problems EXIT_WARNING = 1 # reached normal end of operation, but there were issues EXIT_ERROR = 2 # terminated abruptly, did not reach end of operation +# never use datetime.isoformat(), it is evil. 
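The size constants introduced earlier in this hunk are tied together by a small piece of arithmetic; a quick check of the numbers, using only values stated in the comments above, could look like:

    PUT_HEADER_SIZE = 1 + 4 + 4 + 32           # tag ID + CRC + size + object ID, per the comment
    MAX_OBJECT_SIZE = 20 * 1024 * 1024         # 20 MiB total, including the header
    MAX_DATA_SIZE = MAX_OBJECT_SIZE - PUT_HEADER_SIZE
    assert PUT_HEADER_SIZE == 41
    assert MAX_DATA_SIZE == 20971479           # the literal used in constants.py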
always use one of these: +# datetime.strftime(ISO_FORMAT) # output always includes .microseconds +# datetime.strftime(ISO_FORMAT_NO_USECS) # output never includes microseconds +ISO_FORMAT_NO_USECS = '%Y-%m-%dT%H:%M:%S' +ISO_FORMAT = ISO_FORMAT_NO_USECS + '.%f' + DASHES = '-' * 78 PBKDF2_ITERATIONS = 100000 + + +REPOSITORY_README = """This is a Borg Backup repository. +See https://borgbackup.readthedocs.io/ +""" + +CACHE_README = """This is a Borg Backup cache. +See https://borgbackup.readthedocs.io/ +""" diff --git a/src/borg/crypto.pyx b/src/borg/crypto.pyx deleted file mode 100644 index 286d596b..00000000 --- a/src/borg/crypto.pyx +++ /dev/null @@ -1,203 +0,0 @@ -"""A thin OpenSSL wrapper""" - -from libc.stdlib cimport malloc, free -from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release - -API_VERSION = 3 - - -cdef extern from "openssl/evp.h": - ctypedef struct EVP_MD: - pass - ctypedef struct EVP_CIPHER: - pass - ctypedef struct EVP_CIPHER_CTX: - pass - ctypedef struct ENGINE: - pass - const EVP_CIPHER *EVP_aes_256_ctr() - EVP_CIPHER_CTX *EVP_CIPHER_CTX_new() - void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *a) - - int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl, - const unsigned char *key, const unsigned char *iv) - int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl, - const unsigned char *key, const unsigned char *iv) - int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, - const unsigned char *in_, int inl) - int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, - const unsigned char *in_, int inl) - int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) - int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) - - EVP_MD *EVP_sha256() nogil - - -cdef extern from "openssl/hmac.h": - unsigned char *HMAC(const EVP_MD *evp_md, - const void *key, int key_len, - const unsigned char *data, int data_len, - unsigned char *md, unsigned int *md_len) nogil - -import struct - -_int = struct.Struct('>I') -_long = struct.Struct('>Q') -_2long = struct.Struct('>QQ') - -bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0] -bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] -long_to_bytes = lambda x: _long.pack(x) - - -def bytes16_to_int(b, offset=0): - h, l = _2long.unpack_from(b, offset) - return (h << 64) + l - - -def int_to_bytes16(i): - max_uint64 = 0xffffffffffffffff - l = i & max_uint64 - h = (i >> 64) & max_uint64 - return _2long.pack(h, l) - - -def increment_iv(iv, amount=1): - """ - Increment the IV by the given amount (default 1). - - :param iv: input IV, 16 bytes (128 bit) - :param amount: increment value - :return: input_IV + amount, 16 bytes (128 bit) - """ - assert len(iv) == 16 - iv = bytes16_to_int(iv) - iv += amount - iv = int_to_bytes16(iv) - return iv - - -def num_aes_blocks(int length): - """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data. - Note: this is only correct for modes without padding, like AES-CTR. 
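increment_iv() in the module being removed above treats the 16-byte IV as one big-endian 128-bit counter (judging by the imports later in this diff, an equivalent lives on in crypto/low_level). A stand-alone sketch of that round-trip, using the same struct format as the removed helpers, is:

    import struct
    _2long = struct.Struct('>QQ')

    def increment_iv_sketch(iv: bytes, amount: int = 1) -> bytes:
        # unpack as two unsigned 64-bit halves, add, repack - mirrors the helpers above
        hi, lo = _2long.unpack(iv)
        value = (hi << 64) + lo + amount
        return _2long.pack((value >> 64) & 0xffffffffffffffff, value & 0xffffffffffffffff)

    assert increment_iv_sketch(bytes(16), 1) == bytes(15) + b'\x01'
    assert increment_iv_sketch(bytes(15) + b'\xff', 1) == bytes(14) + b'\x01\x00'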
- """ - return (length + 15) // 16 - - -cdef Py_buffer ro_buffer(object data) except *: - cdef Py_buffer view - PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) - return view - - -cdef class AES: - """A thin wrapper around the OpenSSL EVP cipher API - """ - cdef EVP_CIPHER_CTX *ctx - cdef int is_encrypt - cdef unsigned char iv_orig[16] - cdef long long blocks - - def __cinit__(self, is_encrypt, key, iv=None): - self.ctx = EVP_CIPHER_CTX_new() - self.is_encrypt = is_encrypt - # Set cipher type and mode - cipher_mode = EVP_aes_256_ctr() - if self.is_encrypt: - if not EVP_EncryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL): - raise Exception('EVP_EncryptInit_ex failed') - else: # decrypt - if not EVP_DecryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL): - raise Exception('EVP_DecryptInit_ex failed') - self.reset(key, iv) - - def __dealloc__(self): - EVP_CIPHER_CTX_free(self.ctx) - - def reset(self, key=None, iv=None): - cdef const unsigned char *key2 = NULL - cdef const unsigned char *iv2 = NULL - if key: - key2 = key - if iv: - iv2 = iv - assert isinstance(iv, bytes) and len(iv) == 16 - for i in range(16): - self.iv_orig[i] = iv[i] - self.blocks = 0 # number of AES blocks encrypted starting with iv_orig - # Initialise key and IV - if self.is_encrypt: - if not EVP_EncryptInit_ex(self.ctx, NULL, NULL, key2, iv2): - raise Exception('EVP_EncryptInit_ex failed') - else: # decrypt - if not EVP_DecryptInit_ex(self.ctx, NULL, NULL, key2, iv2): - raise Exception('EVP_DecryptInit_ex failed') - - @property - def iv(self): - return increment_iv(self.iv_orig[:16], self.blocks) - - def encrypt(self, data): - cdef Py_buffer data_buf = ro_buffer(data) - cdef int inl = len(data) - cdef int ctl = 0 - cdef int outl = 0 - # note: modes that use padding, need up to one extra AES block (16b) - cdef unsigned char *out = malloc(inl+16) - if not out: - raise MemoryError - try: - if not EVP_EncryptUpdate(self.ctx, out, &outl, data_buf.buf, inl): - raise Exception('EVP_EncryptUpdate failed') - ctl = outl - if not EVP_EncryptFinal_ex(self.ctx, out+ctl, &outl): - raise Exception('EVP_EncryptFinal failed') - ctl += outl - self.blocks += num_aes_blocks(ctl) - return out[:ctl] - finally: - free(out) - PyBuffer_Release(&data_buf) - - def decrypt(self, data): - cdef Py_buffer data_buf = ro_buffer(data) - cdef int inl = len(data) - cdef int ptl = 0 - cdef int outl = 0 - # note: modes that use padding, need up to one extra AES block (16b). - # This is what the openssl docs say. I am not sure this is correct, - # but OTOH it will not cause any harm if our buffer is a little bigger. - cdef unsigned char *out = malloc(inl+16) - if not out: - raise MemoryError - try: - if not EVP_DecryptUpdate(self.ctx, out, &outl, data_buf.buf, inl): - raise Exception('EVP_DecryptUpdate failed') - ptl = outl - if EVP_DecryptFinal_ex(self.ctx, out+ptl, &outl) <= 0: - # this error check is very important for modes with padding or - # authentication. for them, a failure here means corrupted data. - # CTR mode does not use padding nor authentication. 
- raise Exception('EVP_DecryptFinal failed') - ptl += outl - self.blocks += num_aes_blocks(inl) - return out[:ptl] - finally: - free(out) - PyBuffer_Release(&data_buf) - - -def hmac_sha256(key, data): - md = bytes(32) - cdef Py_buffer data_buf = ro_buffer(data) - cdef const unsigned char *key_ptr = key - cdef int key_len = len(key) - cdef unsigned char *md_ptr = md - try: - with nogil: - rc = HMAC(EVP_sha256(), key_ptr, key_len, data_buf.buf, data_buf.len, md_ptr, NULL) - if rc != md_ptr: - raise Exception('HMAC(EVP_sha256) failed') - finally: - PyBuffer_Release(&data_buf) - return md diff --git a/src/borg/crypto/__init__.py b/src/borg/crypto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/borg/crypto/_crypto_helpers.c b/src/borg/crypto/_crypto_helpers.c new file mode 100644 index 00000000..0a433bb5 --- /dev/null +++ b/src/borg/crypto/_crypto_helpers.c @@ -0,0 +1,35 @@ +/* some helpers, so our code also works with OpenSSL 1.0.x */ + +#include +#include +#include + +#if OPENSSL_VERSION_NUMBER < 0x10100000L + +HMAC_CTX *HMAC_CTX_new(void) +{ + HMAC_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); + if (ctx != NULL) { + memset(ctx, 0, sizeof *ctx); + HMAC_CTX_cleanup(ctx); + } + return ctx; +} + +void HMAC_CTX_free(HMAC_CTX *ctx) +{ + if (ctx != NULL) { + HMAC_CTX_cleanup(ctx); + OPENSSL_free(ctx); + } +} + +const EVP_CIPHER *EVP_aes_256_ocb(void){ /* dummy, so that code compiles */ + return NULL; +} + +const EVP_CIPHER *EVP_chacha20_poly1305(void){ /* dummy, so that code compiles */ + return NULL; +} + +#endif diff --git a/src/borg/crypto/_crypto_helpers.h b/src/borg/crypto/_crypto_helpers.h new file mode 100644 index 00000000..bb9afc41 --- /dev/null +++ b/src/borg/crypto/_crypto_helpers.h @@ -0,0 +1,15 @@ +/* some helpers, so our code also works with OpenSSL 1.0.x */ + +#include +#include +#include + +#if OPENSSL_VERSION_NUMBER < 0x10100000L + +HMAC_CTX *HMAC_CTX_new(void); +void HMAC_CTX_free(HMAC_CTX *ctx); + +const EVP_CIPHER *EVP_aes_256_ocb(void); /* dummy, so that code compiles */ +const EVP_CIPHER *EVP_chacha20_poly1305(void); /* dummy, so that code compiles */ + +#endif diff --git a/src/borg/crypto/file_integrity.py b/src/borg/crypto/file_integrity.py new file mode 100644 index 00000000..84b22a7e --- /dev/null +++ b/src/borg/crypto/file_integrity.py @@ -0,0 +1,237 @@ +import hashlib +import io +import json +import os +from hmac import compare_digest + +from ..helpers import IntegrityError +from ..logger import create_logger +from ..algorithms.checksums import StreamingXXH64 + +logger = create_logger() + + +class FileLikeWrapper: + def __enter__(self): + self.fd.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.fd.__exit__(exc_type, exc_val, exc_tb) + + def tell(self): + return self.fd.tell() + + def seek(self, offset, whence=io.SEEK_SET): + return self.fd.seek(offset, whence) + + def write(self, data): + return self.fd.write(data) + + def read(self, n=None): + return self.fd.read(n) + + def flush(self): + self.fd.flush() + + def fileno(self): + return self.fd.fileno() + + +class FileHashingWrapper(FileLikeWrapper): + """ + Wrapper for file-like objects that computes a hash on-the-fly while reading/writing. + + WARNING: Seeks should only be used to query the size of the file, not + to skip data, because skipped data isn't read and not hashed into the digest. + + Similarly skipping while writing to create sparse files is also not supported. 
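The hashing wrapper described above feeds everything that passes through the file object into a running digest; a stripped-down illustration of the read side (stdlib only, not borg's class) is:

    import hashlib, io

    class HashingReader:
        # every read() also feeds the returned bytes into a running digest
        def __init__(self, fd):
            self.fd = fd
            self.hash = hashlib.sha512()
        def read(self, n=-1):
            data = self.fd.read(n)
            self.hash.update(data)
            return data

    r = HashingReader(io.BytesIO(b'payload'))
    assert r.read() == b'payload'
    digest = r.hash.hexdigest()    # compare against a stored digest to detect tampering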
+ + Data has to be read/written in a symmetric fashion, otherwise different + digests will be generated. + + Note: When used as a context manager read/write operations outside the enclosed scope + are illegal. + """ + + ALGORITHM = None + FACTORY = None + + def __init__(self, backing_fd, write): + self.fd = backing_fd + self.writing = write + self.hash = self.FACTORY() + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is None: + self.hash_length() + super().__exit__(exc_type, exc_val, exc_tb) + + def write(self, data): + """ + Write *data* to backing file and update internal state. + """ + n = super().write(data) + self.hash.update(data) + return n + + def read(self, n=None): + """ + Read *data* from backing file (*n* has the usual meaning) and update internal state. + """ + data = super().read(n) + self.hash.update(data) + return data + + def hexdigest(self): + """ + Return current digest bytes as hex-string. + + Note: this can be called multiple times. + """ + return self.hash.hexdigest() + + def update(self, data: bytes): + self.hash.update(data) + + def hash_length(self, seek_to_end=False): + if seek_to_end: + # Add length of file to the hash to avoid problems if only a prefix is read. + self.seek(0, io.SEEK_END) + self.hash.update(str(self.tell()).encode()) + + +class SHA512FileHashingWrapper(FileHashingWrapper): + ALGORITHM = 'SHA512' + FACTORY = hashlib.sha512 + + +class XXH64FileHashingWrapper(FileHashingWrapper): + ALGORITHM = 'XXH64' + FACTORY = StreamingXXH64 + + +SUPPORTED_ALGORITHMS = { + SHA512FileHashingWrapper.ALGORITHM: SHA512FileHashingWrapper, + XXH64FileHashingWrapper.ALGORITHM: XXH64FileHashingWrapper, +} + + +class FileIntegrityError(IntegrityError): + """File failed integrity check: {}""" + + +class IntegrityCheckedFile(FileLikeWrapper): + def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None): + self.path = path + self.writing = write + mode = 'wb' if write else 'rb' + self.file_fd = override_fd or open(path, mode) + self.digests = {} + + hash_cls = XXH64FileHashingWrapper + + if not write: + algorithm_and_digests = self.load_integrity_data(path, integrity_data) + if algorithm_and_digests: + algorithm, self.digests = algorithm_and_digests + hash_cls = SUPPORTED_ALGORITHMS[algorithm] + + # TODO: When we're reading but don't have any digests, i.e. no integrity file existed, + # TODO: then we could just short-circuit. + + self.fd = self.hasher = hash_cls(backing_fd=self.file_fd, write=write) + self.hash_filename(filename) + + def load_integrity_data(self, path, integrity_data): + if integrity_data is not None: + return self.parse_integrity_data(path, integrity_data) + + def hash_filename(self, filename=None): + # Hash the name of the file, but only the basename, ie. not the path. + # In Borg the name itself encodes the context (eg. index.N, cache, files), + # while the path doesn't matter, and moving e.g. a repository or cache directory is supported. + # Changing the name however imbues a change of context that is not permissible. + # While Borg does not use anything except ASCII in these file names, it's important to use + # the same encoding everywhere for portability. Using os.fsencode() would be wrong. 
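hash_filename() and hash_part() both frame variable-length strings with a fixed 10-digit length field before feeding them to the hash, so concatenated fields cannot be confused with one another. For example (file name purely hypothetical):

    name = 'index.123'
    framed = ('%10d' % len(name)).encode() + name.encode()
    assert framed == b'         9index.123'   # 10-character length field, then the name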
+ filename = os.path.basename(filename or self.path) + self.hasher.update(('%10d' % len(filename)).encode()) + self.hasher.update(filename.encode()) + + @classmethod + def parse_integrity_data(cls, path: str, data: str): + try: + integrity_data = json.loads(data) + # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled. + algorithm = integrity_data['algorithm'] + if algorithm not in SUPPORTED_ALGORITHMS: + logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm) + return + digests = integrity_data['digests'] + # Require at least presence of the final digest + digests['final'] + return algorithm, digests + except (ValueError, TypeError, KeyError) as e: + logger.warning('Could not parse integrity data for %s: %s', path, e) + raise FileIntegrityError(path) + + def hash_part(self, partname, is_final=False): + if not self.writing and not self.digests: + return + self.hasher.update(('%10d' % len(partname)).encode()) + self.hasher.update(partname.encode()) + self.hasher.hash_length(seek_to_end=is_final) + digest = self.hasher.hexdigest() + if self.writing: + self.digests[partname] = digest + elif self.digests and not compare_digest(self.digests.get(partname, ''), digest): + raise FileIntegrityError(self.path) + + def __exit__(self, exc_type, exc_val, exc_tb): + exception = exc_type is not None + if not exception: + self.hash_part('final', is_final=True) + self.hasher.__exit__(exc_type, exc_val, exc_tb) + if exception: + return + if self.writing: + self.store_integrity_data(json.dumps({ + 'algorithm': self.hasher.ALGORITHM, + 'digests': self.digests, + })) + elif self.digests: + logger.debug('Verified integrity of %s', self.path) + + def store_integrity_data(self, data: str): + self.integrity_data = data + + +class DetachedIntegrityCheckedFile(IntegrityCheckedFile): + def __init__(self, path, write, filename=None, override_fd=None): + super().__init__(path, write, filename, override_fd) + filename = filename or os.path.basename(path) + output_dir = os.path.dirname(path) + self.output_integrity_file = self.integrity_file_path(os.path.join(output_dir, filename)) + + def load_integrity_data(self, path, integrity_data): + assert not integrity_data, 'Cannot pass explicit integrity_data to DetachedIntegrityCheckedFile' + return self.read_integrity_file(self.path) + + @staticmethod + def integrity_file_path(path): + return path + '.integrity' + + @classmethod + def read_integrity_file(cls, path): + try: + with open(cls.integrity_file_path(path), 'r') as fd: + return cls.parse_integrity_data(path, fd.read()) + except FileNotFoundError: + logger.info('No integrity file found for %s', path) + except OSError as e: + logger.warning('Could not read integrity file for %s: %s', path, e) + raise FileIntegrityError(path) + + def store_integrity_data(self, data: str): + with open(self.output_integrity_file, 'w') as fd: + fd.write(data) diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py new file mode 100644 index 00000000..be3d8130 --- /dev/null +++ b/src/borg/crypto/key.py @@ -0,0 +1,839 @@ +import configparser +import getpass +import os +import shlex +import sys +import textwrap +import subprocess +from binascii import a2b_base64, b2a_base64, hexlify +from hashlib import sha256, sha512, pbkdf2_hmac +from hmac import HMAC, compare_digest + +import msgpack + +from ..logger import create_logger + +logger = create_logger() + +from ..constants import * # NOQA +from ..compress import Compressor +from ..helpers import StableDict +from ..helpers 
import Error, IntegrityError +from ..helpers import yes +from ..helpers import get_keys_dir, get_security_dir +from ..helpers import get_limited_unpacker +from ..helpers import bin_to_hex +from ..helpers import prepare_subprocess_env +from ..item import Key, EncryptedKey +from ..platform import SaveFile + +from .nonces import NonceManager +from .low_level import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_cipher_blocks, hmac_sha256, blake2b_256, hkdf_hmac_sha512 +from .low_level import AES256_CTR_HMAC_SHA256, AES256_CTR_BLAKE2b + + +class PassphraseWrong(Error): + """passphrase supplied in BORG_PASSPHRASE or by BORG_PASSCOMMAND is incorrect.""" + + +class PasscommandFailure(Error): + """passcommand supplied in BORG_PASSCOMMAND failed: {}""" + + +class PasswordRetriesExceeded(Error): + """exceeded the maximum password retries""" + + +class UnsupportedPayloadError(Error): + """Unsupported payload type {}. A newer version is required to access this repository.""" + + +class UnsupportedManifestError(Error): + """Unsupported manifest envelope. A newer version is required to access this repository.""" + + +class KeyfileNotFoundError(Error): + """No key file for repository {} found in {}.""" + + +class KeyfileInvalidError(Error): + """Invalid key file for repository {} found in {}.""" + + +class KeyfileMismatchError(Error): + """Mismatch between repository {} and key file {}.""" + + +class RepoKeyNotFoundError(Error): + """No key entry found in the config of repository {}.""" + + +class TAMRequiredError(IntegrityError): + __doc__ = textwrap.dedent(""" + Manifest is unauthenticated, but it is required for this repository. + + This either means that you are under attack, or that you modified this repository + with a Borg version older than 1.0.9 after TAM authentication was enabled. + + In the latter case, use "borg upgrade --tam --force '{}'" to re-authenticate the manifest. + """).strip() + traceback = False + + +class TAMInvalid(IntegrityError): + __doc__ = IntegrityError.__doc__ + traceback = False + + def __init__(self): + # Error message becomes: "Data integrity error: Manifest authentication did not verify" + super().__init__('Manifest authentication did not verify') + + +class TAMUnsupportedSuiteError(IntegrityError): + """Could not verify manifest: Unsupported suite {!r}; a newer version is needed.""" + traceback = False + + +class KeyBlobStorage: + NO_STORAGE = 'no_storage' + KEYFILE = 'keyfile' + REPO = 'repository' + + +def key_creator(repository, args): + for key in AVAILABLE_KEY_TYPES: + if key.ARG_NAME == args.encryption: + assert key.ARG_NAME is not None + return key.create(repository, args) + else: + raise ValueError('Invalid encryption mode "%s"' % args.encryption) + + +def key_argument_names(): + return [key.ARG_NAME for key in AVAILABLE_KEY_TYPES if key.ARG_NAME] + + +def identify_key(manifest_data): + key_type = manifest_data[0] + if key_type == PassphraseKey.TYPE: + # we just dispatch to repokey mode and assume the passphrase was migrated to a repokey. + # see also comment in PassphraseKey class. 
+ return RepoKey + + for key in AVAILABLE_KEY_TYPES: + if key.TYPE == key_type: + return key + else: + raise UnsupportedPayloadError(key_type) + + +def key_factory(repository, manifest_data): + return identify_key(manifest_data).detect(repository, manifest_data) + + +def tam_required_file(repository): + security_dir = get_security_dir(bin_to_hex(repository.id)) + return os.path.join(security_dir, 'tam_required') + + +def tam_required(repository): + file = tam_required_file(repository) + return os.path.isfile(file) + + +class KeyBase: + # Numeric key type ID, must fit in one byte. + TYPE = None # override in subclasses + + # Human-readable name + NAME = 'UNDEFINED' + + # Name used in command line / API (e.g. borg init --encryption=...) + ARG_NAME = 'UNDEFINED' + + # Storage type (no key blob storage / keyfile / repo) + STORAGE = KeyBlobStorage.NO_STORAGE + + # Seed for the buzhash chunker (borg.algorithms.chunker.Chunker) + # type: int + chunk_seed = None + + # Whether this *particular instance* is encrypted from a practical point of view, + # i.e. when it's using encryption with a empty passphrase, then + # that may be *technically* called encryption, but for all intents and purposes + # that's as good as not encrypting in the first place, and this member should be False. + # + # The empty passphrase is also special because Borg tries it first when no passphrase + # was supplied, and if an empty passphrase works, then Borg won't ask for one. + logically_encrypted = False + + def __init__(self, repository): + self.TYPE_STR = bytes([self.TYPE]) + self.repository = repository + self.target = None # key location file path / repo obj + # Some commands write new chunks (e.g. rename) but don't take a --compression argument. This duplicates + # the default used by those commands who do take a --compression argument. + self.compressor = Compressor('lz4') + self.decompress = self.compressor.decompress + self.tam_required = True + + def id_hash(self, data): + """Return HMAC hash using the "id" HMAC key + """ + + def encrypt(self, chunk): + pass + + def decrypt(self, id, data, decompress=True): + pass + + def assert_id(self, id, data): + if id: + id_computed = self.id_hash(data) + if not compare_digest(id_computed, id): + raise IntegrityError('Chunk %s: id verification failed' % bin_to_hex(id)) + + def _tam_key(self, salt, context): + return hkdf_hmac_sha512( + ikm=self.id_key + self.enc_key + self.enc_hmac_key, + salt=salt, + info=b'borg-metadata-authentication-' + context, + output_length=64 + ) + + def pack_and_authenticate_metadata(self, metadata_dict, context=b'manifest'): + metadata_dict = StableDict(metadata_dict) + tam = metadata_dict['tam'] = StableDict({ + 'type': 'HKDF_HMAC_SHA512', + 'hmac': bytes(64), + 'salt': os.urandom(64), + }) + packed = msgpack.packb(metadata_dict, unicode_errors='surrogateescape') + tam_key = self._tam_key(tam['salt'], context) + tam['hmac'] = HMAC(tam_key, packed, sha512).digest() + return msgpack.packb(metadata_dict, unicode_errors='surrogateescape') + + def unpack_and_verify_manifest(self, data, force_tam_not_required=False): + """Unpack msgpacked *data* and return (object, did_verify).""" + if data.startswith(b'\xc1' * 4): + # This is a manifest from the future, we can't read it. 
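The TAM scheme used by pack_and_authenticate_metadata() / unpack_and_verify_manifest() is sign-with-zeroed-field: the hmac slot holds 64 zero bytes while the MAC is computed, then the real tag is patched in (and zeroed again before verification). A toy version of that pattern, with a stand-in key instead of the HKDF-derived one, is:

    from hashlib import sha512
    from hmac import HMAC, compare_digest

    tam_key = b'k' * 64                                # stand-in for hkdf_hmac_sha512(...) output
    body = bytearray(b'metadata...' + bytes(64))       # last 64 bytes play the role of the hmac field
    tag = HMAC(tam_key, bytes(body), sha512).digest()  # sign with the field zeroed
    body[-64:] = tag                                   # ship the body with the tag patched in

    received = bytearray(body)
    received_tag = bytes(received[-64:])
    received[-64:] = bytes(64)                         # zero the field again before verifying
    assert compare_digest(HMAC(tam_key, bytes(received), sha512).digest(), received_tag)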
+ raise UnsupportedManifestError() + tam_required = self.tam_required + if force_tam_not_required and tam_required: + logger.warning('Manifest authentication DISABLED.') + tam_required = False + data = bytearray(data) + unpacker = get_limited_unpacker('manifest') + unpacker.feed(data) + unpacked = unpacker.unpack() + if b'tam' not in unpacked: + if tam_required: + raise TAMRequiredError(self.repository._location.canonical_path()) + else: + logger.debug('TAM not found and not required') + return unpacked, False + tam = unpacked.pop(b'tam', None) + if not isinstance(tam, dict): + raise TAMInvalid() + tam_type = tam.get(b'type', b'').decode('ascii', 'replace') + if tam_type != 'HKDF_HMAC_SHA512': + if tam_required: + raise TAMUnsupportedSuiteError(repr(tam_type)) + else: + logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type) + return unpacked, False + tam_hmac = tam.get(b'hmac') + tam_salt = tam.get(b'salt') + if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes): + raise TAMInvalid() + offset = data.index(tam_hmac) + data[offset:offset + 64] = bytes(64) + tam_key = self._tam_key(tam_salt, context=b'manifest') + calculated_hmac = HMAC(tam_key, data, sha512).digest() + if not compare_digest(calculated_hmac, tam_hmac): + raise TAMInvalid() + logger.debug('TAM-verified manifest') + return unpacked, True + + +class PlaintextKey(KeyBase): + TYPE = 0x02 + NAME = 'plaintext' + ARG_NAME = 'none' + STORAGE = KeyBlobStorage.NO_STORAGE + + chunk_seed = 0 + logically_encrypted = False + + def __init__(self, repository): + super().__init__(repository) + self.tam_required = False + + @classmethod + def create(cls, repository, args): + logger.info('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile" to enable encryption.') + return cls(repository) + + @classmethod + def detect(cls, repository, manifest_data): + return cls(repository) + + def id_hash(self, data): + return sha256(data).digest() + + def encrypt(self, chunk): + data = self.compressor.compress(chunk) + return b''.join([self.TYPE_STR, data]) + + def decrypt(self, id, data, decompress=True): + if data[0] != self.TYPE: + id_str = bin_to_hex(id) if id is not None else '(unknown)' + raise IntegrityError('Chunk %s: Invalid encryption envelope' % id_str) + payload = memoryview(data)[1:] + if not decompress: + return payload + data = self.decompress(payload) + self.assert_id(id, data) + return data + + def _tam_key(self, salt, context): + return salt + context + + +def random_blake2b_256_key(): + # This might look a bit curious, but is the same construction used in the keyed mode of BLAKE2b. + # Why limit the key to 64 bytes and pad it with 64 nulls nonetheless? The answer is that BLAKE2b + # has a 128 byte block size, but only 64 bytes of internal state (this is also referred to as a + # "local wide pipe" design, because the compression function transforms (block, state) => state, + # and len(block) >= len(state), hence wide.) + # In other words, a key longer than 64 bytes would have simply no advantage, since the function + # has no way of propagating more than 64 bytes of entropy internally. + # It's padded to a full block so that the key is never buffered internally by blake2b_update, ie. + # it remains in a single memory location that can be tracked and could be erased securely, if we + # wanted to. + return os.urandom(64) + bytes(64) + + +class ID_BLAKE2b_256: + """ + Key mix-in class for using BLAKE2b-256 for the id key. + + The id_key length must be 32 bytes. 
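random_blake2b_256_key() above pads 64 random bytes with 64 zero bytes so the key fills exactly one BLAKE2b block. A quick check of that size reasoning, using the stdlib blake2b purely for illustration (borg has its own blake2b_256 in crypto.low_level, and the keyed-hash shape sketched in the comment is an assumption):

    import hashlib, os

    key = os.urandom(64) + bytes(64)   # 64 bytes of entropy, zero-padded to one 128-byte block
    assert len(key) == 128 == hashlib.blake2b().block_size
    # assumed keyed-hash shape: hash the padded key block followed by the data
    digest = hashlib.blake2b(key + b'chunk data', digest_size=32).digest()
    assert len(digest) == 32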
+ """ + + def id_hash(self, data): + return blake2b_256(self.id_key, data) + + def init_from_random_data(self, data=None): + assert data is None # PassphraseKey is the only caller using *data* + super().init_from_random_data() + self.enc_hmac_key = random_blake2b_256_key() + self.id_key = random_blake2b_256_key() + + +class ID_HMAC_SHA_256: + """ + Key mix-in class for using HMAC-SHA-256 for the id key. + + The id_key length must be 32 bytes. + """ + + def id_hash(self, data): + return hmac_sha256(self.id_key, data) + + +class AESKeyBase(KeyBase): + """ + Common base class shared by KeyfileKey and PassphraseKey + + Chunks are encrypted using 256bit AES in Counter Mode (CTR) + + Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT + + To reduce payload size only 8 bytes of the 16 bytes nonce is saved + in the payload, the first 8 bytes are always zeros. This does not + affect security but limits the maximum repository capacity to + only 295 exabytes! + """ + + PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE + + CIPHERSUITE = AES256_CTR_HMAC_SHA256 + + logically_encrypted = True + + def encrypt(self, chunk): + data = self.compressor.compress(chunk) + next_iv = self.nonce_manager.ensure_reservation(self.cipher.next_iv(), + self.cipher.block_count(len(data))) + return self.cipher.encrypt(data, header=self.TYPE_STR, iv=next_iv) + + def decrypt(self, id, data, decompress=True): + if not (data[0] == self.TYPE or + data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): + id_str = bin_to_hex(id) if id is not None else '(unknown)' + raise IntegrityError('Chunk %s: Invalid encryption envelope' % id_str) + try: + payload = self.cipher.decrypt(data) + except IntegrityError as e: + raise IntegrityError("Chunk %s: Could not decrypt [%s]" % (bin_to_hex(id), str(e))) + if not decompress: + return payload + data = self.decompress(payload) + self.assert_id(id, data) + return data + + def init_from_random_data(self, data=None): + if data is None: + data = os.urandom(100) + self.enc_key = data[0:32] + self.enc_hmac_key = data[32:64] + self.id_key = data[64:96] + self.chunk_seed = bytes_to_int(data[96:100]) + # Convert to signed int32 + if self.chunk_seed & 0x80000000: + self.chunk_seed = self.chunk_seed - 0xffffffff - 1 + + def init_ciphers(self, manifest_data=None): + self.cipher = self.CIPHERSUITE(mac_key=self.enc_hmac_key, enc_key=self.enc_key, header_len=1, aad_offset=1) + if manifest_data is None: + nonce = 0 + else: + if not (manifest_data[0] == self.TYPE or + manifest_data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): + raise IntegrityError('Manifest: Invalid encryption envelope') + # manifest_blocks is a safe upper bound on the amount of cipher blocks needed + # to encrypt the manifest. depending on the ciphersuite and overhead, it might + # be a bit too high, but that does not matter. 
+ manifest_blocks = num_cipher_blocks(len(manifest_data)) + nonce = self.cipher.extract_iv(manifest_data) + manifest_blocks + self.cipher.set_iv(nonce) + self.nonce_manager = NonceManager(self.repository, nonce) + + +class Passphrase(str): + @classmethod + def _env_passphrase(cls, env_var, default=None): + passphrase = os.environ.get(env_var, default) + if passphrase is not None: + return cls(passphrase) + + @classmethod + def env_passphrase(cls, default=None): + passphrase = cls._env_passphrase('BORG_PASSPHRASE', default) + if passphrase is not None: + return passphrase + passphrase = cls.env_passcommand() + if passphrase is not None: + return passphrase + + @classmethod + def env_passcommand(cls, default=None): + passcommand = os.environ.get('BORG_PASSCOMMAND', None) + if passcommand is not None: + # passcommand is a system command (not inside pyinstaller env) + env = prepare_subprocess_env(system=True) + try: + passphrase = subprocess.check_output(shlex.split(passcommand), universal_newlines=True, env=env) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + raise PasscommandFailure(e) + return cls(passphrase.rstrip('\n')) + + @classmethod + def env_new_passphrase(cls, default=None): + return cls._env_passphrase('BORG_NEW_PASSPHRASE', default) + + @classmethod + def getpass(cls, prompt): + return cls(getpass.getpass(prompt)) + + @classmethod + def verification(cls, passphrase): + msg = 'Do you want your passphrase to be displayed for verification? [yN]: ' + if yes(msg, retry_msg=msg, invalid_msg='Invalid answer, try again.', + retry=True, env_var_override='BORG_DISPLAY_PASSPHRASE'): + print('Your passphrase (between double-quotes): "%s"' % passphrase, + file=sys.stderr) + print('Make sure the passphrase displayed above is exactly what you wanted.', + file=sys.stderr) + try: + passphrase.encode('ascii') + except UnicodeEncodeError: + print('Your passphrase (UTF-8 encoding in hex): %s' % + bin_to_hex(passphrase.encode('utf-8')), + file=sys.stderr) + print('As you have a non-ASCII passphrase, it is recommended to keep the UTF-8 encoding in hex together with the passphrase at a safe place.', + file=sys.stderr) + + @classmethod + def new(cls, allow_empty=False): + passphrase = cls.env_new_passphrase() + if passphrase is not None: + return passphrase + passphrase = cls.env_passphrase() + if passphrase is not None: + return passphrase + for retry in range(1, 11): + passphrase = cls.getpass('Enter new passphrase: ') + if allow_empty or passphrase: + passphrase2 = cls.getpass('Enter same passphrase again: ') + if passphrase == passphrase2: + cls.verification(passphrase) + logger.info('Remember your passphrase. Your data will be inaccessible without it.') + return passphrase + else: + print('Passphrases do not match', file=sys.stderr) + else: + print('Passphrase must not be blank', file=sys.stderr) + else: + raise PasswordRetriesExceeded + + def __repr__(self): + return '' + + def kdf(self, salt, iterations, length): + return pbkdf2_hmac('sha256', self.encode('utf-8'), salt, iterations, length) + + +class PassphraseKey(ID_HMAC_SHA_256, AESKeyBase): + # This mode was killed in borg 1.0, see: https://github.com/borgbackup/borg/issues/97 + # Reasons: + # - you can never ever change your passphrase for existing repos. + # - you can never ever use a different iterations count for existing repos. 
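Passphrase.kdf() above is a thin wrapper over PBKDF2-HMAC-SHA256; the key-file code later in this file calls it as passphrase.kdf(salt, iterations, 32) to derive the 32-byte AES key that protects the key blob. A stand-alone equivalent (passphrase and salt purely illustrative):

    from hashlib import pbkdf2_hmac
    import os

    salt = os.urandom(32)              # encrypt_key_file() uses a fresh 32-byte salt
    key = pbkdf2_hmac('sha256', 'hunter2'.encode('utf-8'), salt, 100000, 32)
    assert len(key) == 32              # 100000 == PBKDF2_ITERATIONS from constants.py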
+ # "Killed" means: + # - there is no automatic dispatch to this class via type byte + # - --encryption=passphrase is an invalid argument now + # This class is kept for a while to support migration from passphrase to repokey mode. + TYPE = 0x01 + NAME = 'passphrase' + ARG_NAME = None + STORAGE = KeyBlobStorage.NO_STORAGE + + iterations = 100000 # must not be changed ever! + + @classmethod + def create(cls, repository, args): + key = cls(repository) + logger.warning('WARNING: "passphrase" mode is unsupported since borg 1.0.') + passphrase = Passphrase.new(allow_empty=False) + key.init(repository, passphrase) + return key + + @classmethod + def detect(cls, repository, manifest_data): + prompt = 'Enter passphrase for %s: ' % repository._location.canonical_path() + key = cls(repository) + passphrase = Passphrase.env_passphrase() + if passphrase is None: + passphrase = Passphrase.getpass(prompt) + for retry in range(1, 3): + key.init(repository, passphrase) + try: + key.decrypt(None, manifest_data) + key.init_ciphers(manifest_data) + key._passphrase = passphrase + return key + except IntegrityError: + passphrase = Passphrase.getpass(prompt) + else: + raise PasswordRetriesExceeded + + def change_passphrase(self): + class ImmutablePassphraseError(Error): + """The passphrase for this encryption key type can't be changed.""" + + raise ImmutablePassphraseError + + def init(self, repository, passphrase): + self.init_from_random_data(passphrase.kdf(repository.id, self.iterations, 100)) + self.init_ciphers() + self.tam_required = False + + +class KeyfileKeyBase(AESKeyBase): + @classmethod + def detect(cls, repository, manifest_data): + key = cls(repository) + target = key.find_key() + prompt = 'Enter passphrase for key %s: ' % target + passphrase = Passphrase.env_passphrase() + if passphrase is None: + passphrase = Passphrase() + if not key.load(target, passphrase): + for retry in range(0, 3): + passphrase = Passphrase.getpass(prompt) + if key.load(target, passphrase): + break + else: + raise PasswordRetriesExceeded + else: + if not key.load(target, passphrase): + raise PassphraseWrong + key.init_ciphers(manifest_data) + key._passphrase = passphrase + return key + + def find_key(self): + raise NotImplementedError + + def load(self, target, passphrase): + raise NotImplementedError + + def _load(self, key_data, passphrase): + cdata = a2b_base64(key_data) + data = self.decrypt_key_file(cdata, passphrase) + if data: + data = msgpack.unpackb(data) + key = Key(internal_dict=data) + if key.version != 1: + raise IntegrityError('Invalid key file header') + self.repository_id = key.repository_id + self.enc_key = key.enc_key + self.enc_hmac_key = key.enc_hmac_key + self.id_key = key.id_key + self.chunk_seed = key.chunk_seed + self.tam_required = key.get('tam_required', tam_required(self.repository)) + return True + return False + + def decrypt_key_file(self, data, passphrase): + unpacker = get_limited_unpacker('key') + unpacker.feed(data) + data = unpacker.unpack() + enc_key = EncryptedKey(internal_dict=data) + assert enc_key.version == 1 + assert enc_key.algorithm == 'sha256' + key = passphrase.kdf(enc_key.salt, enc_key.iterations, 32) + data = AES(key, b'\0'*16).decrypt(enc_key.data) + if hmac_sha256(key, data) == enc_key.hash: + return data + + def encrypt_key_file(self, data, passphrase): + salt = os.urandom(32) + iterations = PBKDF2_ITERATIONS + key = passphrase.kdf(salt, iterations, 32) + hash = hmac_sha256(key, data) + cdata = AES(key, b'\0'*16).encrypt(data) + enc_key = EncryptedKey( + version=1, + 
salt=salt, + iterations=iterations, + algorithm='sha256', + hash=hash, + data=cdata, + ) + return msgpack.packb(enc_key.as_dict()) + + def _save(self, passphrase): + key = Key( + version=1, + repository_id=self.repository_id, + enc_key=self.enc_key, + enc_hmac_key=self.enc_hmac_key, + id_key=self.id_key, + chunk_seed=self.chunk_seed, + tam_required=self.tam_required, + ) + data = self.encrypt_key_file(msgpack.packb(key.as_dict()), passphrase) + key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii'))) + return key_data + + def change_passphrase(self, passphrase=None): + if passphrase is None: + passphrase = Passphrase.new(allow_empty=True) + self.save(self.target, passphrase) + + @classmethod + def create(cls, repository, args): + passphrase = Passphrase.new(allow_empty=True) + key = cls(repository) + key.repository_id = repository.id + key.init_from_random_data() + key.init_ciphers() + target = key.get_new_target(args) + key.save(target, passphrase) + logger.info('Key in "%s" created.' % target) + logger.info('Keep this key safe. Your data will be inaccessible without it.') + return key + + def save(self, target, passphrase): + raise NotImplementedError + + def get_new_target(self, args): + raise NotImplementedError + + +class KeyfileKey(ID_HMAC_SHA_256, KeyfileKeyBase): + TYPE = 0x00 + NAME = 'key file' + ARG_NAME = 'keyfile' + STORAGE = KeyBlobStorage.KEYFILE + + FILE_ID = 'BORG_KEY' + + def sanity_check(self, filename, id): + file_id = self.FILE_ID.encode() + b' ' + repo_id = hexlify(id) + with open(filename, 'rb') as fd: + # we do the magic / id check in binary mode to avoid stumbling over + # decoding errors if somebody has binary files in the keys dir for some reason. + if fd.read(len(file_id)) != file_id: + raise KeyfileInvalidError(self.repository._location.canonical_path(), filename) + if fd.read(len(repo_id)) != repo_id: + raise KeyfileMismatchError(self.repository._location.canonical_path(), filename) + return filename + + def find_key(self): + id = self.repository.id + keyfile = os.environ.get('BORG_KEY_FILE') + if keyfile: + return self.sanity_check(os.path.abspath(keyfile), id) + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + try: + return self.sanity_check(filename, id) + except (KeyfileInvalidError, KeyfileMismatchError): + pass + raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) + + def get_new_target(self, args): + keyfile = os.environ.get('BORG_KEY_FILE') + if keyfile: + return os.path.abspath(keyfile) + filename = args.location.to_key_filename() + path = filename + i = 1 + while os.path.exists(path): + i += 1 + path = filename + '.%d' % i + return path + + def load(self, target, passphrase): + with open(target, 'r') as fd: + key_data = ''.join(fd.readlines()[1:]) + success = self._load(key_data, passphrase) + if success: + self.target = target + return success + + def save(self, target, passphrase): + key_data = self._save(passphrase) + with SaveFile(target) as fd: + fd.write('%s %s\n' % (self.FILE_ID, bin_to_hex(self.repository_id))) + fd.write(key_data) + fd.write('\n') + self.target = target + + +class RepoKey(ID_HMAC_SHA_256, KeyfileKeyBase): + TYPE = 0x03 + NAME = 'repokey' + ARG_NAME = 'repokey' + STORAGE = KeyBlobStorage.REPO + + def find_key(self): + loc = self.repository._location.canonical_path() + try: + self.repository.load_key() + return loc + except configparser.NoOptionError: + raise RepoKeyNotFoundError(loc) from None + + def get_new_target(self, 
args): + return self.repository + + def load(self, target, passphrase): + # While the repository is encrypted, we consider a repokey repository with a blank + # passphrase an unencrypted repository. + self.logically_encrypted = passphrase != '' + + # what we get in target is just a repo location, but we already have the repo obj: + target = self.repository + key_data = target.load_key() + key_data = key_data.decode('utf-8') # remote repo: msgpack issue #99, getting bytes + success = self._load(key_data, passphrase) + if success: + self.target = target + return success + + def save(self, target, passphrase): + self.logically_encrypted = passphrase != '' + key_data = self._save(passphrase) + key_data = key_data.encode('utf-8') # remote repo: msgpack issue #99, giving bytes + target.save_key(key_data) + self.target = target + + +class Blake2KeyfileKey(ID_BLAKE2b_256, KeyfileKey): + TYPE = 0x04 + NAME = 'key file BLAKE2b' + ARG_NAME = 'keyfile-blake2' + STORAGE = KeyBlobStorage.KEYFILE + + FILE_ID = 'BORG_KEY' + CIPHERSUITE = AES256_CTR_BLAKE2b + + +class Blake2RepoKey(ID_BLAKE2b_256, RepoKey): + TYPE = 0x05 + NAME = 'repokey BLAKE2b' + ARG_NAME = 'repokey-blake2' + STORAGE = KeyBlobStorage.REPO + + CIPHERSUITE = AES256_CTR_BLAKE2b + + +class AuthenticatedKeyBase(RepoKey): + STORAGE = KeyBlobStorage.REPO + + # It's only authenticated, not encrypted. + logically_encrypted = False + + def load(self, target, passphrase): + success = super().load(target, passphrase) + self.logically_encrypted = False + return success + + def save(self, target, passphrase): + super().save(target, passphrase) + self.logically_encrypted = False + + def init_ciphers(self, manifest_data=None): + if manifest_data is not None and manifest_data[0] != self.TYPE: + raise IntegrityError('Manifest: Invalid encryption envelope') + + def encrypt(self, chunk): + data = self.compressor.compress(chunk) + return b''.join([self.TYPE_STR, data]) + + def decrypt(self, id, data, decompress=True): + if data[0] != self.TYPE: + id_str = bin_to_hex(id) if id is not None else '(unknown)' + raise IntegrityError('Chunk %s: Invalid envelope' % id_str) + payload = memoryview(data)[1:] + if not decompress: + return payload + data = self.decompress(payload) + self.assert_id(id, data) + return data + + +class AuthenticatedKey(AuthenticatedKeyBase): + TYPE = 0x07 + NAME = 'authenticated' + ARG_NAME = 'authenticated' + + +class Blake2AuthenticatedKey(ID_BLAKE2b_256, AuthenticatedKeyBase): + TYPE = 0x06 + NAME = 'authenticated BLAKE2b' + ARG_NAME = 'authenticated-blake2' + + +AVAILABLE_KEY_TYPES = ( + PlaintextKey, + PassphraseKey, + KeyfileKey, RepoKey, AuthenticatedKey, + Blake2KeyfileKey, Blake2RepoKey, Blake2AuthenticatedKey, +) diff --git a/src/borg/crypto/keymanager.py b/src/borg/crypto/keymanager.py new file mode 100644 index 00000000..aa657af9 --- /dev/null +++ b/src/borg/crypto/keymanager.py @@ -0,0 +1,221 @@ +import binascii +import pkgutil +import textwrap +from binascii import unhexlify, a2b_base64, b2a_base64 +from hashlib import sha256 + +from ..helpers import Manifest, NoManifestError, Error, yes, bin_to_hex, dash_open +from ..repository import Repository + +from .key import KeyfileKey, KeyfileNotFoundError, KeyBlobStorage, identify_key + + +class UnencryptedRepo(Error): + """Keymanagement not available for unencrypted repositories.""" + + +class UnknownKeyType(Error): + """Keytype {0} is unknown.""" + + +class RepoIdMismatch(Error): + """This key backup seems to be for a different backup repository, aborting.""" + + +class 
NotABorgKeyFile(Error): + """This file is not a borg key backup, aborting.""" + + +def sha256_truncated(data, num): + h = sha256() + h.update(data) + return h.hexdigest()[:num] + + +class KeyManager: + def __init__(self, repository): + self.repository = repository + self.keyblob = None + self.keyblob_storage = None + + try: + manifest_data = self.repository.get(Manifest.MANIFEST_ID) + except Repository.ObjectNotFound: + raise NoManifestError + + key = identify_key(manifest_data) + self.keyblob_storage = key.STORAGE + if self.keyblob_storage == KeyBlobStorage.NO_STORAGE: + raise UnencryptedRepo() + + def load_keyblob(self): + if self.keyblob_storage == KeyBlobStorage.KEYFILE: + k = KeyfileKey(self.repository) + target = k.find_key() + with open(target, 'r') as fd: + self.keyblob = ''.join(fd.readlines()[1:]) + + elif self.keyblob_storage == KeyBlobStorage.REPO: + self.keyblob = self.repository.load_key().decode() + + def store_keyblob(self, args): + if self.keyblob_storage == KeyBlobStorage.KEYFILE: + k = KeyfileKey(self.repository) + try: + target = k.find_key() + except KeyfileNotFoundError: + target = k.get_new_target(args) + + self.store_keyfile(target) + elif self.keyblob_storage == KeyBlobStorage.REPO: + self.repository.save_key(self.keyblob.encode('utf-8')) + + def get_keyfile_data(self): + data = '%s %s\n' % (KeyfileKey.FILE_ID, bin_to_hex(self.repository.id)) + data += self.keyblob + if not self.keyblob.endswith('\n'): + data += '\n' + return data + + def store_keyfile(self, target): + with open(target, 'w') as fd: + fd.write(self.get_keyfile_data()) + + def export(self, path): + self.store_keyfile(path) + + def export_qr(self, path): + with open(path, 'wb') as fd: + key_data = self.get_keyfile_data() + html = pkgutil.get_data('borg', 'paperkey.html') + html = html.replace(b'', key_data.encode() + b'') + fd.write(html) + + def export_paperkey(self, path): + def grouped(s): + ret = '' + i = 0 + for ch in s: + if i and i % 6 == 0: + ret += ' ' + ret += ch + i += 1 + return ret + + export = 'To restore key use borg key import --paper /path/to/repo\n\n' + + binary = a2b_base64(self.keyblob) + export += 'BORG PAPER KEY v1\n' + lines = (len(binary) + 17) // 18 + repoid = bin_to_hex(self.repository.id)[:18] + complete_checksum = sha256_truncated(binary, 12) + export += 'id: {0:d} / {1} / {2} - {3}\n'.format(lines, + grouped(repoid), + grouped(complete_checksum), + sha256_truncated((str(lines) + '/' + repoid + '/' + complete_checksum).encode('ascii'), 2)) + idx = 0 + while len(binary): + idx += 1 + binline = binary[:18] + checksum = sha256_truncated(idx.to_bytes(2, byteorder='big') + binline, 2) + export += '{0:2d}: {1} - {2}\n'.format(idx, grouped(bin_to_hex(binline)), checksum) + binary = binary[18:] + + if path: + with open(path, 'w') as fd: + fd.write(export) + else: + print(export) + + def import_keyfile(self, args): + file_id = KeyfileKey.FILE_ID + first_line = file_id + ' ' + bin_to_hex(self.repository.id) + '\n' + with dash_open(args.path, 'r') as fd: + file_first_line = fd.read(len(first_line)) + if file_first_line != first_line: + if not file_first_line.startswith(file_id): + raise NotABorgKeyFile() + else: + raise RepoIdMismatch() + self.keyblob = fd.read() + + self.store_keyblob(args) + + def import_paperkey(self, args): + try: + # imported here because it has global side effects + import readline + except ImportError: + print('Note: No line editing available due to missing readline support') + + repoid = bin_to_hex(self.repository.id)[:18] + try: + while True: # used for 
repeating on overall checksum mismatch + # id line input + while True: + idline = input('id: ').replace(' ', '') + if idline == '': + if yes('Abort import? [yN]:'): + raise EOFError() + + try: + (data, checksum) = idline.split('-') + except ValueError: + print("each line must contain exactly one '-', try again") + continue + try: + (id_lines, id_repoid, id_complete_checksum) = data.split('/') + except ValueError: + print("the id line must contain exactly three '/', try again") + continue + if sha256_truncated(data.lower().encode('ascii'), 2) != checksum: + print('line checksum did not match, try same line again') + continue + try: + lines = int(id_lines) + except ValueError: + print('internal error while parsing length') + + break + + if repoid != id_repoid: + raise RepoIdMismatch() + + result = b'' + idx = 1 + # body line input + while True: + inline = input('{0:2d}: '.format(idx)) + inline = inline.replace(' ', '') + if inline == '': + if yes('Abort import? [yN]:'): + raise EOFError() + try: + (data, checksum) = inline.split('-') + except ValueError: + print("each line must contain exactly one '-', try again") + continue + try: + part = unhexlify(data) + except binascii.Error: + print("only characters 0-9 and a-f and '-' are valid, try again") + continue + if sha256_truncated(idx.to_bytes(2, byteorder='big') + part, 2) != checksum: + print('line checksum did not match, try line {0} again'.format(idx)) + continue + result += part + if idx == lines: + break + idx += 1 + + if sha256_truncated(result, 12) != id_complete_checksum: + print('The overall checksum did not match, retry or enter a blank line to abort.') + continue + + self.keyblob = '\n'.join(textwrap.wrap(b2a_base64(result).decode('ascii'))) + '\n' + self.store_keyblob(args) + break + + except EOFError: + print('\n - aborted') + return diff --git a/src/borg/crypto/low_level.pyx b/src/borg/crypto/low_level.pyx new file mode 100644 index 00000000..82d87407 --- /dev/null +++ b/src/borg/crypto/low_level.pyx @@ -0,0 +1,894 @@ +"""An AEAD style OpenSSL wrapper + +API: + + encrypt(data, header=b'', aad_offset=0) -> envelope + decrypt(envelope, header_len=0, aad_offset=0) -> data + +Envelope layout: + +|<--------------------------- envelope ------------------------------------------>| +|<------------ header ----------->|<---------- ciphersuite specific ------------->| +|<-- not auth data -->|<-- aad -->|<-- e.g.: S(aad, iv, E(data)), iv, E(data) -->| + +|--- #aad_offset ---->| +|------------- #header_len ------>| + +S means a cryptographic signature function (like HMAC or GMAC). +E means a encryption function (like AES). +iv is the initialization vector / nonce, if needed. + +The split of header into not authenticated data and aad (additional authenticated +data) is done to support the legacy envelope layout as used in attic and early borg +(where the TYPE byte was not authenticated) and avoid unneeded memcpy and string +garbage. + +Newly designed envelope layouts can just authenticate the whole header. + +IV handling: + + iv = ... # just never repeat! 
+ cs = CS(hmac_key, enc_key, iv=iv) + envelope = cs.encrypt(data, header, aad_offset) + iv = cs.next_iv(len(data)) + (repeat) +""" + +import hashlib +import hmac +from math import ceil + +from cpython cimport PyMem_Malloc, PyMem_Free +from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release +from cpython.bytes cimport PyBytes_FromStringAndSize + +API_VERSION = '1.1_02' + +cdef extern from "openssl/crypto.h": + int CRYPTO_memcmp(const void *a, const void *b, size_t len) + +cdef extern from "../algorithms/blake2-libselect.h": + ctypedef struct blake2b_state: + pass + + int blake2b_init(blake2b_state *S, size_t outlen) nogil + int blake2b_update(blake2b_state *S, const void *input, size_t inlen) nogil + int blake2b_final(blake2b_state *S, void *out, size_t outlen) nogil + + +cdef extern from "openssl/evp.h": + ctypedef struct EVP_MD: + pass + ctypedef struct EVP_CIPHER: + pass + ctypedef struct EVP_CIPHER_CTX: + pass + ctypedef struct ENGINE: + pass + + const EVP_CIPHER *EVP_aes_256_ctr() + const EVP_CIPHER *EVP_aes_256_ocb() + const EVP_CIPHER *EVP_chacha20_poly1305() + + void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a) + void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a) + + int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl, + const unsigned char *key, const unsigned char *iv) + int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl, + const unsigned char *key, const unsigned char *iv) + int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, + const unsigned char *in_, int inl) + int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, + const unsigned char *in_, int inl) + int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) + int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) + + int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr) + int EVP_CTRL_GCM_GET_TAG + int EVP_CTRL_GCM_SET_TAG + int EVP_CTRL_GCM_SET_IVLEN + + const EVP_MD *EVP_sha256() nogil + + EVP_CIPHER_CTX *EVP_CIPHER_CTX_new() + void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *a) + +cdef extern from "openssl/hmac.h": + ctypedef struct HMAC_CTX: + pass + + void HMAC_CTX_init(HMAC_CTX *ctx) + void HMAC_CTX_cleanup(HMAC_CTX *ctx) + + HMAC_CTX *HMAC_CTX_new() + void HMAC_CTX_free(HMAC_CTX *a) + + int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int key_len, const EVP_MD *md, ENGINE *impl) + int HMAC_Update(HMAC_CTX *ctx, const unsigned char *data, int len) + int HMAC_Final(HMAC_CTX *ctx, unsigned char *md, unsigned int *len) + + unsigned char *HMAC(const EVP_MD *evp_md, + const void *key, int key_len, + const unsigned char *data, int data_len, + unsigned char *md, unsigned int *md_len) nogil + +cdef extern from "_crypto_helpers.h": + long OPENSSL_VERSION_NUMBER + + ctypedef struct HMAC_CTX: + pass + + HMAC_CTX *HMAC_CTX_new() + void HMAC_CTX_free(HMAC_CTX *a) + + const EVP_CIPHER *EVP_aes_256_ocb() # dummy + const EVP_CIPHER *EVP_chacha20_poly1305() # dummy + + +openssl10 = OPENSSL_VERSION_NUMBER < 0x10100000 + + +import struct + +_int = struct.Struct('>I') +_long = struct.Struct('>Q') + +bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0] +bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] +long_to_bytes = lambda x: _long.pack(x) + + +def num_cipher_blocks(length, blocksize=16): + """Return the number of cipher blocks required to encrypt/decrypt bytes of data. 
+ + For a precise computation, must be the used cipher's block size (AES: 16, CHACHA20: 64). + + For a safe-upper-boundary computation, must be the MINIMUM of the block sizes (in + bytes) of ALL supported ciphers. This can be used to adjust a counter if the used cipher is not + known (yet). + The default value of blocksize must be adjusted so it reflects this minimum, so a call of this + function without a blocksize is "safe-upper-boundary by default". + + Padding cipher modes are not supported. + """ + return (length + blocksize - 1) // blocksize + + +class CryptoError(Exception): + """Malfunction in the crypto module.""" + + +class IntegrityError(CryptoError): + """Integrity checks failed. Corrupted or tampered data.""" + + +cdef Py_buffer ro_buffer(object data) except *: + cdef Py_buffer view + PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) + return view + + +class UNENCRYPTED: + # Layout: HEADER + PlainText + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + assert mac_key is None + assert enc_key is None + self.header_len = header_len + self.set_iv(iv) + + def encrypt(self, data, header=b'', iv=None): + """ + IMPORTANT: it is called encrypt to satisfy the crypto api naming convention, + but this does NOT encrypt and it does NOT compute and store a MAC either. + """ + if iv is not None: + self.set_iv(iv) + assert self.iv is not None, 'iv needs to be set before encrypt is called' + return header + data + + def decrypt(self, envelope): + """ + IMPORTANT: it is called decrypt to satisfy the crypto api naming convention, + but this does NOT decrypt and it does NOT verify a MAC either, because data + is not encrypted and there is no MAC. + """ + return memoryview(envelope)[self.header_len:] + + def block_count(self, length): + return 0 + + def set_iv(self, iv): + self.iv = iv + + def next_iv(self): + return self.iv + + def extract_iv(self, envelope): + return 0 + + +cdef class AES256_CTR_BASE: + # Layout: HEADER + MAC 32 + IV 8 + CT (same as attic / borg < 1.2 IF HEADER = TYPE_BYTE, no AAD) + + cdef EVP_CIPHER_CTX *ctx + cdef unsigned char *enc_key + cdef int cipher_blk_len + cdef int iv_len, iv_len_short + cdef int aad_offset + cdef int header_len + cdef int mac_len + cdef unsigned char iv[16] + cdef long long blocks + + @classmethod + def requirements_check(cls): + if OPENSSL_VERSION_NUMBER < 0x10000000: + raise ValueError('AES CTR requires OpenSSL >= 1.0.0. 
Detected: OpenSSL %08x' % OPENSSL_VERSION_NUMBER) + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + self.requirements_check() + assert isinstance(enc_key, bytes) and len(enc_key) == 32 + self.cipher_blk_len = 16 + self.iv_len = sizeof(self.iv) + self.iv_len_short = 8 + assert aad_offset <= header_len + self.aad_offset = aad_offset + self.header_len = header_len + self.mac_len = 32 + self.enc_key = enc_key + if iv is not None: + self.set_iv(iv) + else: + self.blocks = -1 # make sure set_iv is called before encrypt + + def __cinit__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + self.ctx = EVP_CIPHER_CTX_new() + + def __dealloc__(self): + EVP_CIPHER_CTX_free(self.ctx) + + cdef mac_compute(self, const unsigned char *data1, int data1_len, + const unsigned char *data2, int data2_len, + unsigned char *mac_buf): + raise NotImplementedError + + cdef mac_verify(self, const unsigned char *data1, int data1_len, + const unsigned char *data2, int data2_len, + unsigned char *mac_buf, const unsigned char *mac_wanted): + """ + Calculate MAC of *data1*, *data2*, write result to *mac_buf*, and verify against *mac_wanted.* + """ + raise NotImplementedError + + def encrypt(self, data, header=b'', iv=None): + """ + encrypt data, compute mac over aad + iv + cdata, prepend header. + aad_offset is the offset into the header where aad starts. + """ + if iv is not None: + self.set_iv(iv) + assert self.blocks == 0, 'iv needs to be set before encrypt is called' + cdef int ilen = len(data) + cdef int hlen = len(header) + assert hlen == self.header_len + cdef int aoffset = self.aad_offset + cdef int alen = hlen - aoffset + cdef unsigned char *odata = PyMem_Malloc(hlen + self.mac_len + self.iv_len_short + + ilen + self.cipher_blk_len) # play safe, 1 extra blk + if not odata: + raise MemoryError + cdef int olen + cdef int offset + cdef Py_buffer idata = ro_buffer(data) + cdef Py_buffer hdata = ro_buffer(header) + try: + offset = 0 + for i in range(hlen): + odata[offset+i] = header[i] + offset += hlen + offset += self.mac_len + self.store_iv(odata+offset, self.iv) + offset += self.iv_len_short + rc = EVP_EncryptInit_ex(self.ctx, EVP_aes_256_ctr(), NULL, self.enc_key, self.iv) + if not rc: + raise CryptoError('EVP_EncryptInit_ex failed') + rc = EVP_EncryptUpdate(self.ctx, odata+offset, &olen, idata.buf, ilen) + if not rc: + raise CryptoError('EVP_EncryptUpdate failed') + offset += olen + rc = EVP_EncryptFinal_ex(self.ctx, odata+offset, &olen) + if not rc: + raise CryptoError('EVP_EncryptFinal_ex failed') + offset += olen + self.mac_compute( hdata.buf+aoffset, alen, + odata+hlen+self.mac_len, offset-hlen-self.mac_len, + odata+hlen) + self.blocks += self.block_count(ilen) + return odata[:offset] + finally: + PyMem_Free(odata) + PyBuffer_Release(&hdata) + PyBuffer_Release(&idata) + + def decrypt(self, envelope): + """ + authenticate aad + iv + cdata, decrypt cdata, ignore header bytes up to aad_offset. 
+ """ + cdef int ilen = len(envelope) + cdef int hlen = self.header_len + assert hlen == self.header_len + cdef int aoffset = self.aad_offset + cdef int alen = hlen - aoffset + cdef unsigned char *odata = PyMem_Malloc(ilen + self.cipher_blk_len) # play safe, 1 extra blk + if not odata: + raise MemoryError + cdef int olen + cdef int offset + cdef unsigned char mac_buf[32] + assert sizeof(mac_buf) == self.mac_len + cdef Py_buffer idata = ro_buffer(envelope) + try: + self.mac_verify( idata.buf+aoffset, alen, + idata.buf+hlen+self.mac_len, ilen-hlen-self.mac_len, + mac_buf, idata.buf+hlen) + iv = self.fetch_iv( idata.buf+hlen+self.mac_len) + self.set_iv(iv) + if not EVP_DecryptInit_ex(self.ctx, EVP_aes_256_ctr(), NULL, self.enc_key, iv): + raise CryptoError('EVP_DecryptInit_ex failed') + offset = 0 + rc = EVP_DecryptUpdate(self.ctx, odata+offset, &olen, + idata.buf+hlen+self.mac_len+self.iv_len_short, + ilen-hlen-self.mac_len-self.iv_len_short) + if not rc: + raise CryptoError('EVP_DecryptUpdate failed') + offset += olen + rc = EVP_DecryptFinal_ex(self.ctx, odata+offset, &olen) + if rc <= 0: + raise CryptoError('EVP_DecryptFinal_ex failed') + offset += olen + self.blocks += self.block_count(offset) + return odata[:offset] + finally: + PyMem_Free(odata) + PyBuffer_Release(&idata) + + def block_count(self, length): + return num_cipher_blocks(length, self.cipher_blk_len) + + def set_iv(self, iv): + # set_iv needs to be called before each encrypt() call + if isinstance(iv, int): + iv = iv.to_bytes(self.iv_len, byteorder='big') + assert isinstance(iv, bytes) and len(iv) == self.iv_len + for i in range(self.iv_len): + self.iv[i] = iv[i] + self.blocks = 0 # how many AES blocks got encrypted with this IV? + + def next_iv(self): + # call this after encrypt() to get the next iv (int) for the next encrypt() call + iv = int.from_bytes(self.iv[:self.iv_len], byteorder='big') + return iv + self.blocks + + cdef fetch_iv(self, unsigned char * iv_in): + # fetch lower self.iv_len_short bytes of iv and add upper zero bytes + return b'\0' * (self.iv_len - self.iv_len_short) + iv_in[0:self.iv_len_short] + + cdef store_iv(self, unsigned char * iv_out, unsigned char * iv): + # store only lower self.iv_len_short bytes, upper bytes are assumed to be 0 + cdef int i + for i in range(self.iv_len_short): + iv_out[i] = iv[(self.iv_len-self.iv_len_short)+i] + + def extract_iv(self, envelope): + offset = self.header_len + self.mac_len + return bytes_to_long(envelope[offset:offset+self.iv_len_short]) + + +cdef class AES256_CTR_HMAC_SHA256(AES256_CTR_BASE): + cdef HMAC_CTX *hmac_ctx + cdef unsigned char *mac_key + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + assert isinstance(mac_key, bytes) and len(mac_key) == 32 + self.mac_key = mac_key + super().__init__(mac_key, enc_key, iv=iv, header_len=header_len, aad_offset=aad_offset) + + def __cinit__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + self.hmac_ctx = HMAC_CTX_new() + + def __dealloc__(self): + HMAC_CTX_free(self.hmac_ctx) + + cdef mac_compute(self, const unsigned char *data1, int data1_len, + const unsigned char *data2, int data2_len, + unsigned char *mac_buf): + if not HMAC_Init_ex(self.hmac_ctx, self.mac_key, self.mac_len, EVP_sha256(), NULL): + raise CryptoError('HMAC_Init_ex failed') + if not HMAC_Update(self.hmac_ctx, data1, data1_len): + raise CryptoError('HMAC_Update failed') + if not HMAC_Update(self.hmac_ctx, data2, data2_len): + raise CryptoError('HMAC_Update failed') + if not HMAC_Final(self.hmac_ctx, mac_buf, 
NULL): + raise CryptoError('HMAC_Final failed') + + cdef mac_verify(self, const unsigned char *data1, int data1_len, + const unsigned char *data2, int data2_len, + unsigned char *mac_buf, const unsigned char *mac_wanted): + self.mac_compute(data1, data1_len, data2, data2_len, mac_buf) + if CRYPTO_memcmp(mac_buf, mac_wanted, self.mac_len): + raise IntegrityError('MAC Authentication failed') + + +cdef class AES256_CTR_BLAKE2b(AES256_CTR_BASE): + cdef unsigned char *mac_key + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + assert isinstance(mac_key, bytes) and len(mac_key) == 128 + self.mac_key = mac_key + super().__init__(mac_key, enc_key, iv=iv, header_len=header_len, aad_offset=aad_offset) + + def __cinit__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + pass + + def __dealloc__(self): + pass + + cdef mac_compute(self, const unsigned char *data1, int data1_len, + const unsigned char *data2, int data2_len, + unsigned char *mac_buf): + cdef blake2b_state state + cdef int rc + rc = blake2b_init(&state, self.mac_len) + if rc == -1: + raise Exception('blake2b_init() failed') + with nogil: + rc = blake2b_update(&state, self.mac_key, 128) + if rc != -1: + rc = blake2b_update(&state, data1, data1_len) + if rc != -1: + rc = blake2b_update(&state, data2, data2_len) + if rc == -1: + raise Exception('blake2b_update() failed') + rc = blake2b_final(&state, mac_buf, self.mac_len) + if rc == -1: + raise Exception('blake2b_final() failed') + + cdef mac_verify(self, const unsigned char *data1, int data1_len, + const unsigned char *data2, int data2_len, + unsigned char *mac_buf, const unsigned char *mac_wanted): + self.mac_compute(data1, data1_len, data2, data2_len, mac_buf) + if CRYPTO_memcmp(mac_buf, mac_wanted, self.mac_len): + raise IntegrityError('MAC Authentication failed') + + +ctypedef const EVP_CIPHER * (* CIPHER)() + + +cdef class _AEAD_BASE: + # Layout: HEADER + MAC 16 + IV 12 + CT + + cdef CIPHER cipher + cdef EVP_CIPHER_CTX *ctx + cdef unsigned char *enc_key + cdef int cipher_blk_len + cdef int iv_len + cdef int aad_offset + cdef int header_len + cdef int mac_len + cdef unsigned char iv[12] + cdef long long blocks + + @classmethod + def requirements_check(cls): + """check whether library requirements for this ciphersuite are satisfied""" + raise NotImplemented # override / implement in child class + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + assert mac_key is None + assert isinstance(enc_key, bytes) and len(enc_key) == 32 + self.iv_len = sizeof(self.iv) + self.header_len = 1 + assert aad_offset <= header_len + self.aad_offset = aad_offset + self.header_len = header_len + self.mac_len = 16 + self.enc_key = enc_key + if iv is not None: + self.set_iv(iv) + else: + self.blocks = -1 # make sure set_iv is called before encrypt + + def __cinit__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + self.ctx = EVP_CIPHER_CTX_new() + + def __dealloc__(self): + EVP_CIPHER_CTX_free(self.ctx) + + def encrypt(self, data, header=b'', iv=None): + """ + encrypt data, compute mac over aad + iv + cdata, prepend header. + aad_offset is the offset into the header where aad starts. + """ + if iv is not None: + self.set_iv(iv) + assert self.blocks == 0, 'iv needs to be set before encrypt is called' + # AES-GCM, AES-OCB, CHACHA20 ciphers all add a internal 32bit counter to the 96bit (12Byte) + # IV we provide, thus we must not encrypt more than 2^32 cipher blocks with same IV). 
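+        # (illustrative arithmetic, for orientation only: 2**32 blocks of 16 bytes (AES)
+        #  correspond to 64 GiB of plaintext per IV; 2**32 blocks of 64 bytes (ChaCha20) to 256 GiB.)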
+ block_count = self.block_count(len(data)) + if block_count > 2**32: + raise ValueError('too much data, would overflow internal 32bit counter') + cdef int ilen = len(data) + cdef int hlen = len(header) + assert hlen == self.header_len + cdef int aoffset = self.aad_offset + cdef int alen = hlen - aoffset + cdef unsigned char *odata = PyMem_Malloc(hlen + self.mac_len + self.iv_len + + ilen + self.cipher_blk_len) + if not odata: + raise MemoryError + cdef int olen + cdef int offset + cdef Py_buffer idata = ro_buffer(data) + cdef Py_buffer hdata = ro_buffer(header) + try: + offset = 0 + for i in range(hlen): + odata[offset+i] = header[i] + offset += hlen + offset += self.mac_len + self.store_iv(odata+offset, self.iv) + rc = EVP_EncryptInit_ex(self.ctx, self.cipher(), NULL, NULL, NULL) + if not rc: + raise CryptoError('EVP_EncryptInit_ex failed') + if not EVP_CIPHER_CTX_ctrl(self.ctx, EVP_CTRL_GCM_SET_IVLEN, self.iv_len, NULL): + raise CryptoError('EVP_CIPHER_CTX_ctrl SET IVLEN failed') + rc = EVP_EncryptInit_ex(self.ctx, NULL, NULL, self.enc_key, self.iv) + if not rc: + raise CryptoError('EVP_EncryptInit_ex failed') + rc = EVP_EncryptUpdate(self.ctx, NULL, &olen, hdata.buf+aoffset, alen) + if not rc: + raise CryptoError('EVP_EncryptUpdate failed') + if not EVP_EncryptUpdate(self.ctx, NULL, &olen, odata+offset, self.iv_len): + raise CryptoError('EVP_EncryptUpdate failed') + offset += self.iv_len + rc = EVP_EncryptUpdate(self.ctx, odata+offset, &olen, idata.buf, ilen) + if not rc: + raise CryptoError('EVP_EncryptUpdate failed') + offset += olen + rc = EVP_EncryptFinal_ex(self.ctx, odata+offset, &olen) + if not rc: + raise CryptoError('EVP_EncryptFinal_ex failed') + offset += olen + if not EVP_CIPHER_CTX_ctrl(self.ctx, EVP_CTRL_GCM_GET_TAG, self.mac_len, odata+hlen): + raise CryptoError('EVP_CIPHER_CTX_ctrl GET TAG failed') + self.blocks = block_count + return odata[:offset] + finally: + PyMem_Free(odata) + PyBuffer_Release(&hdata) + PyBuffer_Release(&idata) + + def decrypt(self, envelope): + """ + authenticate aad + iv + cdata, decrypt cdata, ignore header bytes up to aad_offset. 
+ """ + # AES-GCM, AES-OCB, CHACHA20 ciphers all add a internal 32bit counter to the 96bit (12Byte) + # IV we provide, thus we must not decrypt more than 2^32 cipher blocks with same IV): + approx_block_count = self.block_count(len(envelope)) # sloppy, but good enough for borg + if approx_block_count > 2**32: + raise ValueError('too much data, would overflow internal 32bit counter') + cdef int ilen = len(envelope) + cdef int hlen = self.header_len + assert hlen == self.header_len + cdef int aoffset = self.aad_offset + cdef int alen = hlen - aoffset + cdef unsigned char *odata = PyMem_Malloc(ilen + self.cipher_blk_len) + if not odata: + raise MemoryError + cdef int olen + cdef int offset + cdef Py_buffer idata = ro_buffer(envelope) + try: + if not EVP_DecryptInit_ex(self.ctx, self.cipher(), NULL, NULL, NULL): + raise CryptoError('EVP_DecryptInit_ex failed') + iv = self.fetch_iv( idata.buf+hlen+self.mac_len) + self.set_iv(iv) + if not EVP_CIPHER_CTX_ctrl(self.ctx, EVP_CTRL_GCM_SET_IVLEN, self.iv_len, NULL): + raise CryptoError('EVP_CIPHER_CTX_ctrl SET IVLEN failed') + if not EVP_DecryptInit_ex(self.ctx, NULL, NULL, self.enc_key, iv): + raise CryptoError('EVP_DecryptInit_ex failed') + if not EVP_CIPHER_CTX_ctrl(self.ctx, EVP_CTRL_GCM_SET_TAG, self.mac_len, idata.buf+hlen): + raise CryptoError('EVP_CIPHER_CTX_ctrl SET TAG failed') + rc = EVP_DecryptUpdate(self.ctx, NULL, &olen, idata.buf+aoffset, alen) + if not rc: + raise CryptoError('EVP_DecryptUpdate failed') + if not EVP_DecryptUpdate(self.ctx, NULL, &olen, + idata.buf+hlen+self.mac_len, self.iv_len): + raise CryptoError('EVP_DecryptUpdate failed') + offset = 0 + rc = EVP_DecryptUpdate(self.ctx, odata+offset, &olen, + idata.buf+hlen+self.mac_len+self.iv_len, + ilen-hlen-self.mac_len-self.iv_len) + if not rc: + raise CryptoError('EVP_DecryptUpdate failed') + offset += olen + rc = EVP_DecryptFinal_ex(self.ctx, odata+offset, &olen) + if rc <= 0: + # a failure here means corrupted or tampered tag (mac) or data. + raise IntegrityError('Authentication / EVP_DecryptFinal_ex failed') + offset += olen + self.blocks = self.block_count(offset) + return odata[:offset] + finally: + PyMem_Free(odata) + PyBuffer_Release(&idata) + + def block_count(self, length): + return num_cipher_blocks(length, self.cipher_blk_len) + + def set_iv(self, iv): + # set_iv needs to be called before each encrypt() call, + # because encrypt does a full initialisation of the cipher context. + if isinstance(iv, int): + iv = iv.to_bytes(self.iv_len, byteorder='big') + assert isinstance(iv, bytes) and len(iv) == self.iv_len + for i in range(self.iv_len): + self.iv[i] = iv[i] + self.blocks = 0 # number of cipher blocks encrypted with this IV + + def next_iv(self): + # call this after encrypt() to get the next iv (int) for the next encrypt() call + # AES-GCM, AES-OCB, CHACHA20 ciphers all add a internal 32bit counter to the 96bit + # (12 byte) IV we provide, thus we only need to increment the IV by 1. 
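+        # (for comparison, illustrative note: AES256_CTR_BASE.next_iv above advances the IV by
+        #  self.blocks, i.e. by the number of 16 byte blocks consumed, since plain CTR mode has
+        #  no extra internal per-message counter.)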
+ iv = int.from_bytes(self.iv[:self.iv_len], byteorder='big') + return iv + 1 + + cdef fetch_iv(self, unsigned char * iv_in): + return iv_in[0:self.iv_len] + + cdef store_iv(self, unsigned char * iv_out, unsigned char * iv): + cdef int i + for i in range(self.iv_len): + iv_out[i] = iv[i] + + def extract_iv(self, envelope): + offset = self.header_len + self.mac_len + return bytes_to_long(envelope[offset:offset+self.iv_len]) + + +cdef class _AES_BASE(_AEAD_BASE): + def __init__(self, *args, **kwargs): + self.cipher_blk_len = 16 + super().__init__(*args, **kwargs) + + +cdef class _CHACHA_BASE(_AEAD_BASE): + def __init__(self, *args, **kwargs): + self.cipher_blk_len = 64 + super().__init__(*args, **kwargs) + + +cdef class AES256_OCB(_AES_BASE): + @classmethod + def requirements_check(cls): + if OPENSSL_VERSION_NUMBER < 0x10100000: + raise ValueError('AES OCB requires OpenSSL >= 1.1.0. Detected: OpenSSL %08x' % OPENSSL_VERSION_NUMBER) + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + self.requirements_check() + self.cipher = EVP_aes_256_ocb + super().__init__(mac_key, enc_key, iv=iv, header_len=header_len, aad_offset=aad_offset) + + +cdef class CHACHA20_POLY1305(_CHACHA_BASE): + @classmethod + def requirements_check(cls): + if OPENSSL_VERSION_NUMBER < 0x10100000: + raise ValueError('CHACHA20-POLY1305 requires OpenSSL >= 1.1.0. Detected: OpenSSL %08x' % OPENSSL_VERSION_NUMBER) + + def __init__(self, mac_key, enc_key, iv=None, header_len=1, aad_offset=1): + self.requirements_check() + self.cipher = EVP_chacha20_poly1305 + super().__init__(mac_key, enc_key, iv=iv, header_len=header_len, aad_offset=aad_offset) + + +cdef class AES: + """A thin wrapper around the OpenSSL EVP cipher API - for legacy code, like key file encryption""" + cdef CIPHER cipher + cdef EVP_CIPHER_CTX *ctx + cdef unsigned char *enc_key + cdef int cipher_blk_len + cdef int iv_len + cdef unsigned char iv[16] + cdef long long blocks + + def __init__(self, enc_key, iv=None): + assert isinstance(enc_key, bytes) and len(enc_key) == 32 + self.enc_key = enc_key + self.iv_len = 16 + assert sizeof(self.iv) == self.iv_len + self.cipher = EVP_aes_256_ctr + self.cipher_blk_len = 16 + if iv is not None: + self.set_iv(iv) + else: + self.blocks = -1 # make sure set_iv is called before encrypt + + def __cinit__(self, enc_key, iv=None): + self.ctx = EVP_CIPHER_CTX_new() + + def __dealloc__(self): + EVP_CIPHER_CTX_free(self.ctx) + + def encrypt(self, data, iv=None): + if iv is not None: + self.set_iv(iv) + assert self.blocks == 0, 'iv needs to be set before encrypt is called' + cdef Py_buffer idata = ro_buffer(data) + cdef int ilen = len(data) + cdef int offset + cdef int olen + cdef unsigned char *odata = PyMem_Malloc(ilen + self.cipher_blk_len) + if not odata: + raise MemoryError + try: + if not EVP_EncryptInit_ex(self.ctx, self.cipher(), NULL, self.enc_key, self.iv): + raise Exception('EVP_EncryptInit_ex failed') + offset = 0 + if not EVP_EncryptUpdate(self.ctx, odata, &olen, idata.buf, ilen): + raise Exception('EVP_EncryptUpdate failed') + offset += olen + if not EVP_EncryptFinal_ex(self.ctx, odata+offset, &olen): + raise Exception('EVP_EncryptFinal failed') + offset += olen + self.blocks = self.block_count(offset) + return odata[:offset] + finally: + PyMem_Free(odata) + PyBuffer_Release(&idata) + + def decrypt(self, data): + cdef Py_buffer idata = ro_buffer(data) + cdef int ilen = len(data) + cdef int offset + cdef int olen + cdef unsigned char *odata = PyMem_Malloc(ilen + self.cipher_blk_len) + if not odata: + 
raise MemoryError + try: + # Set cipher type and mode + if not EVP_DecryptInit_ex(self.ctx, self.cipher(), NULL, self.enc_key, self.iv): + raise Exception('EVP_DecryptInit_ex failed') + offset = 0 + if not EVP_DecryptUpdate(self.ctx, odata, &olen, idata.buf, ilen): + raise Exception('EVP_DecryptUpdate failed') + offset += olen + if EVP_DecryptFinal_ex(self.ctx, odata+offset, &olen) <= 0: + # this error check is very important for modes with padding or + # authentication. for them, a failure here means corrupted data. + # CTR mode does not use padding nor authentication. + raise Exception('EVP_DecryptFinal failed') + offset += olen + self.blocks = self.block_count(ilen) + return odata[:offset] + finally: + PyMem_Free(odata) + PyBuffer_Release(&idata) + + def block_count(self, length): + return num_cipher_blocks(length, self.cipher_blk_len) + + def set_iv(self, iv): + # set_iv needs to be called before each encrypt() call, + # because encrypt does a full initialisation of the cipher context. + if isinstance(iv, int): + iv = iv.to_bytes(self.iv_len, byteorder='big') + assert isinstance(iv, bytes) and len(iv) == self.iv_len + for i in range(self.iv_len): + self.iv[i] = iv[i] + self.blocks = 0 # number of cipher blocks encrypted with this IV + + def next_iv(self): + # call this after encrypt() to get the next iv (int) for the next encrypt() call + iv = int.from_bytes(self.iv[:self.iv_len], byteorder='big') + return iv + self.blocks + + + +def hmac_sha256(key, data): + cdef Py_buffer data_buf = ro_buffer(data) + cdef const unsigned char *key_ptr = key + cdef int key_len = len(key) + cdef unsigned char md[32] + try: + with nogil: + rc = HMAC(EVP_sha256(), key_ptr, key_len, data_buf.buf, data_buf.len, md, NULL) + if rc != md: + raise CryptoError('HMAC(EVP_sha256) failed') + finally: + PyBuffer_Release(&data_buf) + return PyBytes_FromStringAndSize( &md[0], 32) + + +cdef blake2b_update_from_buffer(blake2b_state *state, obj): + cdef Py_buffer buf = ro_buffer(obj) + try: + with nogil: + rc = blake2b_update(state, buf.buf, buf.len) + if rc == -1: + raise Exception('blake2b_update() failed') + finally: + PyBuffer_Release(&buf) + + +def blake2b_256(key, data): + cdef blake2b_state state + if blake2b_init(&state, 32) == -1: + raise Exception('blake2b_init() failed') + + cdef unsigned char md[32] + cdef unsigned char *key_ptr = key + + # This is secure, because BLAKE2 is not vulnerable to length-extension attacks (unlike SHA-1/2, MD-5 and others). + # See the BLAKE2 paper section 2.9 "Keyed hashing (MAC and PRF)" for details. + # A nice benefit is that this simpler prefix-MAC mode has less overhead than the more complex HMAC mode. + # We don't use the BLAKE2 parameter block (via blake2s_init_key) for this to + # avoid incompatibility with the limited API of OpenSSL. 
+ rc = blake2b_update(&state, key_ptr, len(key)) + if rc == -1: + raise Exception('blake2b_update() failed') + blake2b_update_from_buffer(&state, data) + + rc = blake2b_final(&state, &md[0], 32) + if rc == -1: + raise Exception('blake2b_final() failed') + + return PyBytes_FromStringAndSize( &md[0], 32) + + +def blake2b_128(data): + cdef blake2b_state state + cdef unsigned char md[16] + cdef unsigned char *data_ptr = data + + if blake2b_init(&state, 16) == -1: + raise Exception('blake2b_init() failed') + + rc = blake2b_update(&state, data_ptr, len(data)) + if rc == -1: + raise Exception('blake2b_update() failed') + + rc = blake2b_final(&state, &md[0], 16) + if rc == -1: + raise Exception('blake2b_final() failed') + + return PyBytes_FromStringAndSize( &md[0], 16) + + +def hkdf_hmac_sha512(ikm, salt, info, output_length): + """ + Compute HKDF-HMAC-SHA512 with input key material *ikm*, *salt* and *info* to produce *output_length* bytes. + + This is the "HMAC-based Extract-and-Expand Key Derivation Function (HKDF)" (RFC 5869) + instantiated with HMAC-SHA512. + + *output_length* must not be greater than 64 * 255 bytes. + """ + digest_length = 64 + assert output_length <= (255 * digest_length), 'output_length must be <= 255 * 64 bytes' + # Step 1. HKDF-Extract (ikm, salt) -> prk + if salt is None: + salt = bytes(64) + prk = hmac.HMAC(salt, ikm, hashlib.sha512).digest() + + # Step 2. HKDF-Expand (prk, info, output_length) -> output key + n = ceil(output_length / digest_length) + t_n = b'' + output = b'' + for i in range(n): + msg = t_n + info + (i + 1).to_bytes(1, 'little') + t_n = hmac.HMAC(prk, msg, hashlib.sha512).digest() + output += t_n + return output[:output_length] diff --git a/src/borg/crypto/nonces.py b/src/borg/crypto/nonces.py new file mode 100644 index 00000000..39ec3d72 --- /dev/null +++ b/src/borg/crypto/nonces.py @@ -0,0 +1,87 @@ +import os +import sys +from binascii import unhexlify + +from ..helpers import get_security_dir +from ..helpers import bin_to_hex +from ..platform import SaveFile +from ..remote import InvalidRPCMethod + +from .low_level import bytes_to_long, long_to_bytes + +MAX_REPRESENTABLE_NONCE = 2**64 - 1 +NONCE_SPACE_RESERVATION = 2**28 # This in units of AES blocksize (16 bytes) + + +class NonceManager: + def __init__(self, repository, manifest_nonce): + self.repository = repository + self.end_of_nonce_reservation = None + self.manifest_nonce = manifest_nonce + self.nonce_file = os.path.join(get_security_dir(self.repository.id_str), 'nonce') + + def get_local_free_nonce(self): + try: + with open(self.nonce_file, 'r') as fd: + return bytes_to_long(unhexlify(fd.read())) + except FileNotFoundError: + return None + + def commit_local_nonce_reservation(self, next_unreserved, start_nonce): + if self.get_local_free_nonce() != start_nonce: + raise Exception("nonce space reservation with mismatched previous state") + with SaveFile(self.nonce_file, binary=False) as fd: + fd.write(bin_to_hex(long_to_bytes(next_unreserved))) + + def get_repo_free_nonce(self): + try: + return self.repository.get_free_nonce() + except InvalidRPCMethod as error: + # old server version, suppress further calls + sys.stderr.write("Please upgrade to borg version 1.1+ on the server for safer AES-CTR nonce handling.\n") + self.get_repo_free_nonce = lambda: None + self.commit_repo_nonce_reservation = lambda next_unreserved, start_nonce: None + return None + + def commit_repo_nonce_reservation(self, next_unreserved, start_nonce): + self.repository.commit_nonce_reservation(next_unreserved, start_nonce) + 
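+    # (illustrative sizing note: NONCE_SPACE_RESERVATION above is 2**28 counter values,
+    #  i.e. room for about 4 GiB of AES-CTR output (2**28 * 16 byte blocks) per reservation step.)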
+ def ensure_reservation(self, nonce, nonce_space_needed): + """ + Call this before doing encryption, give current, yet unused, integer IV as + and the amount of subsequent (counter-like) IVs needed as . + Return value is the IV (counter) integer you shall use for encryption. + + Note: this method may return the you gave, if a reservation for it exists or + can be established, so make sure you give a unused nonce. + """ + # Nonces may never repeat, even if a transaction aborts or the system crashes. + # Therefore a part of the nonce space is reserved before any nonce is used for encryption. + # As these reservations are committed to permanent storage before any nonce is used, this protects + # against nonce reuse in crashes and transaction aborts. In that case the reservation still + # persists and the whole reserved space is never reused. + # + # Local storage on the client is used to protect against an attacker that is able to rollback the + # state of the server or can do arbitrary modifications to the repository. + # Storage on the server is used for the multi client use case where a transaction on client A is + # aborted and later client B writes to the repository. + # + # This scheme does not protect against attacker who is able to rollback the state of the server + # or can do arbitrary modifications to the repository in the multi client usecase. + + if self.end_of_nonce_reservation: + # we already got a reservation, if nonce_space_needed still fits everything is ok + next_nonce = nonce + assert next_nonce <= self.end_of_nonce_reservation + if next_nonce + nonce_space_needed <= self.end_of_nonce_reservation: + return next_nonce + + repo_free_nonce = self.get_repo_free_nonce() + local_free_nonce = self.get_local_free_nonce() + free_nonce_space = max(x for x in (repo_free_nonce, local_free_nonce, self.manifest_nonce, self.end_of_nonce_reservation) if x is not None) + reservation_end = free_nonce_space + nonce_space_needed + NONCE_SPACE_RESERVATION + assert reservation_end < MAX_REPRESENTABLE_NONCE + self.commit_repo_nonce_reservation(reservation_end, repo_free_nonce) + self.commit_local_nonce_reservation(reservation_end, local_free_nonce) + self.end_of_nonce_reservation = reservation_end + return free_nonce_space diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 93530f89..129e3b20 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -2,9 +2,12 @@ import errno import io import os import stat +import struct +import sys import tempfile import time from collections import defaultdict +from signal import SIGINT from distutils.version import LooseVersion import llfuse @@ -13,10 +16,13 @@ import msgpack from .logger import create_logger logger = create_logger() +from .crypto.low_level import blake2b_128 from .archive import Archive -from .helpers import daemonize, safe_encode +from .hashindex import FuseVersionsIndex +from .helpers import daemonize, hardlinkable, signal_handler, format_file_size from .item import Item from .lrucache import LRUCache +from .remote import RemoteRepository # Does this version of llfuse support ns precision? have_fuse_xtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns') @@ -32,55 +38,235 @@ else: class ItemCache: - def __init__(self): - self.fd = tempfile.TemporaryFile(prefix='borg-tmp') + """ + This is the "meat" of the file system's metadata storage. + + This class generates inode numbers that efficiently index items in archives, + and retrieves items from these inode numbers. 
+ """ + + # 2 MiB are approximately ~230000 items (depends on the average number of items per metadata chunk). + # + # Since growing a bytearray has to copy it, growing it will converge to O(n^2), however, + # this is not yet relevant due to the swiftness of copying memory. If it becomes an issue, + # use an anonymous mmap and just resize that (or, if on 64 bit, make it so big you never need + # to resize it in the first place; that's free). + GROW_META_BY = 2 * 1024 * 1024 + + indirect_entry_struct = struct.Struct('=cII') + assert indirect_entry_struct.size == 9 + + def __init__(self, decrypted_repository): + self.decrypted_repository = decrypted_repository + # self.meta, the "meta-array" is a densely packed array of metadata about where items can be found. + # It is indexed by the inode number minus self.offset. (This is in a way eerily similar to how the first + # unices did this). + # The meta-array contains chunk IDs and item entries (described in iter_archive_items). + # The chunk IDs are referenced by item entries through relative offsets, + # which are bounded by the metadata chunk size. + self.meta = bytearray() + # The current write offset in self.meta + self.write_offset = 0 + + # Offset added to meta-indices, resulting in inodes, + # or subtracted from inodes, resulting in meta-indices. + # XXX: Merge FuseOperations.items and ItemCache to avoid + # this implicit limitation / hack (on the number of synthetic inodes, degenerate + # cases can inflate their number far beyond the number of archives). self.offset = 1000000 - def add(self, item): - pos = self.fd.seek(0, io.SEEK_END) - self.fd.write(msgpack.packb(item.as_dict())) - return pos + self.offset + # A temporary file that contains direct items, i.e. items directly cached in this layer. + # These are items that span more than one chunk and thus cannot be efficiently cached + # by the object cache (self.decrypted_repository), which would require variable-length structures; + # possible but not worth the effort, see iter_archive_items. + self.fd = tempfile.TemporaryFile(prefix='borg-tmp') + + # A small LRU cache for chunks requested by ItemCache.get() from the object cache, + # this significantly speeds up directory traversal and similar operations which + # tend to re-read the same chunks over and over. + # The capacity is kept low because increasing it does not provide any significant advantage, + # but makes LRUCache's square behaviour noticeable and consumes more memory. + self.chunks = LRUCache(capacity=10, dispose=lambda _: None) + + # Instrumentation + # Count of indirect items, i.e. data is cached in the object cache, not directly in this cache + self.indirect_items = 0 + # Count of direct items, i.e. 
data is in self.fd + self.direct_items = 0 def get(self, inode): - self.fd.seek(inode - self.offset, io.SEEK_SET) - item = next(msgpack.Unpacker(self.fd, read_size=1024)) - return Item(internal_dict=item) + offset = inode - self.offset + if offset < 0: + raise ValueError('ItemCache.get() called with an invalid inode number') + if self.meta[offset] == ord(b'I'): + _, chunk_id_relative_offset, chunk_offset = self.indirect_entry_struct.unpack_from(self.meta, offset) + chunk_id_offset = offset - chunk_id_relative_offset + # bytearray slices are bytearrays as well, explicitly convert to bytes() + chunk_id = bytes(self.meta[chunk_id_offset:chunk_id_offset + 32]) + chunk = self.chunks.get(chunk_id) + if not chunk: + csize, chunk = next(self.decrypted_repository.get_many([chunk_id])) + self.chunks[chunk_id] = chunk + data = memoryview(chunk)[chunk_offset:] + unpacker = msgpack.Unpacker() + unpacker.feed(data) + return Item(internal_dict=next(unpacker)) + elif self.meta[offset] == ord(b'S'): + fd_offset = int.from_bytes(self.meta[offset + 1:offset + 9], 'little') + self.fd.seek(fd_offset, io.SEEK_SET) + return Item(internal_dict=next(msgpack.Unpacker(self.fd, read_size=1024))) + else: + raise ValueError('Invalid entry type in self.meta') + + def iter_archive_items(self, archive_item_ids): + unpacker = msgpack.Unpacker() + + # Current offset in the metadata stream, which consists of all metadata chunks glued together + stream_offset = 0 + # Offset of the current chunk in the metadata stream + chunk_begin = 0 + # Length of the chunk preciding the current chunk + last_chunk_length = 0 + msgpacked_bytes = b'' + + write_offset = self.write_offset + meta = self.meta + pack_indirect_into = self.indirect_entry_struct.pack_into + + def write_bytes(append_msgpacked_bytes): + # XXX: Future versions of msgpack include an Unpacker.tell() method that provides this for free. + nonlocal msgpacked_bytes + nonlocal stream_offset + msgpacked_bytes += append_msgpacked_bytes + stream_offset += len(append_msgpacked_bytes) + + for key, (csize, data) in zip(archive_item_ids, self.decrypted_repository.get_many(archive_item_ids)): + # Store the chunk ID in the meta-array + if write_offset + 32 >= len(meta): + self.meta = meta = meta + bytes(self.GROW_META_BY) + meta[write_offset:write_offset + 32] = key + current_id_offset = write_offset + write_offset += 32 + + # The chunk boundaries cannot be tracked through write_bytes, because the unpack state machine + # *can* and *will* consume partial items, so calls to write_bytes are unrelated to chunk boundaries. + chunk_begin += last_chunk_length + last_chunk_length = len(data) + + unpacker.feed(data) + while True: + try: + item = unpacker.unpack(write_bytes) + except msgpack.OutOfData: + # Need more data, feed the next chunk + break + + current_item = msgpacked_bytes + current_item_length = len(current_item) + current_spans_chunks = stream_offset - current_item_length < chunk_begin + msgpacked_bytes = b'' + + if write_offset + 9 >= len(meta): + self.meta = meta = meta + bytes(self.GROW_META_BY) + + # item entries in the meta-array come in two different flavours, both nine bytes long. + # (1) for items that span chunks: + # + # 'S' + 8 byte offset into the self.fd file, where the msgpacked item starts. 
+ # + # (2) for items that are completely contained in one chunk, which usually is the great majority + # (about 700:1 for system backups) + # + # 'I' + 4 byte offset where the chunk ID is + 4 byte offset in the chunk + # where the msgpacked items starts + # + # The chunk ID offset is the number of bytes _back_ from the start of the entry, i.e.: + # + # |Chunk ID| .... |S1234abcd| + # ^------ offset ----------^ + + if current_spans_chunks: + pos = self.fd.seek(0, io.SEEK_END) + self.fd.write(current_item) + meta[write_offset:write_offset + 9] = b'S' + pos.to_bytes(8, 'little') + self.direct_items += 1 + else: + item_offset = stream_offset - current_item_length - chunk_begin + pack_indirect_into(meta, write_offset, b'I', write_offset - current_id_offset, item_offset) + self.indirect_items += 1 + inode = write_offset + self.offset + write_offset += 9 + + yield inode, Item(internal_dict=item) + + self.write_offset = write_offset class FuseOperations(llfuse.Operations): - """Export archive as a fuse filesystem + """Export archive as a FUSE filesystem """ - + # mount options allow_damaged_files = False + versions = False - def __init__(self, key, repository, manifest, archive, cached_repo): + def __init__(self, key, repository, manifest, args, decrypted_repository): super().__init__() - self._inode_count = 0 + self.repository_uncached = repository + self.decrypted_repository = decrypted_repository + self.args = args + self.manifest = manifest self.key = key - self.repository = cached_repo + # Maps inode numbers to Item instances. This is used for synthetic inodes, + # i.e. file-system objects that are made up by FuseOperations and are not contained + # in the archives. For example archive directories or intermediate directories + # not contained in archives. self.items = {} + # _inode_count is the current count of synthetic inodes, i.e. those in self.items + self._inode_count = 0 + # Maps inode numbers to the inode number of the parent self.parent = {} + # Maps inode numbers to a dictionary mapping byte directory entry names to their inode numbers, + # i.e. this contains all dirents of everything that is mounted. (It becomes really big). 
self.contents = defaultdict(dict) - self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid()) + self.default_uid = os.getuid() + self.default_gid = os.getgid() + self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=self.default_uid, gid=self.default_gid) self.pending_archives = {} - self.accounted_chunks = {} - self.cache = ItemCache() + self.cache = ItemCache(decrypted_repository) data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1)) logger.debug('mount data cache capacity: %d chunks', data_cache_capacity) self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None) - if archive: - self.process_archive(archive) + + def _create_filesystem(self): + self._create_dir(parent=1) # first call, create root dir (inode == 1) + if self.args.location.archive: + self.process_archive(self.args.location.archive) else: - # Create root inode - self.parent[1] = self.allocate_inode() - self.items[1] = self.default_dir - for archive_name in manifest.archives: - # Create archive placeholder inode - archive_inode = self.allocate_inode() - self.items[archive_inode] = self.default_dir - self.parent[archive_inode] = 1 - self.contents[1][os.fsencode(archive_name)] = archive_inode - self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name) + self.versions_index = FuseVersionsIndex() + for archive in self.manifest.archives.list_considering(self.args): + if self.versions: + # process archives immediately + self.process_archive(archive.name) + else: + # lazily load archives, create archive placeholder inode + archive_inode = self._create_dir(parent=1, mtime=int(archive.ts.timestamp() * 1e9)) + self.contents[1][os.fsencode(archive.name)] = archive_inode + self.pending_archives[archive_inode] = archive.name + + def sig_info_handler(self, sig_no, stack): + logger.debug('fuse: %d synth inodes, %d edges (%s)', + self._inode_count, len(self.parent), + # getsizeof is the size of the dict itself; key and value are two small-ish integers, + # which are shared due to code structure (this has been verified). 
+ format_file_size(sys.getsizeof(self.parent) + len(self.parent) * sys.getsizeof(self._inode_count))) + logger.debug('fuse: %d pending archives', len(self.pending_archives)) + logger.debug('fuse: ItemCache %d entries (%d direct, %d indirect), meta-array size %s, direct items size %s', + self.cache.direct_items + self.cache.indirect_items, self.cache.direct_items, self.cache.indirect_items, + format_file_size(sys.getsizeof(self.cache.meta)), + format_file_size(os.stat(self.cache.fd.fileno()).st_size)) + logger.debug('fuse: data cache: %d/%d entries, %s', len(self.data_cache.items()), self.data_cache._capacity, + format_file_size(sum(len(chunk) for key, chunk in self.data_cache.items()))) + self.decrypted_repository.log_instrumentation() def mount(self, mountpoint, mount_options, foreground=False): """Mount filesystem on *mountpoint* with *mount_options*.""" @@ -92,9 +278,18 @@ class FuseOperations(llfuse.Operations): self.allow_damaged_files = True except ValueError: pass + try: + options.remove('versions') + self.versions = True + except ValueError: + pass + self._create_filesystem() llfuse.init(self, mountpoint, options) if not foreground: - daemonize() + old_id, new_id = daemonize() + if not isinstance(self.repository_uncached, RemoteRepository): + # local repo and the locking process' PID just changed, migrate it: + self.repository_uncached.migrate_lock(old_id, new_id) # If the file system crashes, we do not want to umount because in that # case the mountpoint suddenly appears to become empty. This can have @@ -103,61 +298,122 @@ class FuseOperations(llfuse.Operations): # mirror. umount = False try: - signal = fuse_main() - umount = (signal is None) # no crash and no signal -> umount request + with signal_handler('SIGUSR1', self.sig_info_handler), \ + signal_handler('SIGINFO', self.sig_info_handler): + signal = fuse_main() + # no crash and no signal (or it's ^C and we're in the foreground) -> umount request + umount = (signal is None or (signal == SIGINT and foreground)) finally: llfuse.close(umount) - def process_archive(self, archive, prefix=[]): - """Build fuse inode hierarchy from archive metadata + def _create_dir(self, parent, mtime=None): + """Create directory """ - unpacker = msgpack.Unpacker() - for key, chunk in zip(archive.metadata[b'items'], self.repository.get_many(archive.metadata[b'items'])): - _, data = self.key.decrypt(key, chunk) - unpacker.feed(data) - for item in unpacker: - item = Item(internal_dict=item) + ino = self.allocate_inode() + if mtime is not None: + self.items[ino] = Item(**self.default_dir.as_dict()) + self.items[ino].mtime = mtime + else: + self.items[ino] = self.default_dir + self.parent[ino] = parent + return ino + + def process_archive(self, archive_name, prefix=[]): + """Build FUSE inode hierarchy from archive metadata + """ + self.file_versions = {} # for versions mode: original path -> version + t0 = time.perf_counter() + archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name, + consider_part_files=self.args.consider_part_files) + for item_inode, item in self.cache.iter_archive_items(archive.metadata.items): + path = os.fsencode(item.path) + is_dir = stat.S_ISDIR(item.mode) + if is_dir: try: # This can happen if an archive was created with a command line like # $ borg create ... dir1/file dir1 # In this case the code below will have created a default_dir inode for dir1 already. 
- inode = self._find_inode(safe_encode(item.path), prefix) + inode = self._find_inode(path, prefix) except KeyError: pass else: self.items[inode] = item continue - segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/') - del item.path - num_segments = len(segments) - parent = 1 - for i, segment in enumerate(segments, 1): - # Insert a default root inode if needed - if self._inode_count == 0 and segment: - archive_inode = self.allocate_inode() - self.items[archive_inode] = self.default_dir - self.parent[archive_inode] = parent - # Leaf segment? - if i == num_segments: - if 'source' in item and stat.S_ISREG(item.mode): - inode = self._find_inode(item.source, prefix) - item = self.cache.get(inode) - item.nlink = item.get('nlink', 1) + 1 - self.items[inode] = item - else: - inode = self.cache.add(item) - self.parent[inode] = parent - if segment: - self.contents[parent][segment] = inode - elif segment in self.contents[parent]: - parent = self.contents[parent][segment] - else: - inode = self.allocate_inode() - self.items[inode] = self.default_dir - self.parent[inode] = parent - if segment: - self.contents[parent][segment] = inode - parent = inode + segments = prefix + path.split(b'/') + parent = 1 + for segment in segments[:-1]: + parent = self.process_inner(segment, parent) + self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode) + duration = time.perf_counter() - t0 + logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name) + + def process_leaf(self, name, item, parent, prefix, is_dir, item_inode): + def file_version(item, path): + if 'chunks' in item: + file_id = blake2b_128(path) + current_version, previous_id = self.versions_index.get(file_id, (0, None)) + + chunk_ids = [chunk_id for chunk_id, _, _ in item.chunks] + contents_id = blake2b_128(b''.join(chunk_ids)) + + if contents_id != previous_id: + current_version += 1 + self.versions_index[file_id] = current_version, contents_id + + return current_version + + def make_versioned_name(name, version, add_dir=False): + if add_dir: + # add intermediate directory with same name as filename + path_fname = name.rsplit(b'/', 1) + name += b'/' + path_fname[-1] + # keep original extension at end to avoid confusing tools + name, ext = os.path.splitext(name) + version_enc = os.fsencode('.%05d' % version) + return name + version_enc + ext + + if self.versions and not is_dir: + parent = self.process_inner(name, parent) + path = os.fsencode(item.path) + version = file_version(item, path) + if version is not None: + # regular file, with contents - maybe a hardlink master + name = make_versioned_name(name, version) + self.file_versions[path] = version + + path = item.path + del item.path # save some space + if 'source' in item and hardlinkable(item.mode): + # a hardlink, no contents, is the hardlink master + source = os.fsencode(item.source) + if self.versions: + # adjust source name with version + version = self.file_versions[source] + source = make_versioned_name(source, version, add_dir=True) + name = make_versioned_name(name, version) + try: + inode = self._find_inode(source, prefix) + except KeyError: + logger.warning('Skipping broken hard link: %s -> %s', path, item.source) + return + item = self.cache.get(inode) + item.nlink = item.get('nlink', 1) + 1 + self.items[inode] = item + else: + inode = item_inode + self.parent[inode] = parent + if name: + self.contents[parent][name] = inode + + def process_inner(self, name, parent_inode): + dir = self.contents[parent_inode] + if 
name in dir: + inode = dir[name] + else: + inode = self._create_dir(parent_inode) + if name: + dir[name] = inode + return inode def allocate_inode(self): self._inode_count += 1 @@ -182,7 +438,7 @@ class FuseOperations(llfuse.Operations): return self.cache.get(inode) def _find_inode(self, path, prefix=[]): - segments = prefix + os.fsencode(os.path.normpath(path)).split(b'/') + segments = prefix + path.split(b'/') inode = 1 for segment in segments: inode = self.contents[inode][segment] @@ -190,14 +446,6 @@ class FuseOperations(llfuse.Operations): def getattr(self, inode, ctx=None): item = self.get_item(inode) - size = 0 - dsize = 0 - if 'chunks' in item: - for key, chunksize, _ in item.chunks: - size += chunksize - if self.accounted_chunks.get(key, inode) == inode: - self.accounted_chunks[key] = inode - dsize += chunksize entry = llfuse.EntryAttributes() entry.st_ino = inode entry.generation = 0 @@ -205,12 +453,12 @@ class FuseOperations(llfuse.Operations): entry.attr_timeout = 300 entry.st_mode = item.mode entry.st_nlink = item.get('nlink', 1) - entry.st_uid = item.uid - entry.st_gid = item.gid + entry.st_uid = item.uid if item.uid >= 0 else self.default_uid + entry.st_gid = item.gid if item.gid >= 0 else self.default_gid entry.st_rdev = item.get('rdev', 0) - entry.st_size = size + entry.st_size = item.get_size() entry.st_blksize = 512 - entry.st_blocks = dsize / 512 + entry.st_blocks = (entry.st_size + entry.st_blksize - 1) // entry.st_blksize # note: older archives only have mtime (not atime nor ctime) mtime_ns = item.mtime if have_fuse_xtime_ns: @@ -230,15 +478,15 @@ class FuseOperations(llfuse.Operations): def getxattr(self, inode, name, ctx=None): item = self.get_item(inode) try: - return item.get('xattrs', {})[name] + return item.get('xattrs', {})[name] or b'' except KeyError: raise llfuse.FUSEError(llfuse.ENOATTR) from None def _load_pending_archive(self, inode): # Check if this is an archive we need to load - archive = self.pending_archives.pop(inode, None) - if archive: - self.process_archive(archive, [os.fsencode(archive.name)]) + archive_name = self.pending_archives.pop(inode, None) + if archive_name: + self.process_archive(archive_name, [os.fsencode(archive_name)]) def lookup(self, parent_inode, name, ctx=None): self._load_pending_archive(parent_inode) @@ -282,7 +530,7 @@ class FuseOperations(llfuse.Operations): # evict fully read chunk from cache del self.data_cache[id] else: - _, data = self.key.decrypt(id, self.repository.get(id)) + data = self.key.decrypt(id, self.repository_uncached.get(id)) if offset + n < len(data): # chunk was only partially read, cache it self.data_cache[id] = data diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 389cf256..f14eeea9 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -6,27 +6,51 @@ import os cimport cython from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t from libc.errno cimport errno +from libc.string cimport memcpy from cpython.exc cimport PyErr_SetFromErrnoWithFilename +from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release +from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_CheckExact, PyBytes_GET_SIZE, PyBytes_AS_STRING -API_VERSION = 2 +API_VERSION = '1.1_07' cdef extern from "_hashindex.c": ctypedef struct HashIndex: pass - HashIndex *hashindex_read(char *path) + ctypedef struct FuseVersionsElement: + uint32_t version + char hash[16] + + HashIndex *hashindex_read(object file_py, int permit_compact) except * HashIndex *hashindex_init(int capacity, int 
key_size, int value_size) void hashindex_free(HashIndex *index) - int hashindex_get_size(HashIndex *index) - int hashindex_write(HashIndex *index, char *path) + int hashindex_len(HashIndex *index) + int hashindex_size(HashIndex *index) + void hashindex_write(HashIndex *index, object file_py) except * void *hashindex_get(HashIndex *index, void *key) void *hashindex_next_key(HashIndex *index, void *key) int hashindex_delete(HashIndex *index, void *key) int hashindex_set(HashIndex *index, void *key, void *value) + uint64_t hashindex_compact(HashIndex *index) uint32_t _htole32(uint32_t v) uint32_t _le32toh(uint32_t v) + double HASH_MAX_LOAD + + +cdef extern from "cache_sync/cache_sync.c": + ctypedef struct CacheSyncCtx: + pass + + CacheSyncCtx *cache_sync_init(HashIndex *chunks) + const char *cache_sync_error(const CacheSyncCtx *ctx) + uint64_t cache_sync_num_files(const CacheSyncCtx *ctx) + int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length) + void cache_sync_free(CacheSyncCtx *ctx) + + uint32_t _MAX_VALUE + cdef _NoDefault = object() @@ -47,10 +71,6 @@ AssertionError is raised instead. assert UINT32_MAX == 2**32-1 -# module-level constant because cdef's in classes can't have default values -cdef uint32_t _MAX_VALUE = 2**32-1025 -MAX_VALUE = _MAX_VALUE - assert _MAX_VALUE % 2 == 1 @@ -59,16 +79,20 @@ cdef class IndexBase: cdef HashIndex *index cdef int key_size - def __cinit__(self, capacity=0, path=None, key_size=32): - self.key_size = key_size + _key_size = 32 + + MAX_LOAD_FACTOR = HASH_MAX_LOAD + MAX_VALUE = _MAX_VALUE + + def __cinit__(self, capacity=0, path=None, permit_compact=False): + self.key_size = self._key_size if path: - path = os.fsencode(path) - self.index = hashindex_read(path) - if not self.index: - if errno: - PyErr_SetFromErrnoWithFilename(OSError, path) - return - raise RuntimeError('hashindex_read failed') + if isinstance(path, (str, bytes)): + with open(path, 'rb') as fd: + self.index = hashindex_read(fd, permit_compact) + else: + self.index = hashindex_read(path, permit_compact) + assert self.index, 'hashindex_read() returned NULL with no exception set' else: self.index = hashindex_init(capacity, self.key_size, self.value_size) if not self.index: @@ -79,13 +103,15 @@ cdef class IndexBase: hashindex_free(self.index) @classmethod - def read(cls, path): - return cls(path=path) + def read(cls, path, permit_compact=False): + return cls(path=path, permit_compact=permit_compact) def write(self, path): - path = os.fsencode(path) - if not hashindex_write(self.index, path): - raise Exception('hashindex_write failed') + if isinstance(path, (str, bytes)): + with open(path, 'wb') as fd: + hashindex_write(self.index, fd) + else: + hashindex_write(self.index, path) def clear(self): hashindex_free(self.index) @@ -99,7 +125,12 @@ cdef class IndexBase: def __delitem__(self, key): assert len(key) == self.key_size - if not hashindex_delete(self.index, key): + rc = hashindex_delete(self.index, key) + if rc == 1: + return # success + if rc == -1: + raise KeyError(key) + if rc == 0: raise Exception('hashindex_delete failed') def get(self, key, default=None): @@ -119,7 +150,44 @@ cdef class IndexBase: raise def __len__(self): - return hashindex_get_size(self.index) + return hashindex_len(self.index) + + def size(self): + """Return size (bytes) of hash table.""" + return hashindex_size(self.index) + + def compact(self): + return hashindex_compact(self.index) + + +cdef class FuseVersionsIndex(IndexBase): + # 4 byte version + 16 byte file contents hash + value_size = 20 + 
_key_size = 16 + + def __getitem__(self, key): + cdef FuseVersionsElement *data + assert len(key) == self.key_size + data = hashindex_get(self.index, key) + if data == NULL: + raise KeyError(key) + return _le32toh(data.version), PyBytes_FromStringAndSize(data.hash, 16) + + def __setitem__(self, key, value): + cdef FuseVersionsElement data + assert len(key) == self.key_size + data.version = value[0] + assert data.version <= _MAX_VALUE, "maximum number of versions reached" + if not PyBytes_CheckExact(value[1]) or PyBytes_GET_SIZE(value[1]) != 16: + raise TypeError("Expected bytes of length 16 for second value") + memcpy(data.hash, PyBytes_AS_STRING(value[1]), 16) + data.version = _htole32(data.version) + if not hashindex_set(self.index, key, &data): + raise Exception('hashindex_set failed') + + def __contains__(self, key): + assert len(key) == self.key_size + return hashindex_get(self.index, key) != NULL cdef class NSIndex(IndexBase): @@ -172,17 +240,22 @@ cdef class NSKeyIterator: cdef HashIndex *index cdef const void *key cdef int key_size + cdef int exhausted def __cinit__(self, key_size): self.key = NULL self.key_size = key_size + self.exhausted = 0 def __iter__(self): return self def __next__(self): + if self.exhausted: + raise StopIteration self.key = hashindex_next_key(self.index, self.key) if not self.key: + self.exhausted = 1 raise StopIteration cdef uint32_t *value = (self.key + self.key_size) cdef uint32_t segment = _le32toh(value[0]) @@ -218,7 +291,7 @@ cdef class ChunkIndex(IndexBase): if not data: raise KeyError(key) cdef uint32_t refcount = _le32toh(data[0]) - assert refcount <= _MAX_VALUE + assert refcount <= _MAX_VALUE, "invalid reference count" return ChunkIndexEntry(refcount, _le32toh(data[1]), _le32toh(data[2])) def __setitem__(self, key, value): @@ -236,7 +309,7 @@ cdef class ChunkIndex(IndexBase): assert len(key) == self.key_size data = hashindex_get(self.index, key) if data != NULL: - assert data[0] <= _MAX_VALUE + assert _le32toh(data[0]) <= _MAX_VALUE, "invalid reference count" return data != NULL def incref(self, key): @@ -291,7 +364,7 @@ cdef class ChunkIndex(IndexBase): unique_chunks += 1 values = (key + self.key_size) refcount = _le32toh(values[0]) - assert refcount <= MAX_VALUE, "invalid reference count" + assert refcount <= _MAX_VALUE, "invalid reference count" chunks += refcount unique_size += _le32toh(values[1]) unique_csize += _le32toh(values[2]) @@ -300,6 +373,48 @@ cdef class ChunkIndex(IndexBase): return size, csize, unique_size, unique_csize, unique_chunks, chunks + def stats_against(self, ChunkIndex master_index): + """ + Calculate chunk statistics of this index against *master_index*. + + A chunk is counted as unique if the number of references + in this index matches the number of references in *master_index*. + + This index must be a subset of *master_index*. + + Return the same statistics tuple as summarize: + size, csize, unique_size, unique_csize, unique_chunks, chunks. 
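+
+ For example: if this index holds 3 references to a chunk and *master_index*
+ also holds 3, every reference comes from the archives this index represents,
+ so the chunk counts towards unique_size, unique_csize and unique_chunks here.
+ If *master_index* holds more references, other archives share the chunk and
+ it only contributes to size, csize and chunks.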
+ """ + cdef uint64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0 + cdef uint32_t our_refcount, chunk_size, chunk_csize + cdef const uint32_t *our_values + cdef const uint32_t *master_values + cdef const void *key = NULL + cdef HashIndex *master = master_index.index + + while True: + key = hashindex_next_key(self.index, key) + if not key: + break + our_values = (key + self.key_size) + master_values = hashindex_get(master, key) + if not master_values: + raise ValueError('stats_against: key contained in self but not in master_index.') + our_refcount = _le32toh(our_values[0]) + chunk_size = _le32toh(master_values[1]) + chunk_csize = _le32toh(master_values[2]) + + chunks += our_refcount + size += chunk_size * our_refcount + csize += chunk_csize * our_refcount + if our_values[0] == master_values[0]: + # our refcount equals the master's refcount, so this chunk is unique to us + unique_chunks += 1 + unique_size += chunk_size + unique_csize += chunk_csize + + return size, csize, unique_size, unique_csize, unique_chunks, chunks + def add(self, key, refs, size, csize): assert len(key) == self.key_size cdef uint32_t[3] data @@ -314,8 +429,8 @@ cdef class ChunkIndex(IndexBase): if values: refcount1 = _le32toh(values[0]) refcount2 = _le32toh(data[0]) - assert refcount1 <= _MAX_VALUE - assert refcount2 <= _MAX_VALUE + assert refcount1 <= _MAX_VALUE, "invalid reference count" + assert refcount2 <= _MAX_VALUE, "invalid reference count" result64 = refcount1 + refcount2 values[0] = _htole32(min(result64, _MAX_VALUE)) values[1] = data[1] @@ -333,25 +448,81 @@ cdef class ChunkIndex(IndexBase): break self._add(key, (key + self.key_size)) + def zero_csize_ids(self): + cdef void *key = NULL + cdef uint32_t *values + entries = [] + while True: + key = hashindex_next_key(self.index, key) + if not key: + break + values = (key + self.key_size) + refcount = _le32toh(values[0]) + assert refcount <= _MAX_VALUE, "invalid reference count" + if _le32toh(values[2]) == 0: + # csize == 0 + entries.append(PyBytes_FromStringAndSize( key, self.key_size)) + return entries + cdef class ChunkKeyIterator: cdef ChunkIndex idx cdef HashIndex *index cdef const void *key cdef int key_size + cdef int exhausted def __cinit__(self, key_size): self.key = NULL self.key_size = key_size + self.exhausted = 0 def __iter__(self): return self def __next__(self): + if self.exhausted: + raise StopIteration self.key = hashindex_next_key(self.index, self.key) if not self.key: + self.exhausted = 1 raise StopIteration cdef uint32_t *value = (self.key + self.key_size) cdef uint32_t refcount = _le32toh(value[0]) - assert refcount <= MAX_VALUE, "invalid reference count" + assert refcount <= _MAX_VALUE, "invalid reference count" return (self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]), _le32toh(value[2])) + + +cdef Py_buffer ro_buffer(object data) except *: + cdef Py_buffer view + PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) + return view + + +cdef class CacheSynchronizer: + cdef ChunkIndex chunks + cdef CacheSyncCtx *sync + + def __cinit__(self, chunks): + self.chunks = chunks + self.sync = cache_sync_init(self.chunks.index) + if not self.sync: + raise Exception('cache_sync_init failed') + + def __dealloc__(self): + if self.sync: + cache_sync_free(self.sync) + + def feed(self, chunk): + cdef Py_buffer chunk_buf = ro_buffer(chunk) + cdef int rc + rc = cache_sync_feed(self.sync, chunk_buf.buf, chunk_buf.len) + PyBuffer_Release(&chunk_buf) + if not rc: + error = cache_sync_error(self.sync) + 
if error != NULL: + raise ValueError('cache_sync_feed failed: ' + error.decode('ascii')) + + @property + def num_files(self): + return cache_sync_num_files(self.sync) diff --git a/src/borg/helpers.py b/src/borg/helpers.py deleted file mode 100644 index 2c84758a..00000000 --- a/src/borg/helpers.py +++ /dev/null @@ -1,1620 +0,0 @@ -import argparse -import getpass -import hashlib -import logging -import os -import os.path -import platform -import re -import signal -import socket -import sys -import stat -import textwrap -import time -import unicodedata -import uuid -from binascii import hexlify -from collections import namedtuple, deque -from contextlib import contextmanager -from datetime import datetime, timezone, timedelta -from fnmatch import translate -from functools import wraps, partial -from itertools import islice -from operator import attrgetter -from string import Formatter - -if sys.platform != 'win32': - import grp - import pwd -else: - import posixpath - - -import msgpack -import msgpack.fallback - -from .logger import create_logger -logger = create_logger() - -from . import __version__ as borg_version -from . import chunker -from . import crypto -from . import hashindex -from . import shellpattern -from .constants import * # NOQA -from .compress import COMPR_BUFFER, get_compressor - -# meta dict, data bytes -_Chunk = namedtuple('_Chunk', 'meta data') - - -def Chunk(data, **meta): - return _Chunk(meta, data) - - -class Error(Exception): - """Error base class""" - - # if we raise such an Error and it is only catched by the uppermost - # exception handler (that exits short after with the given exit_code), - # it is always a (fatal and abrupt) EXIT_ERROR, never just a warning. - exit_code = EXIT_ERROR - # show a traceback? - traceback = False - - def get_message(self): - return type(self).__doc__.format(*self.args) - - -class ErrorWithTraceback(Error): - """like Error, but show a traceback also""" - traceback = True - - -class IntegrityError(ErrorWithTraceback): - """Data integrity error""" - - -class ExtensionModuleError(Error): - """The Borg binary extension modules do not seem to be properly installed""" - - -class NoManifestError(Error): - """Repository has no manifest.""" - - -class PlaceholderError(Error): - """Formatting Error: "{}".format({}): {}({})""" - - -def check_extension_modules(): - from . 
import platform - if hashindex.API_VERSION != 2: - raise ExtensionModuleError - if chunker.API_VERSION != 2: - raise ExtensionModuleError - if crypto.API_VERSION != 3: - raise ExtensionModuleError - if platform.API_VERSION != 3: - raise ExtensionModuleError - - -class Manifest: - - MANIFEST_ID = b'\0' * 32 - - def __init__(self, key, repository, item_keys=None): - self.archives = {} - self.config = {} - self.key = key - self.repository = repository - self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS - - @property - def id_str(self): - return bin_to_hex(self.id) - - @classmethod - def load(cls, repository, key=None): - from .key import key_factory - from .repository import Repository - try: - cdata = repository.get(cls.MANIFEST_ID) - except Repository.ObjectNotFound: - raise NoManifestError - if not key: - key = key_factory(repository, cdata) - manifest = cls(key, repository) - _, data = key.decrypt(None, cdata) - manifest.id = key.id_hash(data) - m = msgpack.unpackb(data) - if not m.get(b'version') == 1: - raise ValueError('Invalid manifest version') - manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items()) - manifest.timestamp = m.get(b'timestamp') - if manifest.timestamp: - manifest.timestamp = manifest.timestamp.decode('ascii') - manifest.config = m[b'config'] - # valid item keys are whatever is known in the repo or every key we know - manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', [])) - return manifest, key - - def write(self): - self.timestamp = datetime.utcnow().isoformat() - data = msgpack.packb(StableDict({ - 'version': 1, - 'archives': self.archives, - 'timestamp': self.timestamp, - 'config': self.config, - 'item_keys': tuple(self.item_keys), - })) - self.id = self.key.id_hash(data) - self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data))) - - def list_archive_infos(self, sort_by=None, reverse=False): - # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts - ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') - archives = [] - for name, values in self.archives.items(): - ts = parse_timestamp(values[b'time'].decode('utf-8')) - id = values[b'id'] - archives.append(ArchiveInfo(name=name, id=id, ts=ts)) - if sort_by is not None: - archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) - return archives - - -def prune_within(archives, within): - multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365} - try: - hours = int(within[:-1]) * multiplier[within[-1]] - except (KeyError, ValueError): - # I don't like how this displays the original exception too: - raise argparse.ArgumentTypeError('Unable to parse --within option: "%s"' % within) - if hours <= 0: - raise argparse.ArgumentTypeError('Number specified using --within option must be positive') - target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600) - return [a for a in archives if a.ts > target] - - -def prune_split(archives, pattern, n, skip=[]): - last = None - keep = [] - if n == 0: - return keep - for a in sorted(archives, key=attrgetter('ts'), reverse=True): - period = to_localtime(a.ts).strftime(pattern) - if period != last: - last = period - if a not in skip: - keep.append(a) - if len(keep) == n: - break - return keep - - -def get_home_dir(): - """Get user's home directory while preferring a possibly set HOME - environment variable - """ - # os.path.expanduser() behaves differently for '~' and '~someuser' as - # parameters: when called with an 
explicit username, the possibly set - # environment variable HOME is no longer respected. So we have to check if - # it is set and only expand the user's home directory if HOME is unset. - if os.environ.get('HOME', ''): - return os.environ.get('HOME') - else: - return os.path.expanduser('~%s' % os.environ.get('USER', '')) - - -def get_keys_dir(): - """Determine where to repository keys and cache""" - - xdg_config = os.environ.get('XDG_CONFIG_HOME', os.path.join(get_home_dir(), '.config')) - keys_dir = os.environ.get('BORG_KEYS_DIR', os.path.join(xdg_config, 'borg', 'keys')) - if not os.path.exists(keys_dir): - os.makedirs(keys_dir) - os.chmod(keys_dir, stat.S_IRWXU) - return keys_dir - - -def get_cache_dir(): - """Determine where to repository keys and cache""" - xdg_cache = os.environ.get('XDG_CACHE_HOME', os.path.join(get_home_dir(), '.cache')) - cache_dir = os.environ.get('BORG_CACHE_DIR', os.path.join(xdg_cache, 'borg')) - if not os.path.exists(cache_dir): - os.makedirs(cache_dir) - os.chmod(cache_dir, stat.S_IRWXU) - with open(os.path.join(cache_dir, CACHE_TAG_NAME), 'wb') as fd: - fd.write(CACHE_TAG_CONTENTS) - fd.write(textwrap.dedent(""" - # This file is a cache directory tag created by Borg. - # For information about cache directory tags, see: - # http://www.brynosaurus.com/cachedir/ - """).encode('ascii')) - return cache_dir - - -def to_localtime(ts): - """Convert datetime object from UTC to local time zone""" - return datetime(*time.localtime((ts - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds())[:6]) - - -def parse_timestamp(timestamp): - """Parse a ISO 8601 timestamp string""" - if '.' in timestamp: # microseconds might not be present - return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f').replace(tzinfo=timezone.utc) - else: - return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc) - - -def load_excludes(fh): - """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on - both line ends are ignored. - """ - return [parse_pattern(pattern) for pattern in clean_lines(fh)] - - -def update_excludes(args): - """Merge exclude patterns from files with those on command line.""" - if hasattr(args, 'exclude_files') and args.exclude_files: - if not hasattr(args, 'excludes') or args.excludes is None: - args.excludes = [] - for file in args.exclude_files: - args.excludes += load_excludes(file) - file.close() - - -class PatternMatcher: - def __init__(self, fallback=None): - self._items = [] - - # Value to return from match function when none of the patterns match. - self.fallback = fallback - - def empty(self): - return not len(self._items) - - def add(self, patterns, value): - """Add list of patterns to internal list. The given value is returned from the match function when one of the - given patterns matches. 
- """ - self._items.extend((i, value) for i in patterns) - - def match(self, path): - for (pattern, value) in self._items: - if pattern.match(path): - return value - - return self.fallback - - -def normalized(func): - """ Decorator for the Pattern match methods, returning a wrapper that - normalizes OSX paths to match the normalized pattern on OSX, and - returning the original method on other platforms""" - @wraps(func) - def normalize_wrapper(self, path): - return func(self, unicodedata.normalize("NFD", path)) - - if sys.platform in ('darwin',): - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match - return normalize_wrapper - else: - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - return func - - -class PatternBase: - """Shared logic for inclusion/exclusion patterns. - """ - PREFIX = NotImplemented - - def __init__(self, pattern): - self.pattern_orig = pattern - self.match_count = 0 - - if sys.platform in ('darwin',): - pattern = unicodedata.normalize("NFD", pattern) - - self._prepare(pattern) - - @normalized - def match(self, path): - matches = self._match(path) - - if matches: - self.match_count += 1 - - return matches - - def __repr__(self): - return '%s(%s)' % (type(self), self.pattern) - - def __str__(self): - return self.pattern_orig - - def _prepare(self, pattern): - raise NotImplementedError - - def _match(self, path): - raise NotImplementedError - - -# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path -# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path -# separator to the end of the path before matching. - - -class PathPrefixPattern(PatternBase): - """Literal files or directories listed on the command line - for some operations (e.g. extract, but not create). - If a directory is specified, all paths that start with that - path match as well. A trailing slash makes no difference. - """ - PREFIX = "pp" - - def _prepare(self, pattern): - if sys.platform != 'win32': - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep - else: - self.pattern = posixpath.normpath(pattern).rstrip(posixpath.sep) + posixpath.sep - - def _match(self, path): - if sys.platform != 'win32': - return (path + os.path.sep).startswith(self.pattern) - else: - return (path + posixpath.sep).startswith(self.pattern) - - -class FnmatchPattern(PatternBase): - """Shell glob patterns to exclude. A trailing slash means to - exclude the contents of a directory, but not the directory itself. - """ - PREFIX = "fm" - - def _prepare(self, pattern): - if sys.platform != 'win32': - if pattern.endswith(os.path.sep): - pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep - else: - pattern = os.path.normpath(pattern) + os.path.sep + '*' - else: - if pattern.endswith(os.path.sep) or pattern.endswith(posixpath.sep): - pattern = posixpath.normpath(pattern).rstrip(posixpath.sep) + posixpath.sep + '*' + posixpath.sep - else: - pattern = posixpath.normpath(pattern) + posixpath.sep + '*' - - self.pattern = pattern - - # fnmatch and re.match both cache compiled regular expressions. - # Nevertheless, this is about 10 times faster. 
- self.regex = re.compile(translate(self.pattern)) - - def _match(self, path): - if sys.platform != 'win32': - return (self.regex.match(path + os.path.sep) is not None) - else: - return (self.regex.match(path.replace('\\', '/') + posixpath.sep) is not None) - - -class ShellPattern(PatternBase): - """Shell glob patterns to exclude. A trailing slash means to - exclude the contents of a directory, but not the directory itself. - """ - PREFIX = "sh" - - def _prepare(self, pattern): - sep = os.path.sep - - if pattern.endswith(sep): - pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep - else: - pattern = os.path.normpath(pattern) + sep + "**" + sep + "*" - - self.pattern = pattern - self.regex = re.compile(shellpattern.translate(self.pattern)) - - def _match(self, path): - return (self.regex.match(path + os.path.sep) is not None) - - -class RegexPattern(PatternBase): - """Regular expression to exclude. - """ - PREFIX = "re" - - def _prepare(self, pattern): - self.pattern = pattern - self.regex = re.compile(pattern) - - def _match(self, path): - # Normalize path separators - if os.path.sep != '/': - path = path.replace(os.path.sep, '/') - - return (self.regex.search(path) is not None) - - -_PATTERN_STYLES = set([ - FnmatchPattern, - PathPrefixPattern, - RegexPattern, - ShellPattern, -]) - -_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) - - -def parse_pattern(pattern, fallback=FnmatchPattern): - """Read pattern from string and return an instance of the appropriate implementation class. - """ - if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): - (style, pattern) = (pattern[:2], pattern[3:]) - - cls = _PATTERN_STYLE_BY_PREFIX.get(style, None) - - if cls is None: - raise ValueError("Unknown pattern style: {}".format(style)) - else: - cls = fallback - - return cls(pattern) - - -def timestamp(s): - """Convert a --timestamp=s argument to a datetime object""" - try: - # is it pointing to a file / directory? - ts = os.stat(s).st_mtime - return datetime.utcfromtimestamp(ts) - except OSError: - # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support. - for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', - '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', - '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M', - '%Y-%m-%d', '%Y-%j', - ): - try: - return datetime.strptime(s, format) - except ValueError: - continue - raise ValueError - - -def ChunkerParams(s): - if s.strip().lower() == "default": - return CHUNKER_PARAMS - chunk_min, chunk_max, chunk_mask, window_size = s.split(',') - if int(chunk_max) > 23: - # do not go beyond 2**23 (8MB) chunk size now, - # COMPR_BUFFER can only cope with up to this size - raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. 
chunk size)') - return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size) - - -def CompressionSpec(s): - values = s.split(',') - count = len(values) - if count < 1: - raise ValueError - # --compression algo[,level] - name = values[0] - if name in ('none', 'lz4', ): - return dict(name=name) - if name in ('zlib', 'lzma', ): - if count < 2: - level = 6 # default compression level in py stdlib - elif count == 2: - level = int(values[1]) - if not 0 <= level <= 9: - raise ValueError - else: - raise ValueError - return dict(name=name, level=level) - if name == 'auto': - if 2 <= count <= 3: - compression = ','.join(values[1:]) - else: - raise ValueError - return dict(name=name, spec=CompressionSpec(compression)) - raise ValueError - - -def PrefixSpec(s): - return replace_placeholders(s) - - -def dir_is_cachedir(path): - """Determines whether the specified path is a cache directory (and - therefore should potentially be excluded from the backup) according to - the CACHEDIR.TAG protocol - (http://www.brynosaurus.com/cachedir/spec.html). - """ - - tag_path = os.path.join(path, CACHE_TAG_NAME) - try: - if os.path.exists(tag_path): - with open(tag_path, 'rb') as tag_file: - tag_data = tag_file.read(len(CACHE_TAG_CONTENTS)) - if tag_data == CACHE_TAG_CONTENTS: - return True - except OSError: - pass - return False - - -def dir_is_tagged(path, exclude_caches, exclude_if_present): - """Determines whether the specified path is excluded by being a cache - directory or containing user-specified tag files. Returns a list of the - paths of the tag files (either CACHEDIR.TAG or the matching - user-specified files). - """ - tag_paths = [] - if exclude_caches and dir_is_cachedir(path): - tag_paths.append(os.path.join(path, CACHE_TAG_NAME)) - if exclude_if_present is not None: - for tag in exclude_if_present: - tag_path = os.path.join(path, tag) - if os.path.isfile(tag_path): - tag_paths.append(tag_path) - return tag_paths - - -def partial_format(format, mapping): - """ - Apply format.format_map(mapping) while preserving unknown keys - - Does not support attribute access, indexing and ![rsa] conversions - """ - for key, value in mapping.items(): - key = re.escape(key) - format = re.sub(r'(? 
0 else '' - - for unit in units[:-1]: - if abs(round(num, precision)) < power: - if isinstance(num, int): - return "{}{}{}{}{}".format(prefix, num, sep, unit, suffix) - else: - return "{}{:3.{}f}{}{}{}".format(prefix, num, precision, sep, unit, suffix) - num /= float(power) - return "{}{:.{}f}{}{}{}".format(prefix, num, precision, sep, units[-1], suffix) - - -def sizeof_fmt_iec(num, suffix='B', sep='', precision=2, sign=False): - return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign, - units=['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'], power=1024) - - -def sizeof_fmt_decimal(num, suffix='B', sep='', precision=2, sign=False): - return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign, - units=['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'], power=1000) - - -def format_archive(archive): - return '%-36s %s [%s]' % ( - archive.name, - format_time(to_localtime(archive.ts)), - bin_to_hex(archive.id), - ) - - -def memoize(function): - cache = {} - - def decorated_function(*args): - try: - return cache[args] - except KeyError: - val = function(*args) - cache[args] = val - return val - return decorated_function - - -@memoize -def uid2user(uid, default=None): - try: - if sys.platform != 'win32': - return pwd.getpwuid(uid).pw_name - else: - return os.getlogin() - except KeyError: - return default - - -@memoize -def user2uid(user, default=None): - try: - if sys.platform != 'win32': - return user and pwd.getpwnam(user).pw_uid - else: - return user and 0 - except KeyError: - return default - - -@memoize -def gid2group(gid, default=None): - try: - if sys.platform != 'win32': - return grp.getgrgid(gid).gr_name - else: - return '' - except KeyError: - return default - - -@memoize -def group2gid(group, default=None): - if sys.platform != 'win32': - if group == '': - return 0 # From windows - try: - return group and grp.getgrnam(group).gr_gid - except KeyError: - return default - else: - return 0 - - -def getuid(): - if sys.platform != 'win32': - return os.getuid() - else: - return 0 - - -def posix_acl_use_stored_uid_gid(acl): - """Replace the user/group field with the stored uid/gid - """ - entries = [] - for entry in safe_decode(acl).split('\n'): - if entry: - fields = entry.split(':') - if len(fields) == 4: - entries.append(':'.join([fields[0], fields[3], fields[2]])) - else: - entries.append(entry) - return safe_encode('\n'.join(entries)) - - -def safe_decode(s, coding='utf-8', errors='surrogateescape'): - """decode bytes to str, with round-tripping "invalid" bytes""" - if s is None: - return None - return s.decode(coding, errors) - - -def safe_encode(s, coding='utf-8', errors='surrogateescape'): - """encode str to bytes, with round-tripping "invalid" bytes""" - if s is None: - return None - return s.encode(coding, errors) - - -def bin_to_hex(binary): - return hexlify(binary).decode('ascii') - - -class Location: - """Object representing a repository / archive location - """ - proto = user = host = port = path = archive = None - # borg mount's FUSE filesystem creates one level of directories from - # the archive names. Thus, we must not accept "/" in archive names. - ssh_re = re.compile(r'(?Pssh)://(?:(?P[^@]+)@)?' - r'(?P[^:/#]+)(?::(?P\d+))?' - r'(?P[^:]+)(?:::(?P[^/]+))?$') - file_re = None - if sys.platform != 'win32': - file_re = re.compile(r'(?Pfile)://' - r'(?P[^:]+)(?:::(?P[^/]+))?$') - else: - file_re = re.compile(r'((?Pfile)://)?' - r'(?P[a-zA-Z])?:[\\/](?P[^:]+)(?:::(?P[^/]+))?$') - scp_re = re.compile(r'((?:(?P[^@]+)@)?(?P[^:/]+):)?' 
- r'(?P[^:]+)(?:::(?P[^/]+))?$') - # get the repo from BORG_RE env and the optional archive from param. - # if the syntax requires giving REPOSITORY (see "borg mount"), - # use "::" to let it use the env var. - # if REPOSITORY argument is optional, it'll automatically use the env. - env_re = re.compile(r'(?:::(?P[^/]+)?)?$') - - def __init__(self, text=''): - self.orig = text - if not self.parse(self.orig): - raise ValueError - - def parse(self, text): - text = replace_placeholders(text) - valid = self._parse(text) - if valid: - return True - m = self.env_re.match(text) - if not m: - return False - repo = os.environ.get('BORG_REPO') - if repo is None: - return False - valid = self._parse(repo) - if not valid: - return False - self.archive = m.group('archive') - return True - - def _parse(self, text): - if sys.platform == 'win32': - m = self.file_re.match(text) - if m: - self.proto = 'file' - self.path = posixpath.normpath(m.group('drive') + ":\\" + m.group('path')) - self.archive = m.group('archive') - return True - - m = self.ssh_re.match(text) - if m: - self.proto = m.group('proto') - self.user = m.group('user') - self.host = m.group('host') - self.port = m.group('port') and int(m.group('port')) or None - if sys.platform != 'win32': - self.path = os.path.normpath(m.group('path')) - else: - self.path = posixpath.normpath(m.group('path')) - self.archive = m.group('archive') - return True - if sys.platform != 'win32': - m = self.file_re.match(text) - if m: - self.proto = m.group('proto') - self.path = os.path.normpath(m.group('path')) - self.archive = m.group('archive') - return True - m = self.scp_re.match(text) - if m: - self.user = m.group('user') - self.host = m.group('host') - if sys.platform != 'win32': - self.path = os.path.normpath(m.group('path')) - else: - self.path = posixpath.normpath(m.group('path')) - self.archive = m.group('archive') - self.proto = self.host and 'ssh' or 'file' - return True - return False - - def __str__(self): - items = [ - 'proto=%r' % self.proto, - 'user=%r' % self.user, - 'host=%r' % self.host, - 'port=%r' % self.port, - 'path=%r' % self.path, - 'archive=%r' % self.archive, - ] - return ', '.join(items) - - def to_key_filename(self): - name = re.sub('[^\w]', '_', self.path).strip('_') - if self.proto != 'file': - name = self.host + '__' + name - return os.path.join(get_keys_dir(), name) - - def __repr__(self): - return "Location(%s)" % self - - def canonical_path(self): - if self.proto == 'file': - return self.path - else: - if self.path and self.path.startswith('~'): - path = '/' + self.path - elif self.path and not self.path.startswith('/'): - path = '/~/' + self.path - else: - path = self.path - return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '', - self.host, - ':{}'.format(self.port) if self.port else '', - path) - - -def location_validator(archive=None): - def validator(text): - try: - loc = Location(text) - except ValueError: - raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text) from None - if archive is True and not loc.archive: - raise argparse.ArgumentTypeError('"%s": No archive specified' % text) - elif archive is False and loc.archive: - raise argparse.ArgumentTypeError('"%s" No archive can be specified' % text) - return loc - return validator - - -def archivename_validator(): - def validator(text): - if '/' in text or '::' in text or not text: - raise argparse.ArgumentTypeError('Invalid repository name: "%s"' % text) - return text - return validator - - -def decode_dict(d, keys, encoding='utf-8', 
errors='surrogateescape'): - for key in keys: - if isinstance(d.get(key), bytes): - d[key] = d[key].decode(encoding, errors) - return d - - -def remove_surrogates(s, errors='replace'): - """Replace surrogates generated by fsdecode with '?' - """ - return s.encode('utf-8', errors).decode('utf-8') - -_safe_re = None -if sys.platform != 'win32': - _safe_re = re.compile(r'^((\.\.)?/+)+') -else: - _safe_re = re.compile(r'^((\.\.)?[/\\]+)+') - - -def make_path_safe(path): - """Make path safe by making it relative and local - """ - if sys.platform != 'win32': - return _safe_re.sub('', path) or '.' - else: - tail = path - if len(path) > 2 and (path[0:2] == '//' or path[0:2] == '\\\\' or path[1] == ':'): - drive, tail = os.path.splitdrive(path) - tail = tail.replace('\\', '/') - return posixpath.normpath(_safe_re.sub('', tail) or '.') - - -def daemonize(): - """Detach process from controlling terminal and run in background - """ - pid = os.fork() - if pid: - os._exit(0) - os.setsid() - pid = os.fork() - if pid: - os._exit(0) - os.chdir('/') - os.close(0) - os.close(1) - os.close(2) - fd = os.open('/dev/null', os.O_RDWR) - os.dup2(fd, 0) - os.dup2(fd, 1) - os.dup2(fd, 2) - - -class StableDict(dict): - """A dict subclass with stable items() ordering""" - def items(self): - return sorted(super().items()) - - -def bigint_to_int(mtime): - """Convert bytearray to int - """ - if isinstance(mtime, bytes): - return int.from_bytes(mtime, 'little', signed=True) - return mtime - - -def int_to_bigint(value): - """Convert integers larger than 64 bits to bytearray - - Smaller integers are left alone - """ - if value.bit_length() > 63: - return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) - return value - - -def is_slow_msgpack(): - return msgpack.Packer is msgpack.fallback.Packer - - -FALSISH = ('No', 'NO', 'no', 'N', 'n', '0', ) -TRUISH = ('Yes', 'YES', 'yes', 'Y', 'y', '1', ) -DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', ) - - -def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, - retry_msg=None, invalid_msg=None, env_msg=None, - falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH, - default=False, retry=True, env_var_override=None, ofile=None, input=input): - """Output (usually a question) and let user input an answer. - Qualifies the answer according to falsish, truish and defaultish as True, False or . - If it didn't qualify and retry_msg is None (no retries wanted), - return the default [which defaults to False]. Otherwise let user retry - answering until answer is qualified. - - If env_var_override is given and this var is present in the environment, do not ask - the user, but just use the env var contents as answer as if it was typed in. - Otherwise read input from stdin and proceed as normal. - If EOF is received instead an input or an invalid input without retry possibility, - return default. 
- - :param msg: introducing message to output on ofile, no \n is added [None] - :param retry_msg: retry message to output on ofile, no \n is added [None] - :param false_msg: message to output before returning False [None] - :param true_msg: message to output before returning True [None] - :param default_msg: message to output before returning a [None] - :param invalid_msg: message to output after a invalid answer was given [None] - :param env_msg: message to output when using input from env_var_override [None], - needs to have 2 placeholders for answer and env var name, e.g.: "{} (from {})" - :param falsish: sequence of answers qualifying as False - :param truish: sequence of answers qualifying as True - :param defaultish: sequence of answers qualifying as - :param default: default return value (defaultish answer was given or no-answer condition) [False] - :param retry: if True and input is incorrect, retry. Otherwise return default. [True] - :param env_var_override: environment variable name [None] - :param ofile: output stream [sys.stderr] - :param input: input function [input from builtins] - :return: boolean answer value, True or False - """ - # note: we do not assign sys.stderr as default above, so it is - # really evaluated NOW, not at function definition time. - if ofile is None: - ofile = sys.stderr - if default not in (True, False): - raise ValueError("invalid default value, must be True or False") - if msg: - print(msg, file=ofile, end='', flush=True) - while True: - answer = None - if env_var_override: - answer = os.environ.get(env_var_override) - if answer is not None and env_msg: - print(env_msg.format(answer, env_var_override), file=ofile) - if answer is None: - try: - answer = input() - except EOFError: - # avoid defaultish[0], defaultish could be empty - answer = truish[0] if default else falsish[0] - if answer in defaultish: - if default_msg: - print(default_msg, file=ofile) - return default - if answer in truish: - if true_msg: - print(true_msg, file=ofile) - return True - if answer in falsish: - if false_msg: - print(false_msg, file=ofile) - return False - # if we get here, the answer was invalid - if invalid_msg: - print(invalid_msg, file=ofile) - if not retry: - return default - if retry_msg: - print(retry_msg, file=ofile, end='', flush=True) - # in case we used an environment variable and it gave an invalid answer, do not use it again: - env_var_override = None - - -class ProgressIndicatorPercent: - def __init__(self, total, step=5, start=0, same_line=False, msg="%3.0f%%"): - """ - Percentage-based progress indicator - - :param total: total amount of items - :param step: step size in percent - :param start: at which percent value to start - :param same_line: if True, emit output always on same line - :param msg: output message, must contain one %f placeholder for the percentage - """ - self.counter = 0 # 0 .. (total-1) - self.total = total - self.trigger_at = start # output next percentage value when reaching (at least) this - self.step = step - self.msg = msg - self.same_line = same_line - self.handler = None - self.logger = logging.getLogger('borg.output.progress') - - # If there are no handlers, set one up explicitly because the - # terminator and propagation needs to be set. If there are, - # they must have been set up by BORG_LOGGING_CONF: skip setup. 
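(An editorial aside, not part of the patch.) The step/start parameters described above implement "report only every Nth percent": a percentage is emitted once it reaches the current trigger value, which then advances by step. A minimal standalone sketch of that logic (hypothetical function, not borg's class):

def percent_steps(total, step=5, start=0):
    """Yield only the percentages at which output would be triggered."""
    trigger_at = start
    for counter in range(total):
        pct = counter * 100 / total
        if pct >= trigger_at:
            trigger_at += step
            yield pct

print(list(percent_steps(10, step=25)))  # [0.0, 30.0, 50.0, 80.0]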
- if not self.logger.handlers: - self.handler = logging.StreamHandler(stream=sys.stderr) - self.handler.setLevel(logging.INFO) - self.handler.terminator = '\r' if self.same_line else '\n' - - self.logger.addHandler(self.handler) - if self.logger.level == logging.NOTSET: - self.logger.setLevel(logging.WARN) - self.logger.propagate = False - - def __del__(self): - if self.handler is not None: - self.logger.removeHandler(self.handler) - self.handler.close() - - def progress(self, current=None): - if current is not None: - self.counter = current - pct = self.counter * 100 / self.total - self.counter += 1 - if pct >= self.trigger_at: - self.trigger_at += self.step - return pct - - def show(self, current=None): - pct = self.progress(current) - if pct is not None: - return self.output(pct) - - def output(self, percent): - self.logger.info(self.msg % percent) - - def finish(self): - if self.same_line: - self.logger.info(" " * len(self.msg % 100.0)) - - -class ProgressIndicatorEndless: - def __init__(self, step=10, file=None): - """ - Progress indicator (long row of dots) - - :param step: every Nth call, call the func - :param file: output file, default: sys.stderr - """ - self.counter = 0 # call counter - self.triggered = 0 # increases 1 per trigger event - self.step = step # trigger every calls - if file is None: - file = sys.stderr - self.file = file - - def progress(self): - self.counter += 1 - trigger = self.counter % self.step == 0 - if trigger: - self.triggered += 1 - return trigger - - def show(self): - trigger = self.progress() - if trigger: - return self.output(self.triggered) - - def output(self, triggered): - print('.', end='', file=self.file, flush=True) - - def finish(self): - print(file=self.file) - - -def sysinfo(): - info = [] - info.append('Platform: %s' % (' '.join(platform.uname()), )) - if sys.platform.startswith('linux'): - info.append('Linux: %s %s %s' % platform.linux_distribution()) - info.append('Borg: %s Python: %s %s' % (borg_version, platform.python_implementation(), platform.python_version())) - info.append('PID: %d CWD: %s' % (os.getpid(), os.getcwd())) - info.append('sys.argv: %r' % sys.argv) - info.append('SSH_ORIGINAL_COMMAND: %r' % os.environ.get('SSH_ORIGINAL_COMMAND')) - info.append('') - return '\n'.join(info) - - -def log_multi(*msgs, level=logging.INFO, logger=logger): - """ - log multiple lines of text, each line by a separate logging call for cosmetic reasons - - each positional argument may be a single or multiple lines (separated by newlines) of text. 
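(An editorial aside, not part of the patch.) For example, a hypothetical call log_multi("borg: error\ndetails follow", "see the docs", level=logging.WARNING) emits three separate WARNING records, one per line: "borg: error", "details follow", "see the docs".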
- """ - lines = [] - for msg in msgs: - lines.extend(msg.splitlines()) - for line in lines: - logger.log(level, line) - - -class BaseFormatter: - FIXED_KEYS = { - # Formatting aids - 'LF': '\n', - 'SPACE': ' ', - 'TAB': '\t', - 'CR': '\r', - 'NUL': '\0', - 'NEWLINE': os.linesep, - 'NL': os.linesep, - } - - def get_item_data(self, item): - raise NotImplementedError - - def format_item(self, item): - return self.format.format_map(self.get_item_data(item)) - - @staticmethod - def keys_help(): - return " - NEWLINE: OS dependent line separator\n" \ - " - NL: alias of NEWLINE\n" \ - " - NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath\n" \ - " - SPACE\n" \ - " - TAB\n" \ - " - CR\n" \ - " - LF" - - -class ArchiveFormatter(BaseFormatter): - - def __init__(self, format): - self.format = partial_format(format, self.FIXED_KEYS) - - def get_item_data(self, archive): - return { - 'barchive': archive.name, - 'archive': remove_surrogates(archive.name), - 'id': bin_to_hex(archive.id), - 'time': format_time(to_localtime(archive.ts)), - } - - @staticmethod - def keys_help(): - return " - archive: archive name interpreted as text (might be missing non-text characters, see barchive)\n" \ - " - barchive: verbatim archive name, can contain any character except NUL\n" \ - " - time: time of creation of the archive\n" \ - " - id: internal ID of the archive" - - -class ItemFormatter(BaseFormatter): - KEY_DESCRIPTIONS = { - 'bpath': 'verbatim POSIX path, can contain any character except NUL', - 'path': 'path interpreted as text (might be missing non-text characters, see bpath)', - 'source': 'link target for links (identical to linktarget)', - 'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links', - 'csize': 'compressed size', - 'num_chunks': 'number of chunks in this file', - 'unique_chunks': 'number of unique chunks in this file', - } - KEY_GROUPS = ( - ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'), - ('size', 'csize', 'num_chunks', 'unique_chunks'), - ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'), - tuple(sorted(hashlib.algorithms_guaranteed)), - ('archiveid', 'archivename', 'extra'), - ) - - @classmethod - def available_keys(cls): - class FakeArchive: - fpr = name = "" - - from .item import Item - fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0) - formatter = cls(FakeArchive, "") - keys = [] - keys.extend(formatter.call_keys.keys()) - keys.extend(formatter.get_item_data(fake_item).keys()) - return keys - - @classmethod - def keys_help(cls): - help = [] - keys = cls.available_keys() - for key in cls.FIXED_KEYS: - keys.remove(key) - - for group in cls.KEY_GROUPS: - for key in group: - keys.remove(key) - text = " - " + key - if key in cls.KEY_DESCRIPTIONS: - text += ": " + cls.KEY_DESCRIPTIONS[key] - help.append(text) - help.append("") - assert not keys, str(keys) - return "\n".join(help) - - def __init__(self, archive, format): - self.archive = archive - static_keys = { - 'archivename': archive.name, - 'archiveid': archive.fpr, - } - static_keys.update(self.FIXED_KEYS) - self.format = partial_format(format, static_keys) - self.format_keys = {f[1] for f in Formatter().parse(format)} - self.call_keys = { - 'size': self.calculate_size, - 'csize': self.calculate_csize, - 'num_chunks': self.calculate_num_chunks, - 'unique_chunks': self.calculate_unique_chunks, - 'isomtime': partial(self.format_time, 'mtime'), - 'isoctime': partial(self.format_time, 'ctime'), 
- 'isoatime': partial(self.format_time, 'atime'), - 'mtime': partial(self.time, 'mtime'), - 'ctime': partial(self.time, 'ctime'), - 'atime': partial(self.time, 'atime'), - } - for hash_function in hashlib.algorithms_guaranteed: - self.add_key(hash_function, partial(self.hash_item, hash_function)) - self.used_call_keys = set(self.call_keys) & self.format_keys - self.item_data = static_keys - - def add_key(self, key, callable_with_item): - self.call_keys[key] = callable_with_item - self.used_call_keys = set(self.call_keys) & self.format_keys - - def get_item_data(self, item): - mode = stat.filemode(item.mode) - item_type = mode[0] - item_data = self.item_data - - source = item.get('source', '') - extra = '' - if source: - source = remove_surrogates(source) - if item_type == 'l': - extra = ' -> %s' % source - else: - mode = 'h' + mode[1:] - extra = ' link to %s' % source - item_data['type'] = item_type - item_data['mode'] = mode - item_data['user'] = item.user or item.uid - item_data['group'] = item.group or item.gid - item_data['uid'] = item.uid - item_data['gid'] = item.gid - item_data['path'] = remove_surrogates(item.path) - item_data['bpath'] = item.path - item_data['source'] = source - item_data['linktarget'] = source - item_data['extra'] = extra - item_data['flags'] = item.get('bsdflags') - for key in self.used_call_keys: - item_data[key] = self.call_keys[key](item) - return item_data - - def calculate_num_chunks(self, item): - return len(item.get('chunks', [])) - - def calculate_unique_chunks(self, item): - chunk_index = self.archive.cache.chunks - return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) - - def calculate_size(self, item): - return sum(c.size for c in item.get('chunks', [])) - - def calculate_csize(self, item): - return sum(c.csize for c in item.get('chunks', [])) - - def hash_item(self, hash_function, item): - if 'chunks' not in item: - return "" - hash = hashlib.new(hash_function) - for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]): - hash.update(data) - return hash.hexdigest() - - def format_time(self, key, item): - return format_time(safe_timestamp(item.get(key) or item.mtime)) - - def time(self, key, item): - return safe_timestamp(item.get(key) or item.mtime) - - -class ChunkIteratorFileWrapper: - """File-like wrapper for chunk iterators""" - - def __init__(self, chunk_iterator): - self.chunk_iterator = chunk_iterator - self.chunk_offset = 0 - self.chunk = b'' - self.exhausted = False - - def _refill(self): - remaining = len(self.chunk) - self.chunk_offset - if not remaining: - try: - chunk = next(self.chunk_iterator) - self.chunk = memoryview(chunk.data) - except StopIteration: - self.exhausted = True - return 0 # EOF - self.chunk_offset = 0 - remaining = len(self.chunk) - return remaining - - def _read(self, nbytes): - if not nbytes: - return b'' - remaining = self._refill() - will_read = min(remaining, nbytes) - self.chunk_offset += will_read - return self.chunk[self.chunk_offset - will_read:self.chunk_offset] - - def read(self, nbytes): - parts = [] - while nbytes and not self.exhausted: - read_data = self._read(nbytes) - nbytes -= len(read_data) - parts.append(read_data) - return b''.join(parts) - - -def open_item(archive, item): - """Return file-like object for archived item (with chunks).""" - chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks]) - return ChunkIteratorFileWrapper(chunk_iterator) - - -def file_status(mode): - if stat.S_ISREG(mode): - return 'A' - elif stat.S_ISDIR(mode): 
- return 'd' - elif stat.S_ISBLK(mode): - return 'b' - elif stat.S_ISCHR(mode): - return 'c' - elif stat.S_ISLNK(mode): - return 's' - elif stat.S_ISFIFO(mode): - return 'f' - return '?' - - -def consume(iterator, n=None): - """Advance the iterator n-steps ahead. If n is none, consume entirely.""" - # Use functions that consume iterators at C speed. - if n is None: - # feed the entire iterator into a zero-length deque - deque(iterator, maxlen=0) - else: - # advance to the empty slice starting at position n - next(islice(iterator, n, n), None) - -# GenericDirEntry, scandir_generic (c) 2012 Ben Hoyt -# from the python-scandir package (3-clause BSD license, just like us, so no troubles here) -# note: simplified version - - -class GenericDirEntry: - __slots__ = ('name', '_scandir_path', '_path') - - def __init__(self, scandir_path, name): - self._scandir_path = scandir_path - self.name = name - self._path = None - - @property - def path(self): - if self._path is None: - self._path = os.path.join(self._scandir_path, self.name) - return self._path - - def stat(self, follow_symlinks=True): - assert not follow_symlinks - return os.lstat(self.path) - - def _check_type(self, type): - st = self.stat(False) - return stat.S_IFMT(st.st_mode) == type - - def is_dir(self, follow_symlinks=True): - assert not follow_symlinks - return self._check_type(stat.S_IFDIR) - - def is_file(self, follow_symlinks=True): - assert not follow_symlinks - return self._check_type(stat.S_IFREG) - - def is_symlink(self): - return self._check_type(stat.S_IFLNK) - - def inode(self): - st = self.stat(False) - return st.st_ino - - def __repr__(self): - return '<{0}: {1!r}>'.format(self.__class__.__name__, self.path) - - -def scandir_generic(path='.'): - """Like os.listdir(), but yield DirEntry objects instead of returning a list of names.""" - for name in sorted(os.listdir(path)): - yield GenericDirEntry(path, name) - -try: - from os import scandir -except ImportError: - try: - # Try python-scandir on Python 3.4 - from scandir import scandir - except ImportError: - # If python-scandir is not installed, then use a version that is just as slow as listdir. - scandir = scandir_generic - - -def scandir_inorder(path='.'): - return sorted(scandir(path), key=lambda dirent: dirent.inode()) - - -def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True): - """ - clean lines (usually read from a config file): - - 1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments. - - note: only "pure comment lines" are supported, no support for "trailing comments". - - :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines - :param lstrip: lstrip call arguments or False, if lstripping is not desired - :param rstrip: rstrip call arguments or False, if rstripping is not desired - :param remove_comments: remove comment lines (lines starting with "#") - :param remove_empty: remove empty lines - :return: yields processed lines - """ - for line in lines: - if lstrip is not False: - line = line.lstrip(lstrip) - if rstrip is not False: - line = line.rstrip(rstrip) - if remove_empty and not line: - continue - if remove_comments and line.startswith('#'): - continue - yield line - - -class CompressionDecider1: - def __init__(self, compression, compression_files): - """ - Initialize a CompressionDecider instance (and read config files, if needed). - - :param compression: default CompressionSpec (e.g. 
from --compression option) - :param compression_files: list of compression config files (e.g. from --compression-from) or - a list of other line iterators - """ - self.compression = compression - if not compression_files: - self.matcher = None - else: - self.matcher = PatternMatcher(fallback=compression) - for file in compression_files: - try: - for line in clean_lines(file): - try: - compr_spec, fn_pattern = line.split(':', 1) - except: - continue - self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec)) - finally: - if hasattr(file, 'close'): - file.close() - - def decide(self, path): - if self.matcher is not None: - return self.matcher.match(path) - return self.compression - - -class CompressionDecider2: - def __init__(self, compression): - self.compression = compression - - def decide(self, chunk): - # nothing fancy here yet: we either use what the metadata says or the default - # later, we can decide based on the chunk data also. - # if we compress the data here to decide, we can even update the chunk data - # and modify the metadata as desired. - compr_spec = chunk.meta.get('compress', self.compression) - compr_args = dict(buffer=COMPR_BUFFER) - compr_args.update(compr_spec) - if compr_args['name'] == 'auto': - # we did not decide yet, use heuristic: - compr_args, chunk = self.heuristic_lz4(compr_args, chunk) - return compr_args, chunk - - def heuristic_lz4(self, compr_args, chunk): - meta, data = chunk - lz4 = get_compressor('lz4', buffer=compr_args['buffer']) - cdata = lz4.compress(data) - data_len = len(data) - cdata_len = len(cdata) - if cdata_len < data_len: - compr_spec = compr_args['spec'] - else: - # uncompressible - we could have a special "uncompressible compressor" - # that marks such data as uncompressible via compression-type metadata. - compr_spec = CompressionSpec('none') - compr_args.update(compr_spec) - logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec) - return compr_args, Chunk(data, **meta) - - -@contextmanager -def signal_handler(signo, handler): - old_signal_handler = signal.signal(signo, handler) - try: - yield - finally: - signal.signal(signo, old_signal_handler) diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py new file mode 100644 index 00000000..b1c875a4 --- /dev/null +++ b/src/borg/helpers/__init__.py @@ -0,0 +1,42 @@ +""" +This package contains all sorts of small helper / utility functionality, +that did not fit better elsewhere. + +Code used to be in borg/helpers.py but was split into the modules in this +package, which are imported into here for compatibility. +""" + +from .checks import * # NOQA +from .datastruct import * # NOQA +from .errors import * # NOQA +from .fs import * # NOQA +from .manifest import * # NOQA +from .misc import * # NOQA +from .msgpack import * # NOQA +from .parseformat import * # NOQA +from .process import * # NOQA +from .progress import * # NOQA +from .time import * # NOQA +from .usergroup import * # NOQA +from .yes import * # NOQA + +""" +The global exit_code variable is used so that modules other than archiver can increase the program exit code if a +warning or error occurred during their operation. This is different from archiver.exit_code, which is only accessible +from the archiver object. + +Note: keep this in helpers/__init__.py as the code expects to be able to assign to helpers.exit_code. 
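+
+For illustration: calling set_ec(EXIT_WARNING) and later set_ec(EXIT_SUCCESS) leaves
+exit_code at EXIT_WARNING, because set_ec() below only ever raises the stored value
+(so EXIT_ERROR overrides EXIT_WARNING, which overrides EXIT_SUCCESS).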
+""" +exit_code = EXIT_SUCCESS + + +def set_ec(ec): + """ + Sets the exit code of the program, if an exit code higher or equal than this is set, this does nothing. This + makes EXIT_ERROR override EXIT_WARNING, etc.. + + ec: exit code to set + """ + global exit_code + exit_code = max(exit_code, ec) + return exit_code diff --git a/src/borg/helpers/checks.py b/src/borg/helpers/checks.py new file mode 100644 index 00000000..dbd6fa7f --- /dev/null +++ b/src/borg/helpers/checks.py @@ -0,0 +1,38 @@ +import os +import sys + +from .errors import Error + + +class PythonLibcTooOld(Error): + """FATAL: this Python was compiled for a too old (g)libc and misses required functionality.""" + + +def check_python(): + if sys.platform == 'win32': + # Does not support symlinks anyway + return + required_funcs = {os.stat, os.utime, os.chown} + if not os.supports_follow_symlinks.issuperset(required_funcs): + raise PythonLibcTooOld + + +class ExtensionModuleError(Error): + """The Borg binary extension modules do not seem to be properly installed.""" + + +def check_extension_modules(): + import borg.crypto.low_level + from .. import platform, compress, item, chunker, hashindex + if hashindex.API_VERSION != '1.1_07': + raise ExtensionModuleError + if chunker.API_VERSION != '1.1_01': + raise ExtensionModuleError + if compress.API_VERSION != '1.1_03': + raise ExtensionModuleError + if borg.crypto.low_level.API_VERSION != '1.1_02': + raise ExtensionModuleError + if platform.API_VERSION != platform.OS_API_VERSION != '1.1_02': + raise ExtensionModuleError + if item.API_VERSION != '1.1_03': + raise ExtensionModuleError diff --git a/src/borg/helpers/datastruct.py b/src/borg/helpers/datastruct.py new file mode 100644 index 00000000..1650d3cd --- /dev/null +++ b/src/borg/helpers/datastruct.py @@ -0,0 +1,51 @@ +from .errors import Error + + +class StableDict(dict): + """A dict subclass with stable items() ordering""" + def items(self): + return sorted(super().items()) + + +class Buffer: + """ + Provides a managed, resizable buffer. + """ + + class MemoryLimitExceeded(Error, OSError): + """Requested buffer size {} is above the limit of {}.""" + + def __init__(self, allocator, size=4096, limit=None): + """ + Initialize the buffer: use allocator(size) call to allocate a buffer. + Optionally, set the upper for the buffer size. + """ + assert callable(allocator), 'must give alloc(size) function as first param' + assert limit is None or size <= limit, 'initial size must be <= limit' + self.allocator = allocator + self.limit = limit + self.resize(size, init=True) + + def __len__(self): + return len(self.buffer) + + def resize(self, size, init=False): + """ + resize the buffer - to avoid frequent reallocation, we usually always grow (if needed). + giving init=True it is possible to first-time initialize or shrink the buffer. + if a buffer size beyond the limit is requested, raise Buffer.MemoryLimitExceeded (OSError). + """ + size = int(size) + if self.limit is not None and size > self.limit: + raise Buffer.MemoryLimitExceeded(size, self.limit) + if init or len(self) < size: + self.buffer = self.allocator(size) + + def get(self, size=None, init=False): + """ + return a buffer of at least the requested size (None: any current size). + init=True can be given to trigger shrinking of the buffer to the given size. 
+ """ + if size is not None: + self.resize(size, init) + return self.buffer diff --git a/src/borg/helpers/errors.py b/src/borg/helpers/errors.py new file mode 100644 index 00000000..80a47a9b --- /dev/null +++ b/src/borg/helpers/errors.py @@ -0,0 +1,36 @@ +from ..constants import * # NOQA + +import borg.crypto.low_level + + +class Error(Exception): + """Error base class""" + + # if we raise such an Error and it is only catched by the uppermost + # exception handler (that exits short after with the given exit_code), + # it is always a (fatal and abrupt) EXIT_ERROR, never just a warning. + exit_code = EXIT_ERROR + # show a traceback? + traceback = False + + def __init__(self, *args): + super().__init__(*args) + self.args = args + + def get_message(self): + return type(self).__doc__.format(*self.args) + + __str__ = get_message + + +class ErrorWithTraceback(Error): + """like Error, but show a traceback also""" + traceback = True + + +class IntegrityError(ErrorWithTraceback, borg.crypto.low_level.IntegrityError): + """Data integrity error: {}""" + + +class DecompressionError(IntegrityError): + """Decompression error: {}""" diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py new file mode 100644 index 00000000..eab9714d --- /dev/null +++ b/src/borg/helpers/fs.py @@ -0,0 +1,177 @@ +import errno +import os +import os.path +import re +import stat +import subprocess +import sys +import textwrap + +from .process import prepare_subprocess_env + +from ..constants import * # NOQA + + +def get_home_dir(): + """Get user's home directory while preferring a possibly set HOME + environment variable + """ + # os.path.expanduser() behaves differently for '~' and '~someuser' as + # parameters: when called with an explicit username, the possibly set + # environment variable HOME is no longer respected. So we have to check if + # it is set and only expand the user's home directory if HOME is unset. + if os.environ.get('HOME', ''): + return os.environ.get('HOME') + else: + return os.path.expanduser('~%s' % os.environ.get('USER', '')) + + +def get_keys_dir(): + """Determine where to repository keys and cache""" + + keys_dir = os.environ.get('BORG_KEYS_DIR', os.path.join(get_config_dir(), 'keys')) + if not os.path.exists(keys_dir): + os.makedirs(keys_dir) + os.chmod(keys_dir, stat.S_IRWXU) + return keys_dir + + +def get_security_dir(repository_id=None): + """Determine where to store local security information.""" + security_dir = os.environ.get('BORG_SECURITY_DIR', os.path.join(get_config_dir(), 'security')) + if repository_id: + security_dir = os.path.join(security_dir, repository_id) + if not os.path.exists(security_dir): + os.makedirs(security_dir) + os.chmod(security_dir, stat.S_IRWXU) + return security_dir + + +def get_cache_dir(): + """Determine where to repository keys and cache""" + xdg_cache = os.environ.get('XDG_CACHE_HOME', os.path.join(get_home_dir(), '.cache')) + cache_dir = os.environ.get('BORG_CACHE_DIR', os.path.join(xdg_cache, 'borg')) + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + os.chmod(cache_dir, stat.S_IRWXU) + with open(os.path.join(cache_dir, CACHE_TAG_NAME), 'wb') as fd: + fd.write(CACHE_TAG_CONTENTS) + fd.write(textwrap.dedent(""" + # This file is a cache directory tag created by Borg. 
+ # For information about cache directory tags, see: + # http://www.brynosaurus.com/cachedir/ + """).encode('ascii')) + return cache_dir + + +def get_config_dir(): + """Determine where to store whole config""" + xdg_config = os.environ.get('XDG_CONFIG_HOME', os.path.join(get_home_dir(), '.config')) + config_dir = os.environ.get('BORG_CONFIG_DIR', os.path.join(xdg_config, 'borg')) + if not os.path.exists(config_dir): + os.makedirs(config_dir) + os.chmod(config_dir, stat.S_IRWXU) + return config_dir + + +def dir_is_cachedir(path): + """Determines whether the specified path is a cache directory (and + therefore should potentially be excluded from the backup) according to + the CACHEDIR.TAG protocol + (http://www.brynosaurus.com/cachedir/spec.html). + """ + + tag_path = os.path.join(path, CACHE_TAG_NAME) + try: + if os.path.exists(tag_path): + with open(tag_path, 'rb') as tag_file: + tag_data = tag_file.read(len(CACHE_TAG_CONTENTS)) + if tag_data == CACHE_TAG_CONTENTS: + return True + except OSError: + pass + return False + + +def dir_is_tagged(path, exclude_caches, exclude_if_present): + """Determines whether the specified path is excluded by being a cache + directory or containing user-specified tag files/directories. Returns a + list of the paths of the tag files/directories (either CACHEDIR.TAG or the + matching user-specified files/directories). + """ + tag_paths = [] + if exclude_caches and dir_is_cachedir(path): + tag_paths.append(os.path.join(path, CACHE_TAG_NAME)) + if exclude_if_present is not None: + for tag in exclude_if_present: + tag_path = os.path.join(path, tag) + if os.path.exists(tag_path): + tag_paths.append(tag_path) + return tag_paths + + +_safe_re = re.compile(r'^((\.\.)?/+)+') + + +def make_path_safe(path): + """Make path safe by making it relative and local + """ + return _safe_re.sub('', path) or '.' + + +def hardlinkable(mode): + """return True if we support hardlinked items of this type""" + return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode) + + +def scandir_inorder(path='.'): + return sorted(os.scandir(path), key=lambda dirent: dirent.inode()) + + +def secure_erase(path): + """Attempt to securely erase a file by writing random data over it before deleting it.""" + with open(path, 'r+b') as fd: + length = os.stat(fd.fileno()).st_size + fd.write(os.urandom(length)) + fd.flush() + os.fsync(fd.fileno()) + os.unlink(path) + + +def truncate_and_unlink(path): + """ + Truncate and then unlink *path*. + + Do not create *path* if it does not exist. + Open *path* for truncation in r+b mode (=O_RDWR|O_BINARY). + + Use this when deleting potentially large files when recovering + from a VFS error such as ENOSPC. It can help a full file system + recover. Refer to the "File system interaction" section + in repository.py for further explanations. + """ + try: + with open(path, 'r+b') as fd: + fd.truncate() + except OSError as err: + if err.errno != errno.ENOTSUP: + raise + # don't crash if the above ops are not supported. 
+ os.unlink(path) + + +def dash_open(path, mode): + assert '+' not in mode # the streams are either r or w, but never both + if path == '-': + stream = sys.stdin if 'r' in mode else sys.stdout + return stream.buffer if 'b' in mode else stream + else: + return open(path, mode) + + +def umount(mountpoint): + env = prepare_subprocess_env(system=True) + try: + return subprocess.call(['fusermount', '-u', mountpoint], env=env) + except FileNotFoundError: + return subprocess.call(['umount', mountpoint], env=env) diff --git a/src/borg/helpers/manifest.py b/src/borg/helpers/manifest.py new file mode 100644 index 00000000..eb88c038 --- /dev/null +++ b/src/borg/helpers/manifest.py @@ -0,0 +1,258 @@ +import enum +import os +import os.path +import re +from collections import abc, namedtuple +from datetime import datetime, timedelta +from operator import attrgetter + +from .errors import Error + +from ..logger import create_logger +logger = create_logger() + +from .datastruct import StableDict +from .parseformat import bin_to_hex, safe_encode, safe_decode +from .time import parse_timestamp +from .. import shellpattern +from ..constants import * # NOQA + + +class NoManifestError(Error): + """Repository has no manifest.""" + + +class MandatoryFeatureUnsupported(Error): + """Unsupported repository feature(s) {}. A newer version of borg is required to access this repository.""" + + +ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') + +AI_HUMAN_SORT_KEYS = ['timestamp'] + list(ArchiveInfo._fields) +AI_HUMAN_SORT_KEYS.remove('ts') + + +class Archives(abc.MutableMapping): + """ + Nice wrapper around the archives dict, making sure only valid types/values get in + and we can deal with str keys (and it internally encodes to byte keys) and either + str timestamps or datetime timestamps. + """ + def __init__(self): + # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts) + self._archives = {} + + def __len__(self): + return len(self._archives) + + def __iter__(self): + return iter(safe_decode(name) for name in self._archives) + + def __getitem__(self, name): + assert isinstance(name, str) + _name = safe_encode(name) + values = self._archives.get(_name) + if values is None: + raise KeyError + ts = parse_timestamp(values[b'time'].decode('utf-8')) + return ArchiveInfo(name=name, id=values[b'id'], ts=ts) + + def __setitem__(self, name, info): + assert isinstance(name, str) + name = safe_encode(name) + assert isinstance(info, tuple) + id, ts = info + assert isinstance(id, bytes) + if isinstance(ts, datetime): + ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT) + assert isinstance(ts, str) + ts = ts.encode() + self._archives[name] = {b'id': id, b'time': ts} + + def __delitem__(self, name): + assert isinstance(name, str) + name = safe_encode(name) + del self._archives[name] + + def list(self, *, glob=None, match_end=r'\Z', sort_by=(), first=None, last=None, reverse=False): + """ + Return list of ArchiveInfo instances according to the parameters. + + First match *glob* (considering *match_end*), then *sort_by*. + Apply *first* and *last* filters, and then possibly *reverse* the list. + + *sort_by* is a list of sort keys applied in reverse order. 
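+
+        An illustrative call: list(glob='web-*', sort_by=['ts'], last=3) returns the
+        three most recent archives whose names start with "web-", oldest first
+        (pass reverse=True to get the newest first).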
+ """ + if isinstance(sort_by, (str, bytes)): + raise TypeError('sort_by must be a sequence of str') + regex = re.compile(shellpattern.translate(glob or '*', match_end=match_end)) + archives = [x for x in self.values() if regex.match(x.name) is not None] + for sortkey in reversed(sort_by): + archives.sort(key=attrgetter(sortkey)) + if first: + archives = archives[:first] + elif last: + archives = archives[max(len(archives) - last, 0):] + if reverse: + archives.reverse() + return archives + + def list_considering(self, args): + """ + get a list of archives, considering --first/last/prefix/glob-archives/sort cmdline args + """ + if args.location.archive: + raise Error('The options --first, --last, --prefix and --glob-archives can only be used on repository targets.') + if args.prefix: + args.glob_archives = args.prefix + '*' + return self.list(sort_by=args.sort_by.split(','), glob=args.glob_archives, first=args.first, last=args.last) + + def set_raw_dict(self, d): + """set the dict we get from the msgpack unpacker""" + for k, v in d.items(): + assert isinstance(k, bytes) + assert isinstance(v, dict) and b'id' in v and b'time' in v + self._archives[k] = v + + def get_raw_dict(self): + """get the dict we can give to the msgpack packer""" + return self._archives + + +class Manifest: + + @enum.unique + class Operation(enum.Enum): + # The comments here only roughly describe the scope of each feature. In the end, additions need to be + # based on potential problems older clients could produce when accessing newer repositories and the + # tradeofs of locking version out or still allowing access. As all older versions and their exact + # behaviours are known when introducing new features sometimes this might not match the general descriptions + # below. + + # The READ operation describes which features are needed to safely list and extract the archives in the + # repository. + READ = 'read' + # The CHECK operation is for all operations that need either to understand every detail + # of the repository (for consistency checks and repairs) or are seldom used functions that just + # should use the most restrictive feature set because more fine grained compatibility tracking is + # not needed. + CHECK = 'check' + # The WRITE operation is for adding archives. Features here ensure that older clients don't add archives + # in an old format, or is used to lock out clients that for other reasons can no longer safely add new + # archives. + WRITE = 'write' + # The DELETE operation is for all operations (like archive deletion) that need a 100% correct reference + # count and the need to be able to find all (directly and indirectly) referenced chunks of a given archive. 
+ DELETE = 'delete' + + NO_OPERATION_CHECK = tuple() + + SUPPORTED_REPO_FEATURES = frozenset([]) + + MANIFEST_ID = b'\0' * 32 + + def __init__(self, key, repository, item_keys=None): + self.archives = Archives() + self.config = {} + self.key = key + self.repository = repository + self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS + self.tam_verified = False + self.timestamp = None + + @property + def id_str(self): + return bin_to_hex(self.id) + + @property + def last_timestamp(self): + return parse_timestamp(self.timestamp, tzinfo=None) + + @classmethod + def load(cls, repository, operations, key=None, force_tam_not_required=False): + from ..item import ManifestItem + from ..crypto.key import key_factory, tam_required_file, tam_required + from ..repository import Repository + try: + cdata = repository.get(cls.MANIFEST_ID) + except Repository.ObjectNotFound: + raise NoManifestError + if not key: + key = key_factory(repository, cdata) + manifest = cls(key, repository) + data = key.decrypt(None, cdata) + manifest_dict, manifest.tam_verified = key.unpack_and_verify_manifest(data, force_tam_not_required=force_tam_not_required) + m = ManifestItem(internal_dict=manifest_dict) + manifest.id = key.id_hash(data) + if m.get('version') not in (1, 2): + raise ValueError('Invalid manifest version') + manifest.archives.set_raw_dict(m.archives) + manifest.timestamp = m.get('timestamp') + manifest.config = m.config + # valid item keys are whatever is known in the repo or every key we know + manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', [])) + + if manifest.tam_verified: + manifest_required = manifest.config.get(b'tam_required', False) + security_required = tam_required(repository) + if manifest_required and not security_required: + logger.debug('Manifest is TAM verified and says TAM is required, updating security database...') + file = tam_required_file(repository) + open(file, 'w').close() + if not manifest_required and security_required: + logger.debug('Manifest is TAM verified and says TAM is *not* required, updating security database...') + os.unlink(tam_required_file(repository)) + manifest.check_repository_compatibility(operations) + return manifest, key + + def check_repository_compatibility(self, operations): + for operation in operations: + assert isinstance(operation, self.Operation) + feature_flags = self.config.get(b'feature_flags', None) + if feature_flags is None: + return + if operation.value.encode() not in feature_flags: + continue + requirements = feature_flags[operation.value.encode()] + if b'mandatory' in requirements: + unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES + if unsupported: + raise MandatoryFeatureUnsupported([f.decode() for f in unsupported]) + + def get_all_mandatory_features(self): + result = {} + feature_flags = self.config.get(b'feature_flags', None) + if feature_flags is None: + return result + + for operation, requirements in feature_flags.items(): + if b'mandatory' in requirements: + result[operation.decode()] = set([feature.decode() for feature in requirements[b'mandatory']]) + return result + + def write(self): + from ..item import ManifestItem + if self.key.tam_required: + self.config[b'tam_required'] = True + # self.timestamp needs to be strictly monotonically increasing. 
Clocks often are not set correctly + if self.timestamp is None: + self.timestamp = datetime.utcnow().strftime(ISO_FORMAT) + else: + prev_ts = self.last_timestamp + incremented = (prev_ts + timedelta(microseconds=1)).strftime(ISO_FORMAT) + self.timestamp = max(incremented, datetime.utcnow().strftime(ISO_FORMAT)) + # include checks for limits as enforced by limited unpacker (used by load()) + assert len(self.archives) <= MAX_ARCHIVES + assert all(len(name) <= 255 for name in self.archives) + assert len(self.item_keys) <= 100 + manifest = ManifestItem( + version=1, + archives=StableDict(self.archives.get_raw_dict()), + timestamp=self.timestamp, + config=StableDict(self.config), + item_keys=tuple(sorted(self.item_keys)), + ) + self.tam_verified = True + data = self.key.pack_and_authenticate_metadata(manifest.as_dict()) + self.id = self.key.id_hash(data) + self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py new file mode 100644 index 00000000..e33b46f0 --- /dev/null +++ b/src/borg/helpers/misc.py @@ -0,0 +1,166 @@ +import logging +import io +import os +import os.path +import platform +import sys +from collections import deque +from datetime import datetime, timezone, timedelta +from itertools import islice +from operator import attrgetter + +from ..logger import create_logger +logger = create_logger() + +from .time import to_localtime +from .. import __version__ as borg_version +from .. import chunker + + +def prune_within(archives, hours): + target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600) + return [a for a in archives if a.ts > target] + + +def prune_split(archives, pattern, n, skip=[]): + last = None + keep = [] + if n == 0: + return keep + for a in sorted(archives, key=attrgetter('ts'), reverse=True): + period = to_localtime(a.ts).strftime(pattern) + if period != last: + last = period + if a not in skip: + keep.append(a) + if len(keep) == n: + break + return keep + + +def sysinfo(): + info = [] + info.append('Platform: %s' % (' '.join(platform.uname()), )) + if sys.platform.startswith('linux'): + info.append('Linux: %s %s %s' % platform.linux_distribution()) + info.append('Borg: %s Python: %s %s' % (borg_version, platform.python_implementation(), platform.python_version())) + info.append('PID: %d CWD: %s' % (os.getpid(), os.getcwd())) + info.append('sys.argv: %r' % sys.argv) + info.append('SSH_ORIGINAL_COMMAND: %r' % os.environ.get('SSH_ORIGINAL_COMMAND')) + info.append('') + return '\n'.join(info) + + +def log_multi(*msgs, level=logging.INFO, logger=logger): + """ + log multiple lines of text, each line by a separate logging call for cosmetic reasons + + each positional argument may be a single or multiple lines (separated by newlines) of text. + """ + lines = [] + for msg in msgs: + lines.extend(msg.splitlines()) + for line in lines: + logger.log(level, line) + + +class ChunkIteratorFileWrapper: + """File-like wrapper for chunk iterators""" + + def __init__(self, chunk_iterator, read_callback=None): + """ + *chunk_iterator* should be an iterator yielding bytes. These will be buffered + internally as necessary to satisfy .read() calls. + + *read_callback* will be called with one argument, some byte string that has + just been read and will be subsequently returned to a caller of .read(). + It can be used to update a progress display. 
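+
+        A minimal illustration: fd = ChunkIteratorFileWrapper(iter([b'foo', b'bar'])),
+        then fd.read(4) returns b'foob' and the next fd.read(4) returns b'ar' (EOF).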
+ """ + self.chunk_iterator = chunk_iterator + self.chunk_offset = 0 + self.chunk = b'' + self.exhausted = False + self.read_callback = read_callback + + def _refill(self): + remaining = len(self.chunk) - self.chunk_offset + if not remaining: + try: + chunk = next(self.chunk_iterator) + self.chunk = memoryview(chunk) + except StopIteration: + self.exhausted = True + return 0 # EOF + self.chunk_offset = 0 + remaining = len(self.chunk) + return remaining + + def _read(self, nbytes): + if not nbytes: + return b'' + remaining = self._refill() + will_read = min(remaining, nbytes) + self.chunk_offset += will_read + return self.chunk[self.chunk_offset - will_read:self.chunk_offset] + + def read(self, nbytes): + parts = [] + while nbytes and not self.exhausted: + read_data = self._read(nbytes) + nbytes -= len(read_data) + parts.append(read_data) + if self.read_callback: + self.read_callback(read_data) + return b''.join(parts) + + +def open_item(archive, item): + """Return file-like object for archived item (with chunks).""" + chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks]) + return ChunkIteratorFileWrapper(chunk_iterator) + + +def chunkit(it, size): + """ + Chunk an iterator into pieces of . + + >>> list(chunker('ABCDEFG', 3)) + [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']] + """ + iterable = iter(it) + return iter(lambda: list(islice(iterable, size)), []) + + +def consume(iterator, n=None): + """Advance the iterator n-steps ahead. If n is none, consume entirely.""" + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +class ErrorIgnoringTextIOWrapper(io.TextIOWrapper): + def read(self, n): + if not self.closed: + try: + return super().read(n) + except BrokenPipeError: + try: + super().close() + except OSError: + pass + return '' + + def write(self, s): + if not self.closed: + try: + return super().write(s) + except BrokenPipeError: + try: + super().close() + except OSError: + pass + return len(s) diff --git a/src/borg/helpers/msgpack.py b/src/borg/helpers/msgpack.py new file mode 100644 index 00000000..72bc1f45 --- /dev/null +++ b/src/borg/helpers/msgpack.py @@ -0,0 +1,64 @@ +import msgpack +import msgpack.fallback + +from .datastruct import StableDict +from ..constants import * # NOQA + + +def is_slow_msgpack(): + return msgpack.Packer is msgpack.fallback.Packer + + +def get_limited_unpacker(kind): + """return a limited Unpacker because we should not trust msgpack data received from remote""" + args = dict(use_list=False, # return tuples, not lists + max_bin_len=0, # not used + max_ext_len=0, # not used + max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE), + max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object + ) + if kind == 'server': + args.update(dict(max_array_len=100, # misc. cmd tuples + max_map_len=100, # misc. cmd dicts + )) + elif kind == 'client': + args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan() + max_map_len=100, # misc. 
result dicts + )) + elif kind == 'manifest': + args.update(dict(use_list=True, # default value + max_array_len=100, # ITEM_KEYS ~= 22 + max_map_len=MAX_ARCHIVES, # list of archives + max_str_len=255, # archive name + object_hook=StableDict, + unicode_errors='surrogateescape', + )) + elif kind == 'key': + args.update(dict(use_list=True, # default value + max_array_len=0, # not used + max_map_len=10, # EncryptedKey dict + max_str_len=4000, # inner key data + object_hook=StableDict, + unicode_errors='surrogateescape', + )) + else: + raise ValueError('kind must be "server", "client", "manifest" or "key"') + return msgpack.Unpacker(**args) + + +def bigint_to_int(mtime): + """Convert bytearray to int + """ + if isinstance(mtime, bytes): + return int.from_bytes(mtime, 'little', signed=True) + return mtime + + +def int_to_bigint(value): + """Convert integers larger than 64 bits to bytearray + + Smaller integers are left alone + """ + if value.bit_length() > 63: + return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) + return value diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py new file mode 100644 index 00000000..71cc44cb --- /dev/null +++ b/src/borg/helpers/parseformat.py @@ -0,0 +1,1006 @@ +import argparse +import hashlib +import json +import os +import sys +import os.path +import re +import socket +import stat +import uuid +from binascii import hexlify +from collections import Counter, OrderedDict +from datetime import datetime, timezone +from functools import partial +from string import Formatter + +from ..logger import create_logger +logger = create_logger() + +from .errors import Error +from .fs import get_keys_dir +from .time import OutputTimestamp, format_time, to_localtime, safe_timestamp, safe_s +from .usergroup import uid2user +from .. import __version__ as borg_version +from .. 
import __version_tuple__ as borg_version_tuple +from ..constants import * # NOQA + +if sys.platform == 'win32': + import posixpath + + +def bin_to_hex(binary): + return hexlify(binary).decode('ascii') + + +def safe_decode(s, coding='utf-8', errors='surrogateescape'): + """decode bytes to str, with round-tripping "invalid" bytes""" + if s is None: + return None + return s.decode(coding, errors) + + +def safe_encode(s, coding='utf-8', errors='surrogateescape'): + """encode str to bytes, with round-tripping "invalid" bytes""" + if s is None: + return None + return s.encode(coding, errors) + + +def remove_surrogates(s, errors='replace'): + """Replace surrogates generated by fsdecode with '?'""" + return s.encode('utf-8', errors).decode('utf-8') + + +def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'): + for key in keys: + if isinstance(d.get(key), bytes): + d[key] = d[key].decode(encoding, errors) + return d + + +def positive_int_validator(value): + """argparse type for positive integers""" + int_value = int(value) + if int_value <= 0: + raise argparse.ArgumentTypeError('A positive integer is required: %s' % value) + return int_value + + +def interval(s): + """Convert a string representing a valid interval to a number of hours.""" + multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365} + + if s.endswith(tuple(multiplier.keys())): + number = s[:-1] + suffix = s[-1] + else: + # range suffixes in ascending multiplier order + ranges = [k for k, v in sorted(multiplier.items(), key=lambda t: t[1])] + raise argparse.ArgumentTypeError( + 'Unexpected interval time unit "%s": expected one of %r' % (s[-1], ranges)) + + try: + hours = int(number) * multiplier[suffix] + except ValueError: + hours = -1 + + if hours <= 0: + raise argparse.ArgumentTypeError( + 'Unexpected interval number "%s": expected an integer greater than 0' % number) + + return hours + + +def timestamp(s): + """Convert a --timestamp=s argument to a datetime object""" + try: + # is it pointing to a file / directory? + ts = safe_s(os.stat(s).st_mtime) + return datetime.utcfromtimestamp(ts) + except OSError: + # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support. + for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', + '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', + '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M', + '%Y-%m-%d', '%Y-%j', + ): + try: + return datetime.strptime(s, format) + except ValueError: + continue + raise ValueError + + +def ChunkerParams(s): + if s.strip().lower() == "default": + return CHUNKER_PARAMS + chunk_min, chunk_max, chunk_mask, window_size = s.split(',') + if int(chunk_max) > 23: + raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. 
chunk size)') + return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size) + + +def FilesCacheMode(s): + ENTRIES_MAP = dict(ctime='c', mtime='m', size='s', inode='i', rechunk='r', disabled='d') + VALID_MODES = ('cis', 'ims', 'cs', 'ms', 'cr', 'mr', 'd') # letters in alpha order + entries = set(s.strip().split(',')) + if not entries <= set(ENTRIES_MAP): + raise ValueError('cache mode must be a comma-separated list of: %s' % ','.join(sorted(ENTRIES_MAP))) + short_entries = {ENTRIES_MAP[entry] for entry in entries} + mode = ''.join(sorted(short_entries)) + if mode not in VALID_MODES: + raise ValueError('cache mode short must be one of: %s' % ','.join(VALID_MODES)) + return mode + + +assert FilesCacheMode(DEFAULT_FILES_CACHE_MODE_UI) == DEFAULT_FILES_CACHE_MODE # keep these 2 values in sync! + + +def partial_format(format, mapping): + """ + Apply format.format_map(mapping) while preserving unknown keys + + Does not support attribute access, indexing and ![rsa] conversions + """ + for key, value in mapping.items(): + key = re.escape(key) + format = re.sub(r'(? 0 else '' + + for unit in units[:-1]: + if abs(round(num, precision)) < power: + if isinstance(num, int): + return "{}{}{}{}{}".format(prefix, num, sep, unit, suffix) + else: + return "{}{:3.{}f}{}{}{}".format(prefix, num, precision, sep, unit, suffix) + num /= float(power) + return "{}{:.{}f}{}{}{}".format(prefix, num, precision, sep, units[-1], suffix) + + +def sizeof_fmt_iec(num, suffix='B', sep='', precision=2, sign=False): + return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign, + units=['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'], power=1024) + + +def sizeof_fmt_decimal(num, suffix='B', sep='', precision=2, sign=False): + return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign, + units=['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'], power=1000) + + +def format_archive(archive): + return '%-36s %s [%s]' % ( + archive.name, + format_time(to_localtime(archive.ts)), + bin_to_hex(archive.id), + ) + + +def parse_stringified_list(s): + l = re.split(" *, *", s) + return [item for item in l if item != ''] + + +class Location: + """Object representing a repository / archive location + """ + proto = user = _host = port = path = archive = None + + # user must not contain "@", ":" or "/". + # Quoting adduser error message: + # "To avoid problems, the username should consist only of letters, digits, + # underscores, periods, at signs and dashes, and not start with a dash + # (as defined by IEEE Std 1003.1-2001)." + # We use "@" as separator between username and hostname, so we must + # disallow it within the pure username part. + optional_user_re = r""" + (?:(?P[^@:/]+)@)? + """ + + # path must not contain :: (it ends at :: or string end), but may contain single colons. + # to avoid ambiguities with other regexes, it must also not start with ":" nor with "//" nor with "ssh://". + scp_path_re = r""" + (?!(:|//|ssh://)) # not starting with ":" or // or ssh:// + (?P([^:]|(:(?!:)))+) # any chars, but no "::" + """ + + # file_path must not contain :: (it ends at :: or string end), but may contain single colons. + # it must start with a / and that slash is part of the path. + file_path_re = r""" + (?P(([^/]*)/([^:]|(:(?!:)))+)) # start opt. servername, then /, then any chars, but no "::" + """ + + # abs_path must not contain :: (it ends at :: or string end), but may contain single colons. + # it must start with a / and that slash is part of the path. 
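+    # For illustration, location strings accepted by the regexes below include:
+    #   ssh://user@host:2222/srv/repo::archive   (ssh_re)
+    #   user@host:backup/repo                    (scp_re)
+    #   /srv/repo::archive                       (scp_re, local path)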
+ abs_path_re = r""" + (?P(/([^:]|(:(?!:)))+)) # start with /, then any chars, but no "::" + """ + + # optional ::archive_name at the end, archive name must not contain "/". + # borg mount's FUSE filesystem creates one level of directories from + # the archive names and of course "/" is not valid in a directory name. + optional_archive_re = r""" + (?: + :: # "::" as separator + (?P[^/]+) # archive name must not contain "/" + )?$""" # must match until the end + + # regexes for misc. kinds of supported location specifiers: + ssh_re = re.compile(r""" + (?Pssh):// # ssh:// + """ + optional_user_re + r""" # user@ (optional) + (?P([^:/]+|\[[0-9a-fA-F:.]+\]))(?::(?P\d+))? # host or host:port or [ipv6] or [ipv6]:port + """ + abs_path_re + optional_archive_re, re.VERBOSE) # path or path::archive + + file_re = re.compile(r""" + (?Pfile):// # file:// + """ + file_path_re + optional_archive_re, re.VERBOSE) # servername/path, path or path::archive + + # note: scp_re is also use for local paths + scp_re = re.compile(r""" + ( + """ + optional_user_re + r""" # user@ (optional) + (?P([^:/]+|\[[0-9a-fA-F:.]+\])): # host: (don't match / or [ipv6] in host to disambiguate from file:) + )? # user@host: part is optional + """ + scp_path_re + optional_archive_re, re.VERBOSE) # path with optional archive + + # get the repo from BORG_REPO env and the optional archive from param. + # if the syntax requires giving REPOSITORY (see "borg mount"), + # use "::" to let it use the env var. + # if REPOSITORY argument is optional, it'll automatically use the env. + env_re = re.compile(r""" # the repo part is fetched from BORG_REPO + (?:::$) # just "::" is ok (when a pos. arg is required, no archive) + | # or + """ + optional_archive_re, re.VERBOSE) # archive name (optional, may be empty) + + win_file_re = re.compile(r'(?:file://)?(?P(?:[a-zA-Z]:[\\/])?(?:[^:]*))' + optional_archive_re, re.VERBOSE) + + def __init__(self, text=''): + self.orig = text + if not self.parse(self.orig): + raise ValueError('Location: parse failed: %s' % self.orig) + + def parse(self, text): + text = replace_placeholders(text) + valid = self._parse(text) + if valid: + return True + m = self.env_re.match(text) + if not m: + return False + repo = os.environ.get('BORG_REPO') + if repo is None: + return False + valid = self._parse(repo) + if not valid: + return False + self.archive = m.group('archive') + return True + + def _parse(self, text): + def normpath_special(p): + # avoid that normpath strips away our relative path hack and even makes p absolute + relative = p.startswith('/./') + if sys.platform != 'win32': + p = os.path.normpath(p) + else: + p = posixpath.normpath(p) + return ('/.' 
+ p) if relative else p + if sys.platform != 'win32': + m = self.ssh_re.match(text) + if m: + self.proto = m.group('proto') + self.user = m.group('user') + self._host = m.group('host') + self.port = m.group('port') and int(m.group('port')) or None + self.path = normpath_special(m.group('path')) + self.archive = m.group('archive') + return True + m = self.file_re.match(text) + if m: + self.proto = m.group('proto') + self.path = normpath_special(m.group('path')) + self.archive = m.group('archive') + return True + m = self.scp_re.match(text) + if m: + self.user = m.group('user') + self._host = m.group('host') + self.path = normpath_special(m.group('path')) + self.archive = m.group('archive') + self.proto = self._host and 'ssh' or 'file' + return True + else: + m = self.win_file_re.match(text) + if m: + self.proto = 'file' + self.path = os.path.normpath(m.group('path').replace('/', '\\')) + self.archive = m.group('archive') + return True + m = self.ssh_re.match(text) + if m: + self.proto = m.group('proto') + self.user = m.group('user') + self._host = m.group('host') + self.port = m.group('port') and int(m.group('port')) or None + self.path = normpath_special(m.group('path')) + self.archive = m.group('archive') + return True + + return False + + def __str__(self): + items = [ + 'proto=%r' % self.proto, + 'user=%r' % self.user, + 'host=%r' % self.host, + 'port=%r' % self.port, + 'path=%r' % self.path, + 'archive=%r' % self.archive, + ] + return ', '.join(items) + + def to_key_filename(self): + name = re.sub('[^\w]', '_', self.path).strip('_') + if self.proto != 'file': + name = re.sub('[^\w]', '_', self.host) + '__' + name + if len(name) > 100: + # Limit file names to some reasonable length. Most file systems + # limit them to 255 [unit of choice]; due to variations in unicode + # handling we truncate to 100 *characters*. 
+ name = name[:100] + return os.path.join(get_keys_dir(), name) + + def __repr__(self): + return "Location(%s)" % self + + @property + def host(self): + # strip square brackets used for IPv6 addrs + if self._host is not None: + return self._host.lstrip('[').rstrip(']') + + def canonical_path(self): + if self.proto == 'file': + return self.path + else: + if self.path and self.path.startswith('~'): + path = '/' + self.path # /~/x = path x relative to home dir + elif self.path and not self.path.startswith('/'): + path = '/./' + self.path # /./x = path x relative to cwd + else: + path = self.path + return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '', + self._host, # needed for ipv6 addrs + ':{}'.format(self.port) if self.port else '', + path) + + +def location_validator(archive=None): + def validator(text): + try: + loc = Location(text) + except ValueError: + raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text) from None + if archive is True and not loc.archive: + raise argparse.ArgumentTypeError('"%s": No archive specified' % text) + elif archive is False and loc.archive: + raise argparse.ArgumentTypeError('"%s" No archive can be specified' % text) + return loc + return validator + + +def archivename_validator(): + def validator(text): + if '/' in text or '::' in text or not text: + raise argparse.ArgumentTypeError('Invalid repository name: "%s"' % text) + return text + return validator + + +class BaseFormatter: + FIXED_KEYS = { + # Formatting aids + 'LF': '\n', + 'SPACE': ' ', + 'TAB': '\t', + 'CR': '\r', + 'NUL': '\0', + 'NEWLINE': os.linesep, + 'NL': os.linesep, + } + + def get_item_data(self, item): + raise NotImplementedError + + def format_item(self, item): + return self.format.format_map(self.get_item_data(item)) + + @staticmethod + def keys_help(): + return "- NEWLINE: OS dependent line separator\n" \ + "- NL: alias of NEWLINE\n" \ + "- NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath\n" \ + "- SPACE\n" \ + "- TAB\n" \ + "- CR\n" \ + "- LF" + + +class ArchiveFormatter(BaseFormatter): + KEY_DESCRIPTIONS = { + 'name': 'archive name interpreted as text (might be missing non-text characters, see barchive)', + 'archive': 'archive name interpreted as text (might be missing non-text characters, see barchive)', + 'barchive': 'verbatim archive name, can contain any character except NUL', + 'comment': 'archive comment interpreted as text (might be missing non-text characters, see bcomment)', + 'bcomment': 'verbatim archive comment, can contain any character except NUL', + 'time': 'time (start) of creation of the archive', + # *start* is the key used by borg-info for this timestamp, this makes the formats more compatible + 'start': 'time (start) of creation of the archive', + 'end': 'time (end) of creation of the archive', + 'id': 'internal ID of the archive', + } + KEY_GROUPS = ( + ('name', 'archive', 'barchive', 'comment', 'bcomment', 'id'), + ('time', 'start', 'end'), + ) + + @classmethod + def available_keys(cls): + from .manifest import ArchiveInfo + fake_archive_info = ArchiveInfo('archivename', b'\1'*32, datetime(1970, 1, 1, tzinfo=timezone.utc)) + formatter = cls('', None, None, None) + keys = [] + keys.extend(formatter.call_keys.keys()) + keys.extend(formatter.get_item_data(fake_archive_info).keys()) + return keys + + @classmethod + def keys_help(cls): + help = [] + keys = cls.available_keys() + for key in cls.FIXED_KEYS: + keys.remove(key) + + for group in cls.KEY_GROUPS: + for key in group: + keys.remove(key) 
+ text = "- " + key + if key in cls.KEY_DESCRIPTIONS: + text += ": " + cls.KEY_DESCRIPTIONS[key] + help.append(text) + help.append("") + assert not keys, str(keys) + return "\n".join(help) + + def __init__(self, format, repository, manifest, key, *, json=False): + self.repository = repository + self.manifest = manifest + self.key = key + self.name = None + self.id = None + self._archive = None + self.json = json + static_keys = {} # here could be stuff on repo level, above archive level + static_keys.update(self.FIXED_KEYS) + self.format = partial_format(format, static_keys) + self.format_keys = {f[1] for f in Formatter().parse(format)} + self.call_keys = { + 'comment': partial(self.get_comment, rs=True), + 'bcomment': partial(self.get_comment, rs=False), + 'end': self.get_ts_end, + } + self.used_call_keys = set(self.call_keys) & self.format_keys + if self.json: + self.item_data = {} + self.format_item = self.format_item_json + else: + self.item_data = static_keys + + def format_item_json(self, item): + return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n' + + def get_item_data(self, archive_info): + self.name = archive_info.name + self.id = archive_info.id + item_data = {} + item_data.update(self.item_data) + item_data.update({ + 'name': remove_surrogates(archive_info.name), + 'archive': remove_surrogates(archive_info.name), + 'barchive': archive_info.name, + 'id': bin_to_hex(archive_info.id), + 'time': self.format_time(archive_info.ts), + 'start': self.format_time(archive_info.ts), + }) + for key in self.used_call_keys: + item_data[key] = self.call_keys[key]() + return item_data + + @property + def archive(self): + """lazy load / update loaded archive""" + if self._archive is None or self._archive.id != self.id: + from ..archive import Archive + self._archive = Archive(self.repository, self.key, self.manifest, self.name) + return self._archive + + def get_comment(self, rs): + return remove_surrogates(self.archive.comment) if rs else self.archive.comment + + def get_ts_end(self): + return self.format_time(self.archive.ts_end) + + def format_time(self, ts): + return OutputTimestamp(ts) + + +class ItemFormatter(BaseFormatter): + KEY_DESCRIPTIONS = { + 'bpath': 'verbatim POSIX path, can contain any character except NUL', + 'path': 'path interpreted as text (might be missing non-text characters, see bpath)', + 'source': 'link target for links (identical to linktarget)', + 'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links', + 'csize': 'compressed size', + 'dsize': 'deduplicated size', + 'dcsize': 'deduplicated compressed size', + 'num_chunks': 'number of chunks in this file', + 'unique_chunks': 'number of unique chunks in this file', + 'health': 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)', + } + KEY_GROUPS = ( + ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'), + ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'), + ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'), + tuple(sorted(hashlib.algorithms_guaranteed)), + ('archiveid', 'archivename', 'extra'), + ('health', ) + ) + + KEYS_REQUIRING_CACHE = ( + 'dsize', 'dcsize', 'unique_chunks', + ) + + @classmethod + def available_keys(cls): + class FakeArchive: + fpr = name = "" + + from ..item import Item + fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0) + formatter = cls(FakeArchive, "") + keys = [] + keys.extend(formatter.call_keys.keys()) + 
keys.extend(formatter.get_item_data(fake_item).keys()) + return keys + + @classmethod + def keys_help(cls): + help = [] + keys = cls.available_keys() + for key in cls.FIXED_KEYS: + keys.remove(key) + + for group in cls.KEY_GROUPS: + for key in group: + keys.remove(key) + text = "- " + key + if key in cls.KEY_DESCRIPTIONS: + text += ": " + cls.KEY_DESCRIPTIONS[key] + help.append(text) + help.append("") + assert not keys, str(keys) + return "\n".join(help) + + @classmethod + def format_needs_cache(cls, format): + format_keys = {f[1] for f in Formatter().parse(format)} + return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys) + + def __init__(self, archive, format, *, json_lines=False): + self.archive = archive + self.json_lines = json_lines + static_keys = { + 'archivename': archive.name, + 'archiveid': archive.fpr, + } + static_keys.update(self.FIXED_KEYS) + if self.json_lines: + self.item_data = {} + self.format_item = self.format_item_json + else: + self.item_data = static_keys + self.format = partial_format(format, static_keys) + self.format_keys = {f[1] for f in Formatter().parse(format)} + self.call_keys = { + 'size': self.calculate_size, + 'csize': self.calculate_csize, + 'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size), + 'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize), + 'num_chunks': self.calculate_num_chunks, + 'unique_chunks': partial(self.sum_unique_chunks_metadata, lambda chunk: 1), + 'isomtime': partial(self.format_iso_time, 'mtime'), + 'isoctime': partial(self.format_iso_time, 'ctime'), + 'isoatime': partial(self.format_iso_time, 'atime'), + 'mtime': partial(self.format_time, 'mtime'), + 'ctime': partial(self.format_time, 'ctime'), + 'atime': partial(self.format_time, 'atime'), + } + for hash_function in hashlib.algorithms_guaranteed: + self.add_key(hash_function, partial(self.hash_item, hash_function)) + self.used_call_keys = set(self.call_keys) & self.format_keys + + def format_item_json(self, item): + return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n' + + def add_key(self, key, callable_with_item): + self.call_keys[key] = callable_with_item + self.used_call_keys = set(self.call_keys) & self.format_keys + + def get_item_data(self, item): + item_data = {} + item_data.update(self.item_data) + mode = stat.filemode(item.mode) + item_type = mode[0] + + source = item.get('source', '') + extra = '' + if source: + source = remove_surrogates(source) + if item_type == 'l': + extra = ' -> %s' % source + else: + mode = 'h' + mode[1:] + extra = ' link to %s' % source + item_data['type'] = item_type + item_data['mode'] = mode + item_data['user'] = item.user or item.uid + item_data['group'] = item.group or item.gid + item_data['uid'] = item.uid + item_data['gid'] = item.gid + item_data['path'] = remove_surrogates(item.path) + if self.json_lines: + item_data['healthy'] = 'chunks_healthy' not in item + else: + item_data['bpath'] = item.path + item_data['extra'] = extra + item_data['health'] = 'broken' if 'chunks_healthy' in item else 'healthy' + item_data['source'] = source + item_data['linktarget'] = source + item_data['flags'] = item.get('bsdflags') + for key in self.used_call_keys: + item_data[key] = self.call_keys[key](item) + return item_data + + def sum_unique_chunks_metadata(self, metadata_func, item): + """ + sum unique chunks metadata, a unique chunk is a chunk which is referenced globally as often as it is in the + item + + item: The item to sum its unique chunks' metadata + metadata_func: A 
function that takes a parameter of type ChunkIndexEntry and returns a number, used to return + the metadata needed from the chunk + """ + chunk_index = self.archive.cache.chunks + chunks = item.get('chunks', []) + chunks_counter = Counter(c.id for c in chunks) + return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) + + def calculate_num_chunks(self, item): + return len(item.get('chunks', [])) + + def calculate_size(self, item): + # note: does not support hardlink slaves, they will be size 0 + return item.get_size(compressed=False) + + def calculate_csize(self, item): + # note: does not support hardlink slaves, they will be csize 0 + return item.get_size(compressed=True) + + def hash_item(self, hash_function, item): + if 'chunks' not in item: + return "" + hash = hashlib.new(hash_function) + for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]): + hash.update(data) + return hash.hexdigest() + + def format_time(self, key, item): + return OutputTimestamp(safe_timestamp(item.get(key) or item.mtime)) + + def format_iso_time(self, key, item): + return self.format_time(key, item).isoformat() + + +def file_status(mode): + if stat.S_ISREG(mode): + return 'A' + elif stat.S_ISDIR(mode): + return 'd' + elif stat.S_ISBLK(mode): + return 'b' + elif stat.S_ISCHR(mode): + return 'c' + elif stat.S_ISLNK(mode): + return 's' + elif stat.S_ISFIFO(mode): + return 'f' + return '?' + + +def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True): + """ + clean lines (usually read from a config file): + + 1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments. + + note: only "pure comment lines" are supported, no support for "trailing comments". + + :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines + :param lstrip: lstrip call arguments or False, if lstripping is not desired + :param rstrip: rstrip call arguments or False, if rstripping is not desired + :param remove_comments: remove comment lines (lines starting with "#") + :param remove_empty: remove empty lines + :return: yields processed lines + """ + for line in lines: + if lstrip is not False: + line = line.lstrip(lstrip) + if rstrip is not False: + line = line.rstrip(rstrip) + if remove_empty and not line: + continue + if remove_comments and line.startswith('#'): + continue + yield line + + +def swidth_slice(string, max_width): + """ + Return a slice of *max_width* cells from *string*. + + Negative *max_width* means from the end of string. + + *max_width* is in units of character cells (or "columns"). + Latin characters are usually one cell wide, many CJK characters are two cells wide. + """ + from ..platform import swidth + reverse = max_width < 0 + max_width = abs(max_width) + if reverse: + string = reversed(string) + current_swidth = 0 + result = [] + for character in string: + current_swidth += swidth(character) + if current_swidth > max_width: + break + result.append(character) + if reverse: + result.reverse() + return ''.join(result) + + +def ellipsis_truncate(msg, space): + """ + shorten a long string by adding ellipsis between it and return it, example: + this_is_a_very_long_string -------> this_is..._string + """ + from ..platform import swidth + ellipsis_width = swidth('...') + msg_width = swidth(msg) + if space < 8: + # if there is very little space, just show ... + return '...' 
+ ' ' * (space - ellipsis_width) + if space < ellipsis_width + msg_width: + return '%s...%s' % (swidth_slice(msg, space // 2 - ellipsis_width), + swidth_slice(msg, -space // 2)) + return msg + ' ' * (space - msg_width) + + +class BorgJsonEncoder(json.JSONEncoder): + def default(self, o): + from ..repository import Repository + from ..remote import RemoteRepository + from ..archive import Archive + from ..cache import LocalCache, AdHocCache + if isinstance(o, Repository) or isinstance(o, RemoteRepository): + return { + 'id': bin_to_hex(o.id), + 'location': o._location.canonical_path(), + } + if isinstance(o, Archive): + return o.info() + if isinstance(o, LocalCache): + return { + 'path': o.path, + 'stats': o.stats(), + } + if isinstance(o, AdHocCache): + return { + 'stats': o.stats(), + } + if callable(getattr(o, 'to_json', None)): + return o.to_json() + return super().default(o) + + +def basic_json_data(manifest, *, cache=None, extra=None): + key = manifest.key + data = extra or {} + data.update({ + 'repository': BorgJsonEncoder().default(manifest.repository), + 'encryption': { + 'mode': key.ARG_NAME, + }, + }) + data['repository']['last_modified'] = OutputTimestamp(manifest.last_timestamp.replace(tzinfo=timezone.utc)) + if key.NAME.startswith('key file'): + data['encryption']['keyfile'] = key.find_key() + if cache: + data['cache'] = cache + return data + + +def json_dump(obj): + """Dump using BorgJSONEncoder.""" + return json.dumps(obj, sort_keys=True, indent=4, cls=BorgJsonEncoder) + + +def json_print(obj): + print(json_dump(obj)) + + +def prepare_dump_dict(d): + def decode_bytes(value): + # this should somehow be reversible later, but usual strings should + # look nice and chunk ids should mostly show in hex. Use a special + # inband signaling character (ASCII DEL) to distinguish between + # decoded and hex mode. + if not value.startswith(b'\x7f'): + try: + value = value.decode() + return value + except UnicodeDecodeError: + pass + return '\u007f' + bin_to_hex(value) + + def decode_tuple(t): + res = [] + for value in t: + if isinstance(value, dict): + value = decode(value) + elif isinstance(value, tuple) or isinstance(value, list): + value = decode_tuple(value) + elif isinstance(value, bytes): + value = decode_bytes(value) + res.append(value) + return res + + def decode(d): + res = OrderedDict() + for key, value in d.items(): + if isinstance(value, dict): + value = decode(value) + elif isinstance(value, (tuple, list)): + value = decode_tuple(value) + elif isinstance(value, bytes): + value = decode_bytes(value) + if isinstance(key, bytes): + key = key.decode() + res[key] = value + return res + + return decode(d) diff --git a/src/borg/helpers/process.py b/src/borg/helpers/process.py new file mode 100644 index 00000000..2b2026a7 --- /dev/null +++ b/src/borg/helpers/process.py @@ -0,0 +1,159 @@ +import contextlib +import os +import os.path +import re +import shlex +import signal +import subprocess +import sys + +from .. 
import __version__ + +from ..logger import create_logger +logger = create_logger() + + +def daemonize(): + """Detach process from controlling terminal and run in background + + Returns: old and new get_process_id tuples + """ + from ..platform import get_process_id + old_id = get_process_id() + pid = os.fork() + if pid: + os._exit(0) + os.setsid() + pid = os.fork() + if pid: + os._exit(0) + os.chdir('/') + os.close(0) + os.close(1) + os.close(2) + fd = os.open(os.devnull, os.O_RDWR) + os.dup2(fd, 0) + os.dup2(fd, 1) + os.dup2(fd, 2) + new_id = get_process_id() + return old_id, new_id + + +class SignalException(BaseException): + """base class for all signal-based exceptions""" + + +class SigHup(SignalException): + """raised on SIGHUP signal""" + + +class SigTerm(SignalException): + """raised on SIGTERM signal""" + + +@contextlib.contextmanager +def signal_handler(sig, handler): + """ + when entering context, set up signal handler for signal . + when leaving context, restore original signal handler. + + can bei either a str when giving a signal.SIGXXX attribute name (it + won't crash if the attribute name does not exist as some names are platform + specific) or a int, when giving a signal number. + + is any handler value as accepted by the signal.signal(sig, handler). + """ + if isinstance(sig, str): + sig = getattr(signal, sig, None) + if sig is not None: + orig_handler = signal.signal(sig, handler) + try: + yield + finally: + if sig is not None: + signal.signal(sig, orig_handler) + + +def raising_signal_handler(exc_cls): + def handler(sig_no, frame): + # setting SIG_IGN avoids that an incoming second signal of this + # kind would raise a 2nd exception while we still process the + # exception handler for exc_cls for the 1st signal. + signal.signal(sig_no, signal.SIG_IGN) + raise exc_cls + + return handler + + +def popen_with_error_handling(cmd_line: str, log_prefix='', **kwargs): + """ + Handle typical errors raised by subprocess.Popen. Return None if an error occurred, + otherwise return the Popen object. + + *cmd_line* is split using shlex (e.g. 'gzip -9' => ['gzip', '-9']). + + Log messages will be prefixed with *log_prefix*; if set, it should end with a space + (e.g. log_prefix='--some-option: '). + + Does not change the exit code. + """ + assert not kwargs.get('shell'), 'Sorry pal, shell mode is a no-no' + try: + command = shlex.split(cmd_line) + if not command: + raise ValueError('an empty command line is not permitted') + except ValueError as ve: + logger.error('%s%s', log_prefix, ve) + return + logger.debug('%scommand line: %s', log_prefix, command) + try: + return subprocess.Popen(command, **kwargs) + except FileNotFoundError: + logger.error('%sexecutable not found: %s', log_prefix, command[0]) + return + except PermissionError: + logger.error('%spermission denied: %s', log_prefix, command[0]) + return + + +def is_terminal(fd=sys.stdout): + return hasattr(fd, 'isatty') and fd.isatty() and (sys.platform != 'win32' or 'ANSICON' in os.environ) + + +def prepare_subprocess_env(system, env=None): + """ + Prepare the environment for a subprocess we are going to create. + + :param system: True for preparing to invoke system-installed binaries, + False for stuff inside the pyinstaller environment (like borg, python). + :param env: optionally give a environment dict here. if not given, default to os.environ. 
+ :return: a modified copy of the environment + """ + env = dict(env if env is not None else os.environ) + if system: + # a pyinstaller binary's bootloader modifies LD_LIBRARY_PATH=/tmp/_MEIXXXXXX, + # but we do not want that system binaries (like ssh or other) pick up + # (non-matching) libraries from there. + # thus we install the original LDLP, before pyinstaller has modified it: + lp_key = 'LD_LIBRARY_PATH' + lp_orig = env.get(lp_key + '_ORIG') # pyinstaller >= 20160820 / v3.2.1 has this + if lp_orig is not None: + env[lp_key] = lp_orig + else: + # We get here in 2 cases: + # 1. when not running a pyinstaller-made binary. + # in this case, we must not kill LDLP. + # 2. when running a pyinstaller-made binary and there was no LDLP + # in the original env (in this case, the pyinstaller bootloader + # does *not* put ..._ORIG into the env either). + # in this case, we must kill LDLP. + # The directory used by pyinstaller is created by mkdtemp("_MEIXXXXXX"), + # we can use that to differentiate between the cases. + lp = env.get(lp_key) + if lp is not None and re.search(r'/_MEI......', lp): + env.pop(lp_key) + # security: do not give secrets to subprocess + env.pop('BORG_PASSPHRASE', None) + # for information, give borg version to the subprocess + env['BORG_VERSION'] = __version__ + return env diff --git a/src/borg/helpers/progress.py b/src/borg/helpers/progress.py new file mode 100644 index 00000000..1968c355 --- /dev/null +++ b/src/borg/helpers/progress.py @@ -0,0 +1,199 @@ +import logging +import json +import sys +import time +from shutil import get_terminal_size + +from ..logger import create_logger +logger = create_logger() + +from .parseformat import ellipsis_truncate + + +def justify_to_terminal_size(message): + terminal_space = get_terminal_size(fallback=(-1, -1))[0] + # justify only if we are outputting to a terminal + if terminal_space != -1: + return message.ljust(terminal_space) + return message + + +class ProgressIndicatorBase: + LOGGER = 'borg.output.progress' + JSON_TYPE = None + json = False + + operation_id_counter = 0 + + @classmethod + def operation_id(cls): + """Unique number, can be used by receiving applications to distinguish different operations.""" + cls.operation_id_counter += 1 + return cls.operation_id_counter + + def __init__(self, msgid=None): + self.handler = None + self.logger = logging.getLogger(self.LOGGER) + self.id = self.operation_id() + self.msgid = msgid + + # If there are no handlers, set one up explicitly because the + # terminator and propagation needs to be set. If there are, + # they must have been set up by BORG_LOGGING_CONF: skip setup. 
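        # Two cases here:
        # - BORG_LOGGING_CONF was used: the progress logger already has handlers, so nothing is added.
        # - otherwise: a dedicated stderr handler is created below, with '\r' as line terminator so
        #   successive progress lines overwrite each other (or '\n' when JSON output is active).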
+ if not self.logger.handlers: + self.handler = logging.StreamHandler(stream=sys.stderr) + self.handler.setLevel(logging.INFO) + logger = logging.getLogger('borg') + # Some special attributes on the borg logger, created by setup_logging + # But also be able to work without that + try: + formatter = logger.formatter + terminator = '\n' if logger.json else '\r' + self.json = logger.json + except AttributeError: + terminator = '\r' + else: + self.handler.setFormatter(formatter) + self.handler.terminator = terminator + + self.logger.addHandler(self.handler) + if self.logger.level == logging.NOTSET: + self.logger.setLevel(logging.WARN) + self.logger.propagate = False + + # If --progress is not set then the progress logger level will be WARN + # due to setup_implied_logging (it may be NOTSET with a logging config file, + # but the interactions there are generally unclear), so self.emit becomes + # False, which is correct. + # If --progress is set then the level will be INFO as per setup_implied_logging; + # note that this is always the case for serve processes due to a "args.progress |= is_serve". + # In this case self.emit is True. + self.emit = self.logger.getEffectiveLevel() == logging.INFO + + def __del__(self): + if self.handler is not None: + self.logger.removeHandler(self.handler) + self.handler.close() + + def output_json(self, *, finished=False, **kwargs): + assert self.json + if not self.emit: + return + kwargs.update(dict( + operation=self.id, + msgid=self.msgid, + type=self.JSON_TYPE, + finished=finished, + time=time.time(), + )) + print(json.dumps(kwargs), file=sys.stderr, flush=True) + + def finish(self): + if self.json: + self.output_json(finished=True) + else: + self.output('') + + +class ProgressIndicatorMessage(ProgressIndicatorBase): + JSON_TYPE = 'progress_message' + + def output(self, msg): + if self.json: + self.output_json(message=msg) + else: + self.logger.info(justify_to_terminal_size(msg)) + + +class ProgressIndicatorPercent(ProgressIndicatorBase): + JSON_TYPE = 'progress_percent' + + def __init__(self, total=0, step=5, start=0, msg="%3.0f%%", msgid=None): + """ + Percentage-based progress indicator + + :param total: total amount of items + :param step: step size in percent + :param start: at which percent value to start + :param msg: output message, must contain one %f placeholder for the percentage + """ + self.counter = 0 # 0 .. 
(total-1) + self.total = total + self.trigger_at = start # output next percentage value when reaching (at least) this + self.step = step + self.msg = msg + + super().__init__(msgid=msgid) + + def progress(self, current=None, increase=1): + if current is not None: + self.counter = current + pct = self.counter * 100 / self.total + self.counter += increase + if pct >= self.trigger_at: + self.trigger_at += self.step + return pct + + def show(self, current=None, increase=1, info=None): + """ + Show and output the progress message + + :param current: set the current percentage [None] + :param increase: increase the current percentage [None] + :param info: array of strings to be formatted with msg [None] + """ + pct = self.progress(current, increase) + if pct is not None: + # truncate the last argument, if no space is available + if info is not None: + if not self.json: + # no need to truncate if we're not outputing to a terminal + terminal_space = get_terminal_size(fallback=(-1, -1))[0] + if terminal_space != -1: + space = terminal_space - len(self.msg % tuple([pct] + info[:-1] + [''])) + info[-1] = ellipsis_truncate(info[-1], space) + return self.output(self.msg % tuple([pct] + info), justify=False, info=info) + + return self.output(self.msg % pct) + + def output(self, message, justify=True, info=None): + if self.json: + self.output_json(message=message, current=self.counter, total=self.total, info=info) + else: + if justify: + message = justify_to_terminal_size(message) + self.logger.info(message) + + +class ProgressIndicatorEndless: + def __init__(self, step=10, file=None): + """ + Progress indicator (long row of dots) + + :param step: every Nth call, call the func + :param file: output file, default: sys.stderr + """ + self.counter = 0 # call counter + self.triggered = 0 # increases 1 per trigger event + self.step = step # trigger every calls + if file is None: + file = sys.stderr + self.file = file + + def progress(self): + self.counter += 1 + trigger = self.counter % self.step == 0 + if trigger: + self.triggered += 1 + return trigger + + def show(self): + trigger = self.progress() + if trigger: + return self.output(self.triggered) + + def output(self, triggered): + print('.', end='', file=self.file, flush=True) + + def finish(self): + print(file=self.file) diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py new file mode 100644 index 00000000..c5a5ddd8 --- /dev/null +++ b/src/borg/helpers/time.py @@ -0,0 +1,139 @@ +import os +import time +from datetime import datetime, timezone + +from ..constants import ISO_FORMAT, ISO_FORMAT_NO_USECS + + +def to_localtime(ts): + """Convert datetime object from UTC to local time zone""" + return datetime(*time.localtime((ts - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds())[:6]) + + +def parse_timestamp(timestamp, tzinfo=timezone.utc): + """Parse a ISO 8601 timestamp string""" + fmt = ISO_FORMAT if '.' in timestamp else ISO_FORMAT_NO_USECS + dt = datetime.strptime(timestamp, fmt) + if tzinfo is not None: + dt = dt.replace(tzinfo=tzinfo) + return dt + + +def timestamp(s): + """Convert a --timestamp=s argument to a datetime object""" + try: + # is it pointing to a file / directory? + ts = safe_s(os.stat(s).st_mtime) + return datetime.utcfromtimestamp(ts) + except OSError: + # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support. 
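            # illustrative examples of values accepted by the format list below:
            #   --timestamp=2017-07-01T12:00:00Z    --timestamp="2017-07-01 12:00"
            #   --timestamp=2017-07-01              --timestamp=2017-182  (year + day-of-year)
            # anything else falls through the loop and raises ValueError.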
+ for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', + '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', + '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M', + '%Y-%m-%d', '%Y-%j', + ): + try: + return datetime.strptime(s, format) + except ValueError: + continue + raise ValueError + + +# Not too rarely, we get crappy timestamps from the fs, that overflow some computations. +# As they are crap anyway (valid filesystem timestamps always refer to the past up to +# the present, but never to the future), nothing is lost if we just clamp them to the +# maximum value we can support. +# As long as people are using borg on 32bit platforms to access borg archives, we must +# keep this value True. But we can expect that we can stop supporting 32bit platforms +# well before coming close to the year 2038, so this will never be a practical problem. +SUPPORT_32BIT_PLATFORMS = True # set this to False before y2038. + +if SUPPORT_32BIT_PLATFORMS: + # second timestamps will fit into a signed int32 (platform time_t limit). + # nanosecond timestamps thus will naturally fit into a signed int64. + # subtract last 48h to avoid any issues that could be caused by tz calculations. + # this is in the year 2038, so it is also less than y9999 (which is a datetime internal limit). + # msgpack can pack up to uint64. + MAX_S = 2**31-1 - 48*3600 + MAX_NS = MAX_S * 1000000000 +else: + # nanosecond timestamps will fit into a signed int64. + # subtract last 48h to avoid any issues that could be caused by tz calculations. + # this is in the year 2262, so it is also less than y9999 (which is a datetime internal limit). + # round down to 1e9 multiple, so MAX_NS corresponds precisely to a integer MAX_S. + # msgpack can pack up to uint64. + MAX_NS = (2**63-1 - 48*3600*1000000000) // 1000000000 * 1000000000 + MAX_S = MAX_NS // 1000000000 + + +def safe_s(ts): + if 0 <= ts <= MAX_S: + return ts + elif ts < 0: + return 0 + else: + return MAX_S + + +def safe_ns(ts): + if 0 <= ts <= MAX_NS: + return ts + elif ts < 0: + return 0 + else: + return MAX_NS + + +def safe_timestamp(item_timestamp_ns): + t_ns = safe_ns(item_timestamp_ns) + return datetime.fromtimestamp(t_ns / 1e9) + + +def format_time(ts: datetime): + """ + Convert *ts* to a human-friendly format with textual weekday. + """ + return ts.strftime('%a, %Y-%m-%d %H:%M:%S') + + +def isoformat_time(ts: datetime): + """ + Format *ts* according to ISO 8601. + """ + # note: first make all datetime objects tz aware before adding %z here. 
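    # illustrative: assuming ISO_FORMAT is the usual '%Y-%m-%dT%H:%M:%S.%f' pattern from constants,
    # isoformat_time(datetime(2017, 7, 1, 12, 0, 0)) would return '2017-07-01T12:00:00.000000'.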
+ return ts.strftime(ISO_FORMAT) + + +def format_timedelta(td): + """Format timedelta in a human friendly format + """ + ts = td.total_seconds() + s = ts % 60 + m = int(ts / 60) % 60 + h = int(ts / 3600) % 24 + txt = '%.2f seconds' % s + if m: + txt = '%d minutes %s' % (m, txt) + if h: + txt = '%d hours %s' % (h, txt) + if td.days: + txt = '%d days %s' % (td.days, txt) + return txt + + +class OutputTimestamp: + def __init__(self, ts: datetime): + if ts.tzinfo == timezone.utc: + ts = to_localtime(ts) + self.ts = ts + + def __format__(self, format_spec): + return format_time(self.ts) + + def __str__(self): + return '{}'.format(self) + + def isoformat(self): + return isoformat_time(self.ts) + + to_json = isoformat diff --git a/src/borg/helpers/usergroup.py b/src/borg/helpers/usergroup.py new file mode 100644 index 00000000..8777fa84 --- /dev/null +++ b/src/borg/helpers/usergroup.py @@ -0,0 +1,52 @@ +import sys +if sys.platform != 'win32': + import grp + import pwd +from functools import lru_cache + + +@lru_cache(maxsize=None) +def uid2user(uid, default=None): + try: + return pwd.getpwuid(uid).pw_name + except KeyError: + return default + + +@lru_cache(maxsize=None) +def user2uid(user, default=None): + try: + return user and pwd.getpwnam(user).pw_uid + except KeyError: + return default + + +@lru_cache(maxsize=None) +def gid2group(gid, default=None): + try: + return grp.getgrgid(gid).gr_name + except KeyError: + return default + + +@lru_cache(maxsize=None) +def group2gid(group, default=None): + try: + return group and grp.getgrnam(group).gr_gid + except KeyError: + return default + + +def posix_acl_use_stored_uid_gid(acl): + """Replace the user/group field with the stored uid/gid + """ + from .parseformat import safe_decode, safe_encode + entries = [] + for entry in safe_decode(acl).split('\n'): + if entry: + fields = entry.split(':') + if len(fields) == 4: + entries.append(':'.join([fields[0], fields[3], fields[2]])) + else: + entries.append(entry) + return safe_encode('\n'.join(entries)) diff --git a/src/borg/helpers/yes.py b/src/borg/helpers/yes.py new file mode 100644 index 00000000..91a9809a --- /dev/null +++ b/src/borg/helpers/yes.py @@ -0,0 +1,108 @@ +import logging +import json +import os +import os.path +import sys + +FALSISH = ('No', 'NO', 'no', 'N', 'n', '0', ) +TRUISH = ('Yes', 'YES', 'yes', 'Y', 'y', '1', ) +DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', ) + + +def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, + retry_msg=None, invalid_msg=None, env_msg='{} (from {})', + falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH, + default=False, retry=True, env_var_override=None, ofile=None, input=input, prompt=True, + msgid=None): + """Output (usually a question) and let user input an answer. + Qualifies the answer according to falsish, truish and defaultish as True, False or . + If it didn't qualify and retry is False (no retries wanted), return the default [which + defaults to False]. If retry is True let user retry answering until answer is qualified. + + If env_var_override is given and this var is present in the environment, do not ask + the user, but just use the env var contents as answer as if it was typed in. + Otherwise read input from stdin and proceed as normal. + If EOF is received instead an input or an invalid input without retry possibility, + return default. 
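    Illustrative example (with the default falsish/truish/defaultish tuples above): answers like
    "y", "yes" or "1" return True, "n", "no" or "0" return False, and an empty answer returns the
    given default; yes('Delete? ', env_var_override='MY_APP_DELETE') would instead take the answer
    from that (hypothetical) environment variable, if it is set.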
+ + :param msg: introducing message to output on ofile, no \n is added [None] + :param retry_msg: retry message to output on ofile, no \n is added [None] + :param false_msg: message to output before returning False [None] + :param true_msg: message to output before returning True [None] + :param default_msg: message to output before returning a [None] + :param invalid_msg: message to output after a invalid answer was given [None] + :param env_msg: message to output when using input from env_var_override ['{} (from {})'], + needs to have 2 placeholders for answer and env var name + :param falsish: sequence of answers qualifying as False + :param truish: sequence of answers qualifying as True + :param defaultish: sequence of answers qualifying as + :param default: default return value (defaultish answer was given or no-answer condition) [False] + :param retry: if True and input is incorrect, retry. Otherwise return default. [True] + :param env_var_override: environment variable name [None] + :param ofile: output stream [sys.stderr] + :param input: input function [input from builtins] + :return: boolean answer value, True or False + """ + def output(msg, msg_type, is_prompt=False, **kwargs): + json_output = getattr(logging.getLogger('borg'), 'json', False) + if json_output: + kwargs.update(dict( + type='question_%s' % msg_type, + msgid=msgid, + message=msg, + )) + print(json.dumps(kwargs), file=sys.stderr) + else: + if is_prompt: + print(msg, file=ofile, end='', flush=True) + else: + print(msg, file=ofile) + + msgid = msgid or env_var_override + # note: we do not assign sys.stderr as default above, so it is + # really evaluated NOW, not at function definition time. + if ofile is None: + ofile = sys.stderr + if default not in (True, False): + raise ValueError("invalid default value, must be True or False") + if msg: + output(msg, 'prompt', is_prompt=True) + while True: + answer = None + if env_var_override: + answer = os.environ.get(env_var_override) + if answer is not None and env_msg: + output(env_msg.format(answer, env_var_override), 'env_answer', env_var=env_var_override) + if answer is None: + if not prompt: + return default + try: + answer = input() + except EOFError: + # avoid defaultish[0], defaultish could be empty + answer = truish[0] if default else falsish[0] + if answer in defaultish: + if default_msg: + output(default_msg, 'accepted_default') + return default + if answer in truish: + if true_msg: + output(true_msg, 'accepted_true') + return True + if answer in falsish: + if false_msg: + output(false_msg, 'accepted_false') + return False + # if we get here, the answer was invalid + if invalid_msg: + output(invalid_msg, 'invalid_answer') + if not retry: + return default + if retry_msg: + output(retry_msg, 'prompt_retry', is_prompt=True) + # in case we used an environment variable and it gave an invalid answer, do not use it again: + env_var_override = None + + +def hostname_is_unique(): + return yes(env_var_override='BORG_HOSTNAME_IS_UNIQUE', prompt=False, env_msg=None, default=True) diff --git a/src/borg/item.pyx b/src/borg/item.pyx new file mode 100644 index 00000000..aacc74e1 --- /dev/null +++ b/src/borg/item.pyx @@ -0,0 +1,522 @@ +import stat +from collections import namedtuple + +from .constants import ITEM_KEYS +from .helpers import safe_encode, safe_decode +from .helpers import bigint_to_int, int_to_bigint +from .helpers import StableDict +from .helpers import format_file_size + +cdef extern from "_item.c": + object _object_to_optr(object obj) + object _optr_to_object(object 
bytes) + + +API_VERSION = '1.1_03' + + +class PropDict: + """ + Manage a dictionary via properties. + + - initialization by giving a dict or kw args + - on initialization, normalize dict keys to be str type + - access dict via properties, like: x.key_name + - membership check via: 'key_name' in x + - optionally, encode when setting a value + - optionally, decode when getting a value + - be safe against typos in key names: check against VALID_KEYS + - when setting a value: check type of value + + When "packing" a dict, ie. you have a dict with some data and want to convert it into an instance, + then use eg. Item({'a': 1, ...}). This way all keys in your dictionary are validated. + + When "unpacking", that is you've read a dictionary with some data from somewhere (eg. msgpack), + then use eg. Item(internal_dict={...}). This does not validate the keys, therefore unknown keys + are ignored instead of causing an error. + """ + VALID_KEYS = None # override with in child class + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + def __init__(self, data_dict=None, internal_dict=None, **kw): + if data_dict is None: + data = kw + elif not isinstance(data_dict, dict): + raise TypeError("data_dict must be dict") + else: + data = data_dict + self._dict = {} + self.update_internal(internal_dict or {}) + self.update(data) + + def update(self, d): + for k, v in d.items(): + if isinstance(k, bytes): + k = k.decode() + setattr(self, self._check_key(k), v) + + def update_internal(self, d): + for k, v in d.items(): + if isinstance(k, bytes): + k = k.decode() + self._dict[k] = v + + def __eq__(self, other): + return self.as_dict() == other.as_dict() + + def __repr__(self): + return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict) + + def as_dict(self): + """return the internal dictionary""" + return StableDict(self._dict) + + def _check_key(self, key): + """make sure key is of type str and known""" + if not isinstance(key, str): + raise TypeError("key must be str") + if key not in self.VALID_KEYS: + raise ValueError("key '%s' is not a valid key" % key) + return key + + def __contains__(self, key): + """do we have this key?""" + return self._check_key(key) in self._dict + + def get(self, key, default=None): + """get value for key, return default if key does not exist""" + return getattr(self, self._check_key(key), default) + + @staticmethod + def _make_property(key, value_type, value_type_name=None, encode=None, decode=None): + """return a property that deals with self._dict[key]""" + assert isinstance(key, str) + if value_type_name is None: + value_type_name = value_type.__name__ + doc = "%s (%s)" % (key, value_type_name) + type_error_msg = "%s value must be %s" % (key, value_type_name) + attr_error_msg = "attribute %s not found" % key + + def _get(self): + try: + value = self._dict[key] + except KeyError: + raise AttributeError(attr_error_msg) from None + if decode is not None: + value = decode(value) + return value + + def _set(self, value): + if not isinstance(value, value_type): + raise TypeError(type_error_msg) + if encode is not None: + value = encode(value) + self._dict[key] = value + + def _del(self): + try: + del self._dict[key] + except KeyError: + raise AttributeError(attr_error_msg) from None + + return property(_get, _set, _del, doc=doc) + + +ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize') + +class Item(PropDict): + """ + Item abstraction that deals with validation and the low-level details internally: + + Items are created either 
from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to Item(internal_dict=d) and use item.key_name later. + msgpack gives us byte-typed values for stuff that should be str, we automatically decode when getting + such a property and encode when setting it. + + If an Item shall be serialized, give as_dict() method output to msgpack packer. + + A bug in Attic up to and including release 0.13 added a (meaningless) 'acl' key to every item. + We must never re-use this key. See test_attic013_acl_bug for details. + """ + + VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', } # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + # properties statically defined, so that IDEs can know their names: + + path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + + acl_access = PropDict._make_property('acl_access', bytes) + acl_default = PropDict._make_property('acl_default', bytes) + acl_extended = PropDict._make_property('acl_extended', bytes) + acl_nfs4 = PropDict._make_property('acl_nfs4', bytes) + + mode = PropDict._make_property('mode', int) + uid = PropDict._make_property('uid', int) + gid = PropDict._make_property('gid', int) + rdev = PropDict._make_property('rdev', int) + bsdflags = PropDict._make_property('bsdflags', int) + + # note: we need to keep the bigint conversion for compatibility with borg 1.0 archives. + atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + + # size is only present for items with a chunk list and then it is sum(chunk_sizes) + # compatibility note: this is a new feature, in old archives size will be missing. + size = PropDict._make_property('size', int) + + hardlink_master = PropDict._make_property('hardlink_master', bool) + + chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None') + chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None') + + xattrs = PropDict._make_property('xattrs', StableDict) + + deleted = PropDict._make_property('deleted', bool) + nlink = PropDict._make_property('nlink', int) + + part = PropDict._make_property('part', int) + win_dacl = PropDict._make_property('win_dacl', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + win_sacl = PropDict._make_property('win_sacl', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + user_sid = PropDict._make_property('user_sid', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + + def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False, consider_ids=None): + """ + Determine the (uncompressed or compressed) size of this item. 
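        For example, item.get_size() returns the precomputed 'size' value if present, otherwise the
        sum of the item's chunk sizes; item.get_size(compressed=True) always sums the csize fields
        of the chunks instead.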
+ + :param hardlink_masters: If given, the size of hardlink slaves is computed via the hardlink master's chunk list, + otherwise size will be returned as 0. + :param memorize: Whether the computed size value will be stored into the item. + :param compressed: Whether the compressed or uncompressed size will be returned. + :param from_chunks: If true, size is computed from chunks even if a precomputed value is available. + :param consider_ids: Returns the size of the given ids only. + """ + attr = 'csize' if compressed else 'size' + assert not (compressed and memorize), 'Item does not have a csize field.' + assert not (consider_ids is not None and memorize), "Can't store size when considering only certain ids" + try: + if from_chunks or consider_ids is not None: + raise AttributeError + size = getattr(self, attr) + except AttributeError: + if stat.S_ISLNK(self.mode): + # get out of here quickly. symlinks have no own chunks, their fs size is the length of the target name. + # also, there is the dual-use issue of .source (#2343), so don't confuse it with a hardlink slave. + return len(self.source) + # no precomputed (c)size value available, compute it: + try: + chunks = getattr(self, 'chunks') + having_chunks = True + except AttributeError: + having_chunks = False + # this item has no (own) chunks list, but if this is a hardlink slave + # and we know the master, we can still compute the size. + if hardlink_masters is None: + chunks = None + else: + try: + master = getattr(self, 'source') + except AttributeError: + # not a hardlink slave, likely a directory or special file w/o chunks + chunks = None + else: + # hardlink slave, try to fetch hardlink master's chunks list + # todo: put precomputed size into hardlink_masters' values and use it, if present + chunks, _ = hardlink_masters.get(master, (None, None)) + if chunks is None: + return 0 + if consider_ids is not None: + size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks if chunk.id in consider_ids) + else: + size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks) + # if requested, memorize the precomputed (c)size for items that have an own chunks list: + if memorize and having_chunks: + setattr(self, attr, size) + return size + + def to_optr(self): + """ + Return an "object pointer" (optr), an opaque bag of bytes. + The return value is effectively a reference to this object + that can be passed exactly once to Item.from_optr to get this + object back. + + to_optr/from_optr must be used symmetrically, + don't call from_optr multiple times. + + This object can't be deallocated after a call to to_optr() + until from_optr() is called. + """ + return _object_to_optr(self) + + @classmethod + def from_optr(self, optr): + return _optr_to_object(optr) + + @classmethod + def create_deleted(cls, path): + return cls(deleted=True, chunks=[], mode=0, path=path) + + def is_link(self): + return self._is_type(stat.S_ISLNK) + + def is_dir(self): + return self._is_type(stat.S_ISDIR) + + def _is_type(self, typetest): + try: + return typetest(self.mode) + except AttributeError: + return False + + +class EncryptedKey(PropDict): + """ + EncryptedKey abstraction that deals with validation and the low-level details internally: + + A EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to EncryptedKey(d) and use enc_key.xxx later. 
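    For example, after unpacking a key blob, enc_key.version, enc_key.algorithm (e.g. 'sha256'),
    enc_key.iterations, enc_key.salt, enc_key.hash and enc_key.data are available as attributes.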
+ + If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = {'version', 'algorithm', 'iterations', 'salt', 'hash', 'data'} # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + version = PropDict._make_property('version', int) + algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode) + iterations = PropDict._make_property('iterations', int) + salt = PropDict._make_property('salt', bytes) + hash = PropDict._make_property('hash', bytes) + data = PropDict._make_property('data', bytes) + + +class Key(PropDict): + """ + Key abstraction that deals with validation and the low-level details internally: + + A Key is created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to Key(d) and use key.xxx later. + + If a Key shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = {'version', 'repository_id', 'enc_key', 'enc_hmac_key', 'id_key', 'chunk_seed', 'tam_required'} # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + version = PropDict._make_property('version', int) + repository_id = PropDict._make_property('repository_id', bytes) + enc_key = PropDict._make_property('enc_key', bytes) + enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes) + id_key = PropDict._make_property('id_key', bytes) + chunk_seed = PropDict._make_property('chunk_seed', int) + tam_required = PropDict._make_property('tam_required', bool) + + +class ArchiveItem(PropDict): + """ + ArchiveItem abstraction that deals with validation and the low-level details internally: + + An ArchiveItem is created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to ArchiveItem(d) and use arch.xxx later. + + If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer. 
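    For example, arch = ArchiveItem(internal_dict=unpacked_metadata) allows reading fields such as
    arch.name, arch.time, arch.items and arch.cmdline (unpacked_metadata being a dict as produced
    by the msgpack unpacker).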
+ """ + + VALID_KEYS = {'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end', + 'comment', 'chunker_params', + 'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', + } # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + version = PropDict._make_property('version', int) + name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + items = PropDict._make_property('items', list) + cmdline = PropDict._make_property('cmdline', list) # list of s-e-str + hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + chunker_params = PropDict._make_property('chunker_params', tuple) + recreate_source_id = PropDict._make_property('recreate_source_id', bytes) + recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str + recreate_args = PropDict._make_property('recreate_args', list) # list of s-e-str + recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list) # list of tuples + + +class ManifestItem(PropDict): + """ + ManifestItem abstraction that deals with validation and the low-level details internally: + + A ManifestItem is created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to ManifestItem(d) and use manifest.xxx later. + + If a ManifestItem shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = {'version', 'archives', 'timestamp', 'config', 'item_keys', } # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + version = PropDict._make_property('version', int) + archives = PropDict._make_property('archives', dict) # name -> dict + timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + config = PropDict._make_property('config', dict) + item_keys = PropDict._make_property('item_keys', tuple) + +class ItemDiff: + """ + Comparison of two items from different archives. + + The items may have different paths and still be considered equal (e.g. for renames). + It does not include extended or time attributes in the comparison. 
+ """ + + def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_owner=False, can_compare_chunk_ids=False): + self._item1 = item1 + self._item2 = item2 + self._numeric_owner = numeric_owner + self._can_compare_chunk_ids = can_compare_chunk_ids + self.equal = self._equal(chunk_iterator1, chunk_iterator2) + + def __repr__(self): + if self.equal: + return 'equal' + + changes = [] + + if self._item1.is_link() or self._item2.is_link(): + changes.append(self._link_string()) + + if 'chunks' in self._item1 and 'chunks' in self._item2: + changes.append(self._content_string()) + + if self._item1.is_dir() or self._item2.is_dir(): + changes.append(self._dir_string()) + + if not (self._item1.get('deleted') or self._item2.get('deleted')): + changes.append(self._owner_string()) + changes.append(self._mode_string()) + + return ' '.join((x for x in changes if x)) + + def _equal(self, chunk_iterator1, chunk_iterator2): + # if both are deleted, there is nothing at path regardless of what was deleted + if self._item1.get('deleted') and self._item2.get('deleted'): + return True + + attr_list = ['deleted', 'mode', 'source'] + attr_list += ['uid', 'gid'] if self._numeric_owner else ['user', 'group'] + for attr in attr_list: + if self._item1.get(attr) != self._item2.get(attr): + return False + + if 'mode' in self._item1: # mode of item1 and item2 is equal + if (self._item1.is_link() and 'source' in self._item1 and 'source' in self._item2 + and self._item1.source != self._item2.source): + return False + + if 'chunks' in self._item1 and 'chunks' in self._item2: + return self._content_equal(chunk_iterator1, chunk_iterator2) + + return True + + def _link_string(self): + if self._item1.get('deleted'): + return 'added link' + if self._item2.get('deleted'): + return 'removed link' + if 'source' in self._item1 and 'source' in self._item2 and self._item1.source != self._item2.source: + return 'changed link' + + def _content_string(self): + if self._item1.get('deleted'): + return ('added {:>13}'.format(format_file_size(self._item2.get_size()))) + if self._item2.get('deleted'): + return ('removed {:>11}'.format(format_file_size(self._item1.get_size()))) + if not self._can_compare_chunk_ids: + return 'modified' + chunk_ids1 = {c.id for c in self._item1.chunks} + chunk_ids2 = {c.id for c in self._item2.chunks} + added_ids = chunk_ids2 - chunk_ids1 + removed_ids = chunk_ids1 - chunk_ids2 + added = self._item2.get_size(consider_ids=added_ids) + removed = self._item1.get_size(consider_ids=removed_ids) + return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True), + format_file_size(-removed, precision=1, sign=True))) + + def _dir_string(self): + if self._item2.get('deleted') and not self._item1.get('deleted'): + return 'removed directory' + if self._item1.get('deleted') and not self._item2.get('deleted'): + return 'added directory' + + def _owner_string(self): + u_attr, g_attr = ('uid', 'gid') if self._numeric_owner else ('user', 'group') + u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr) + u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr) + if (u1, g1) != (u2, g2): + return '[{}:{} -> {}:{}]'.format(u1, g1, u2, g2) + + def _mode_string(self): + if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode: + return '[{} -> {}]'.format(stat.filemode(self._item1.mode), stat.filemode(self._item2.mode)) + + def _content_equal(self, chunk_iterator1, chunk_iterator2): + if self._can_compare_chunk_ids: + return self._item1.chunks == self._item2.chunks 
+ if self._item1.get_size() != self._item2.get_size(): + return False + return ItemDiff._chunk_content_equal(chunk_iterator1, chunk_iterator2) + + @staticmethod + def _chunk_content_equal(chunks1, chunks2): + """ + Compare chunk content and return True if they are identical. + + The chunks must be given as chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`). + """ + + end = object() + alen = ai = 0 + blen = bi = 0 + while True: + if not alen - ai: + a = next(chunks1, end) + if a is end: + return not blen - bi and next(chunks2, end) is end + a = memoryview(a) + alen = len(a) + ai = 0 + if not blen - bi: + b = next(chunks2, end) + if b is end: + return not alen - ai and next(chunks1, end) is end + b = memoryview(b) + blen = len(b) + bi = 0 + slicelen = min(alen - ai, blen - bi) + if a[ai:ai + slicelen] != b[bi:bi + slicelen]: + return False + ai += slicelen + bi += slicelen diff --git a/src/borg/key.py b/src/borg/key.py deleted file mode 100644 index 6965ae73..00000000 --- a/src/borg/key.py +++ /dev/null @@ -1,508 +0,0 @@ -import configparser -import getpass -import os -import sys -import textwrap -from binascii import a2b_base64, b2a_base64, hexlify -from hashlib import sha256, pbkdf2_hmac -from hmac import compare_digest - -import msgpack - -from .logger import create_logger -logger = create_logger() - -from .constants import * # NOQA -from .compress import Compressor, COMPR_BUFFER, get_compressor -from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256 -from .helpers import Chunk -from .helpers import Error, IntegrityError -from .helpers import yes -from .helpers import get_keys_dir -from .helpers import bin_to_hex -from .helpers import CompressionDecider2, CompressionSpec -from .item import Key, EncryptedKey - - -PREFIX = b'\0' * 8 - - -class PassphraseWrong(Error): - """passphrase supplied in BORG_PASSPHRASE is incorrect""" - - -class PasswordRetriesExceeded(Error): - """exceeded the maximum password retries""" - - -class UnsupportedPayloadError(Error): - """Unsupported payload type {}. A newer version is required to access this repository.""" - - -class KeyfileNotFoundError(Error): - """No key file for repository {} found in {}.""" - - -class KeyfileInvalidError(Error): - """Invalid key file for repository {} found in {}.""" - - -class KeyfileMismatchError(Error): - """Mismatch between repository {} and key file {}.""" - - -class RepoKeyNotFoundError(Error): - """No key entry found in the config of repository {}.""" - - -def key_creator(repository, args): - if args.encryption == 'keyfile': - return KeyfileKey.create(repository, args) - elif args.encryption == 'repokey': - return RepoKey.create(repository, args) - else: - return PlaintextKey.create(repository, args) - - -def key_factory(repository, manifest_data): - key_type = manifest_data[0] - if key_type == KeyfileKey.TYPE: - return KeyfileKey.detect(repository, manifest_data) - elif key_type == RepoKey.TYPE: - return RepoKey.detect(repository, manifest_data) - elif key_type == PassphraseKey.TYPE: - # we just dispatch to repokey mode and assume the passphrase was migrated to a repokey. - # see also comment in PassphraseKey class. 
- return RepoKey.detect(repository, manifest_data) - elif key_type == PlaintextKey.TYPE: - return PlaintextKey.detect(repository, manifest_data) - else: - raise UnsupportedPayloadError(key_type) - - -class KeyBase: - TYPE = None # override in subclasses - - def __init__(self, repository): - self.TYPE_STR = bytes([self.TYPE]) - self.repository = repository - self.target = None # key location file path / repo obj - self.compression_decider2 = CompressionDecider2(CompressionSpec('none')) - self.compressor = Compressor('none', buffer=COMPR_BUFFER) # for decompression - - def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key - """ - - def compress(self, chunk): - compr_args, chunk = self.compression_decider2.decide(chunk) - compressor = Compressor(**compr_args) - meta, data = chunk - data = compressor.compress(data) - return Chunk(data, **meta) - - def encrypt(self, chunk): - pass - - def decrypt(self, id, data): - pass - - -class PlaintextKey(KeyBase): - TYPE = 0x02 - - chunk_seed = 0 - - @classmethod - def create(cls, repository, args): - logger.info('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile" to enable encryption.') - return cls(repository) - - @classmethod - def detect(cls, repository, manifest_data): - return cls(repository) - - def id_hash(self, data): - return sha256(data).digest() - - def encrypt(self, chunk): - chunk = self.compress(chunk) - return b''.join([self.TYPE_STR, chunk.data]) - - def decrypt(self, id, data): - if data[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') - data = self.compressor.decompress(memoryview(data)[1:]) - if id and sha256(data).digest() != id: - raise IntegrityError('Chunk id verification failed') - return Chunk(data) - - -class AESKeyBase(KeyBase): - """Common base class shared by KeyfileKey and PassphraseKey - - Chunks are encrypted using 256bit AES in Counter Mode (CTR) - - Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT - - To reduce payload size only 8 bytes of the 16 bytes nonce is saved - in the payload, the first 8 bytes are always zeros. This does not - affect security but limits the maximum repository capacity to - only 295 exabytes! 
- """ - - PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE - - def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key - """ - return hmac_sha256(self.id_key, data) - - def encrypt(self, chunk): - chunk = self.compress(chunk) - self.enc_cipher.reset() - data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(chunk.data))) - hmac = hmac_sha256(self.enc_hmac_key, data) - return b''.join((self.TYPE_STR, hmac, data)) - - def decrypt(self, id, data): - if not (data[0] == self.TYPE or - data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): - raise IntegrityError('Invalid encryption envelope') - data_view = memoryview(data) - hmac_given = data_view[1:33] - hmac_computed = memoryview(hmac_sha256(self.enc_hmac_key, data_view[33:])) - if not compare_digest(hmac_computed, hmac_given): - raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = self.compressor.decompress(self.dec_cipher.decrypt(data_view[41:])) - if id: - hmac_given = id - hmac_computed = hmac_sha256(self.id_key, data) - if not compare_digest(hmac_computed, hmac_given): - raise IntegrityError('Chunk id verification failed') - return Chunk(data) - - def extract_nonce(self, payload): - if not (payload[0] == self.TYPE or - payload[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): - raise IntegrityError('Invalid encryption envelope') - nonce = bytes_to_long(payload[33:41]) - return nonce - - def init_from_random_data(self, data): - self.enc_key = data[0:32] - self.enc_hmac_key = data[32:64] - self.id_key = data[64:96] - self.chunk_seed = bytes_to_int(data[96:100]) - # Convert to signed int32 - if self.chunk_seed & 0x80000000: - self.chunk_seed = self.chunk_seed - 0xffffffff - 1 - - def init_ciphers(self, enc_iv=b''): - self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv) - self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) - - -class Passphrase(str): - @classmethod - def env_passphrase(cls, default=None): - passphrase = os.environ.get('BORG_PASSPHRASE', default) - if passphrase is not None: - return cls(passphrase) - - @classmethod - def getpass(cls, prompt): - return cls(getpass.getpass(prompt)) - - @classmethod - def verification(cls, passphrase): - if yes('Do you want your passphrase to be displayed for verification? [yN]: ', - env_var_override='BORG_DISPLAY_PASSPHRASE'): - print('Your passphrase (between double-quotes): "%s"' % passphrase, - file=sys.stderr) - print('Make sure the passphrase displayed above is exactly what you wanted.', - file=sys.stderr) - try: - passphrase.encode('ascii') - except UnicodeEncodeError: - print('Your passphrase (UTF-8 encoding in hex): %s' % - bin_to_hex(passphrase.encode('utf-8')), - file=sys.stderr) - print('As you have a non-ASCII passphrase, it is recommended to keep the UTF-8 encoding in hex together with the passphrase at a safe place.', - file=sys.stderr) - - @classmethod - def new(cls, allow_empty=False): - passphrase = cls.env_passphrase() - if passphrase is not None: - return passphrase - for retry in range(1, 11): - passphrase = cls.getpass('Enter new passphrase: ') - if allow_empty or passphrase: - passphrase2 = cls.getpass('Enter same passphrase again: ') - if passphrase == passphrase2: - cls.verification(passphrase) - logger.info('Remember your passphrase. 
Your data will be inaccessible without it.') - return passphrase - else: - print('Passphrases do not match', file=sys.stderr) - else: - print('Passphrase must not be blank', file=sys.stderr) - else: - raise PasswordRetriesExceeded - - def __repr__(self): - return '' - - def kdf(self, salt, iterations, length): - return pbkdf2_hmac('sha256', self.encode('utf-8'), salt, iterations, length) - - -class PassphraseKey(AESKeyBase): - # This mode was killed in borg 1.0, see: https://github.com/borgbackup/borg/issues/97 - # Reasons: - # - you can never ever change your passphrase for existing repos. - # - you can never ever use a different iterations count for existing repos. - # "Killed" means: - # - there is no automatic dispatch to this class via type byte - # - --encryption=passphrase is an invalid argument now - # This class is kept for a while to support migration from passphrase to repokey mode. - TYPE = 0x01 - iterations = 100000 # must not be changed ever! - - @classmethod - def create(cls, repository, args): - key = cls(repository) - logger.warning('WARNING: "passphrase" mode is unsupported since borg 1.0.') - passphrase = Passphrase.new(allow_empty=False) - key.init(repository, passphrase) - return key - - @classmethod - def detect(cls, repository, manifest_data): - prompt = 'Enter passphrase for %s: ' % repository._location.orig - key = cls(repository) - passphrase = Passphrase.env_passphrase() - if passphrase is None: - passphrase = Passphrase.getpass(prompt) - for retry in range(1, 3): - key.init(repository, passphrase) - try: - key.decrypt(None, manifest_data) - num_blocks = num_aes_blocks(len(manifest_data) - 41) - key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) - return key - except IntegrityError: - passphrase = Passphrase.getpass(prompt) - else: - raise PasswordRetriesExceeded - - def change_passphrase(self): - class ImmutablePassphraseError(Error): - """The passphrase for this encryption key type can't be changed.""" - - raise ImmutablePassphraseError - - def init(self, repository, passphrase): - self.init_from_random_data(passphrase.kdf(repository.id, self.iterations, 100)) - self.init_ciphers() - - -class KeyfileKeyBase(AESKeyBase): - @classmethod - def detect(cls, repository, manifest_data): - key = cls(repository) - target = key.find_key() - prompt = 'Enter passphrase for key %s: ' % target - passphrase = Passphrase.env_passphrase() - if passphrase is None: - passphrase = Passphrase() - if not key.load(target, passphrase): - for retry in range(0, 3): - passphrase = Passphrase.getpass(prompt) - if key.load(target, passphrase): - break - else: - raise PasswordRetriesExceeded - else: - if not key.load(target, passphrase): - raise PassphraseWrong - num_blocks = num_aes_blocks(len(manifest_data) - 41) - key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) - return key - - def find_key(self): - raise NotImplementedError - - def load(self, target, passphrase): - raise NotImplementedError - - def _load(self, key_data, passphrase): - cdata = a2b_base64(key_data) - data = self.decrypt_key_file(cdata, passphrase) - if data: - data = msgpack.unpackb(data) - key = Key(internal_dict=data) - if key.version != 1: - raise IntegrityError('Invalid key file header') - self.repository_id = key.repository_id - self.enc_key = key.enc_key - self.enc_hmac_key = key.enc_hmac_key - self.id_key = key.id_key - self.chunk_seed = key.chunk_seed - return True - return False - - def decrypt_key_file(self, data, passphrase): - data = 
msgpack.unpackb(data) - enc_key = EncryptedKey(internal_dict=data) - assert enc_key.version == 1 - assert enc_key.algorithm == 'sha256' - key = passphrase.kdf(enc_key.salt, enc_key.iterations, 32) - data = AES(is_encrypt=False, key=key).decrypt(enc_key.data) - if hmac_sha256(key, data) == enc_key.hash: - return data - - def encrypt_key_file(self, data, passphrase): - salt = os.urandom(32) - iterations = PBKDF2_ITERATIONS - key = passphrase.kdf(salt, iterations, 32) - hash = hmac_sha256(key, data) - cdata = AES(is_encrypt=True, key=key).encrypt(data) - enc_key = EncryptedKey( - version=1, - salt=salt, - iterations=iterations, - algorithm='sha256', - hash=hash, - data=cdata, - ) - return msgpack.packb(enc_key.as_dict()) - - def _save(self, passphrase): - key = Key( - version=1, - repository_id=self.repository_id, - enc_key=self.enc_key, - enc_hmac_key=self.enc_hmac_key, - id_key=self.id_key, - chunk_seed=self.chunk_seed, - ) - data = self.encrypt_key_file(msgpack.packb(key.as_dict()), passphrase) - key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii'))) - return key_data - - def change_passphrase(self): - passphrase = Passphrase.new(allow_empty=True) - self.save(self.target, passphrase) - logger.info('Key updated') - - @classmethod - def create(cls, repository, args): - passphrase = Passphrase.new(allow_empty=True) - key = cls(repository) - key.repository_id = repository.id - key.init_from_random_data(os.urandom(100)) - key.init_ciphers() - target = key.get_new_target(args) - key.save(target, passphrase) - logger.info('Key in "%s" created.' % target) - logger.info('Keep this key safe. Your data will be inaccessible without it.') - return key - - def save(self, target, passphrase): - raise NotImplementedError - - def get_new_target(self, args): - raise NotImplementedError - - -class KeyfileKey(KeyfileKeyBase): - TYPE = 0x00 - FILE_ID = 'BORG_KEY' - - def sanity_check(self, filename, id): - file_id = self.FILE_ID.encode() + b' ' - repo_id = hexlify(id) - with open(filename, 'rb') as fd: - # we do the magic / id check in binary mode to avoid stumbling over - # decoding errors if somebody has binary files in the keys dir for some reason. 
- if fd.read(len(file_id)) != file_id: - raise KeyfileInvalidError(self.repository._location.canonical_path(), filename) - if fd.read(len(repo_id)) != repo_id: - raise KeyfileMismatchError(self.repository._location.canonical_path(), filename) - return filename - - def find_key(self): - id = self.repository.id - keyfile = os.environ.get('BORG_KEY_FILE') - if keyfile: - return self.sanity_check(keyfile, id) - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - try: - return self.sanity_check(filename, id) - except (KeyfileInvalidError, KeyfileMismatchError): - pass - raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) - - def get_new_target(self, args): - keyfile = os.environ.get('BORG_KEY_FILE') - if keyfile: - return keyfile - filename = args.location.to_key_filename() - path = filename - i = 1 - while os.path.exists(path): - i += 1 - path = filename + '.%d' % i - return path - - def load(self, target, passphrase): - with open(target, 'r') as fd: - key_data = ''.join(fd.readlines()[1:]) - success = self._load(key_data, passphrase) - if success: - self.target = target - return success - - def save(self, target, passphrase): - key_data = self._save(passphrase) - with open(target, 'w') as fd: - fd.write('%s %s\n' % (self.FILE_ID, bin_to_hex(self.repository_id))) - fd.write(key_data) - fd.write('\n') - self.target = target - - -class RepoKey(KeyfileKeyBase): - TYPE = 0x03 - - def find_key(self): - loc = self.repository._location.canonical_path() - try: - self.repository.load_key() - return loc - except configparser.NoOptionError: - raise RepoKeyNotFoundError(loc) from None - - def get_new_target(self, args): - return self.repository - - def load(self, target, passphrase): - # what we get in target is just a repo location, but we already have the repo obj: - target = self.repository - key_data = target.load_key() - key_data = key_data.decode('utf-8') # remote repo: msgpack issue #99, getting bytes - success = self._load(key_data, passphrase) - if success: - self.target = target - return success - - def save(self, target, passphrase): - key_data = self._save(passphrase) - key_data = key_data.encode('utf-8') # remote repo: msgpack issue #99, giving bytes - target.save_key(key_data) - self.target = target diff --git a/src/borg/locking.py b/src/borg/locking.py index 8382ec57..0fc09275 100644 --- a/src/borg/locking.py +++ b/src/borg/locking.py @@ -1,24 +1,15 @@ import json import os -import socket import time +from . import platform from .helpers import Error, ErrorWithTraceback +from .logger import create_logger ADD, REMOVE = 'add', 'remove' SHARED, EXCLUSIVE = 'shared', 'exclusive' -# only determine the PID and hostname once. -# for FUSE mounts, we fork a child process that needs to release -# the lock made by the parent, so it needs to use the same PID for that. -_pid = os.getpid() -_hostname = socket.gethostname() - - -def get_id(): - """Get identification tuple for 'us'""" - thread_id = 0 - return _hostname, _pid, thread_id +logger = create_logger(__name__) class TimeoutTimer: @@ -109,12 +100,14 @@ class ExclusiveLock: This makes sure the lock is released again if the block is left, no matter how (e.g. if an exception occurred). 
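    With kill_stale_locks=True (typically enabled via BORG_HOSTNAME_IS_UNIQUE, see
    hostname_is_unique()), lock files whose (host, pid, thread) no longer belongs to a live
    process are removed automatically, so the lock can still be acquired instead of timing out.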
""" - def __init__(self, path, timeout=None, sleep=None, id=None): + def __init__(self, path, timeout=None, sleep=None, id=None, kill_stale_locks=False): self.timeout = timeout self.sleep = sleep self.path = os.path.abspath(path) - self.id = id or get_id() + self.id = id or platform.get_process_id() self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id) + self.kill_stale_locks = kill_stale_locks + self.stale_warning_printed = False def __enter__(self): return self.acquire() @@ -137,6 +130,7 @@ class ExclusiveLock: except FileExistsError: # already locked if self.by_me(): return self + self.kill_stale_lock() if timer.timed_out_or_sleep(): raise LockTimeout(self.path) except OSError as err: @@ -160,12 +154,64 @@ class ExclusiveLock: def by_me(self): return os.path.exists(self.unique_name) + def kill_stale_lock(self): + for name in os.listdir(self.path): + try: + host_pid, thread_str = name.rsplit('-', 1) + host, pid_str = host_pid.rsplit('.', 1) + pid = int(pid_str) + thread = int(thread_str) + except ValueError: + # Malformed lock name? Or just some new format we don't understand? + # It's safer to just exit. + return False + + if platform.process_alive(host, pid, thread): + return False + + if not self.kill_stale_locks: + if not self.stale_warning_printed: + # Log this at warning level to hint the user at the ability + logger.warning("Found stale lock %s, but not deleting because BORG_HOSTNAME_IS_UNIQUE is not set.", name) + self.stale_warning_printed = True + return False + + try: + os.unlink(os.path.join(self.path, name)) + logger.warning('Killed stale lock %s.', name) + except OSError as err: + if not self.stale_warning_printed: + # This error will bubble up and likely result in locking failure + logger.error('Found stale lock %s, but cannot delete due to %s', name, str(err)) + self.stale_warning_printed = True + return False + + try: + os.rmdir(self.path) + except OSError: + # Directory is not empty = we lost the race to somebody else + # Permission denied = we cannot operate anyway + # other error like EIO = we cannot operate and it's unsafe too. + return False + + return True + def break_lock(self): if self.is_locked(): for name in os.listdir(self.path): os.unlink(os.path.join(self.path, name)) os.rmdir(self.path) + def migrate_lock(self, old_id, new_id): + """migrate the lock ownership from old_id to new_id""" + assert self.id == old_id + new_unique_name = os.path.join(self.path, "%s.%d-%x" % new_id) + if self.is_locked() and self.by_me(): + with open(new_unique_name, "wb"): + pass + os.unlink(self.unique_name) + self.id, self.unique_name = new_id, new_unique_name + class LockRoster: """ @@ -174,14 +220,30 @@ class LockRoster: Note: you usually should call the methods with an exclusive lock held, to avoid conflicting access by multiple threads/processes/machines. 
""" - def __init__(self, path, id=None): + def __init__(self, path, id=None, kill_stale_locks=False): self.path = path - self.id = id or get_id() + self.id = id or platform.get_process_id() + self.kill_stale_locks = kill_stale_locks def load(self): try: with open(self.path) as f: data = json.load(f) + + # Just nuke the stale locks early on load + if self.kill_stale_locks: + for key in (SHARED, EXCLUSIVE): + try: + entries = data[key] + except KeyError: + continue + elements = set() + for host, pid, thread in entries: + if platform.process_alive(host, pid, thread): + elements.add((host, pid, thread)) + else: + logger.warning('Removed stale %s roster lock for pid %d.', key, pid) + data[key] = list(elements) except (FileNotFoundError, ValueError): # no or corrupt/empty roster file? data = {} @@ -201,6 +263,9 @@ class LockRoster: roster = self.load() return set(tuple(e) for e in roster.get(key, [])) + def empty(self, *keys): + return all(not self.get(key) for key in keys) + def modify(self, key, op): roster = self.load() try: @@ -216,8 +281,27 @@ class LockRoster: roster[key] = list(list(e) for e in elements) self.save(roster) + def migrate_lock(self, key, old_id, new_id): + """migrate the lock ownership from old_id to new_id""" + assert self.id == old_id + # need to temporarily switch off stale lock killing as we want to + # rather migrate than kill them (at least the one made by old_id). + killing, self.kill_stale_locks = self.kill_stale_locks, False + try: + try: + self.modify(key, REMOVE) + except KeyError: + # entry was not there, so no need to add a new one, but still update our id + self.id = new_id + else: + # old entry removed, update our id and add a updated entry + self.id = new_id + self.modify(key, ADD) + finally: + self.kill_stale_locks = killing -class UpgradableLock: + +class Lock: """ A Lock for a resource that can be accessed in a shared or exclusive way. Typically, write access to a resource needs an exclusive lock (1 writer, @@ -226,24 +310,24 @@ class UpgradableLock: If possible, try to use the contextmanager here like:: - with UpgradableLock(...) as lock: + with Lock(...) as lock: ... This makes sure the lock is released again if the block is left, no matter how (e.g. if an exception occurred). 
""" - def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None): + def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None, kill_stale_locks=False): self.path = path self.is_exclusive = exclusive self.sleep = sleep self.timeout = timeout - self.id = id or get_id() + self.id = id or platform.get_process_id() # globally keeping track of shared and exclusive lockers: - self._roster = LockRoster(path + '.roster', id=id) + self._roster = LockRoster(path + '.roster', id=id, kill_stale_locks=kill_stale_locks) # an exclusive lock, used for: # - holding while doing roster queries / updates - # - holding while the UpgradableLock itself is exclusive - self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout) + # - holding while the Lock itself is exclusive + self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout, kill_stale_locks=kill_stale_locks) def __enter__(self): return self.acquire() @@ -293,12 +377,18 @@ class UpgradableLock: def release(self): if self.is_exclusive: self._roster.modify(EXCLUSIVE, REMOVE) + if self._roster.empty(EXCLUSIVE, SHARED): + self._roster.remove() self._lock.release() else: with self._lock: self._roster.modify(SHARED, REMOVE) + if self._roster.empty(EXCLUSIVE, SHARED): + self._roster.remove() def upgrade(self): + # WARNING: if multiple read-lockers want to upgrade, it will deadlock because they + # all will wait until the other read locks go away - and that won't happen. if not self.is_exclusive: self.acquire(exclusive=True, remove=SHARED) @@ -306,6 +396,20 @@ class UpgradableLock: if self.is_exclusive: self.acquire(exclusive=False, remove=EXCLUSIVE) + def got_exclusive_lock(self): + return self.is_exclusive and self._lock.is_locked() and self._lock.by_me() + def break_lock(self): self._roster.remove() self._lock.break_lock() + + def migrate_lock(self, old_id, new_id): + assert self.id == old_id + self.id = new_id + if self.is_exclusive: + self._lock.migrate_lock(old_id, new_id) + self._roster.migrate_lock(EXCLUSIVE, old_id, new_id) + else: + with self._lock: + self._lock.migrate_lock(old_id, new_id) + self._roster.migrate_lock(SHARED, old_id, new_id) diff --git a/src/borg/logger.py b/src/borg/logger.py index c75aaef7..7edec891 100644 --- a/src/borg/logger.py +++ b/src/borg/logger.py @@ -31,6 +31,7 @@ The way to use this is as follows: """ import inspect +import json import logging import logging.config import logging.handlers # needed for handlers defined there being configurable in logging.conf file @@ -52,7 +53,7 @@ def _log_warning(message, category, filename, lineno, file=None, line=None): logger.warning(msg) -def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', level='info', is_serve=False): +def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', level='info', is_serve=False, json=False): """setup logging module according to the arguments provided if conf_fname is given (or the config file name can be determined via @@ -79,6 +80,8 @@ def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', lev logging.config.fileConfig(f) configured = True logger = logging.getLogger(__name__) + borg_logger = logging.getLogger('borg') + borg_logger.json = json logger.debug('using logging configuration read from "{0}"'.format(conf_fname)) warnings.showwarning = _log_warning return None @@ -87,11 +90,21 @@ def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', lev # if we did not / not successfully load a logging configuration, 
fallback to this: logger = logging.getLogger('') handler = logging.StreamHandler(stream) - if is_serve: + if is_serve and not json: fmt = '$LOG %(levelname)s %(name)s Remote: %(message)s' else: fmt = '%(message)s' - handler.setFormatter(logging.Formatter(fmt)) + formatter = JsonFormatter(fmt) if json else logging.Formatter(fmt) + handler.setFormatter(formatter) + borg_logger = logging.getLogger('borg') + borg_logger.formatter = formatter + borg_logger.json = json + if configured and logger.handlers: + # The RepositoryServer can call setup_logging a second time to adjust the output + # mode from text-ish is_serve to json is_serve. + # Thus, remove the previously installed handler, if any. + logger.handlers[0].close() + logger.handlers.clear() logger.addHandler(handler) logger.setLevel(level.upper()) configured = True @@ -149,30 +162,81 @@ def create_logger(name=None): if not configured: raise Exception("tried to call a logger before setup_logging() was called") self.__real_logger = logging.getLogger(self.__name) + if self.__name.startswith('borg.debug.') and self.__real_logger.level == logging.NOTSET: + self.__real_logger.setLevel('WARNING') return self.__real_logger + def getChild(self, suffix): + return LazyLogger(self.__name + '.' + suffix) + def setLevel(self, *args, **kw): return self.__logger.setLevel(*args, **kw) def log(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.log(*args, **kw) def exception(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.exception(*args, **kw) def debug(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.debug(*args, **kw) def info(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.info(*args, **kw) def warning(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.warning(*args, **kw) def error(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.error(*args, **kw) def critical(self, *args, **kw): + if 'msgid' in kw: + kw.setdefault('extra', {})['msgid'] = kw.pop('msgid') return self.__logger.critical(*args, **kw) return LazyLogger(name) + + +class JsonFormatter(logging.Formatter): + RECORD_ATTRIBUTES = ( + 'levelname', + 'name', + 'message', + # msgid is an attribute we made up in Borg to expose a non-changing handle for log messages + 'msgid', + ) + + # Other attributes that are not very useful but do exist: + # processName, process, relativeCreated, stack_info, thread, threadName + # msg == message + # *args* are the unformatted arguments passed to the logger function, not useful now, + # become useful if sanitized properly (must be JSON serializable) in the code + + # fixed message IDs are assigned. 
+ # exc_info, exc_text are generally uninteresting because the message will have that + + def format(self, record): + super().format(record) + data = { + 'type': 'log_message', + 'time': record.created, + 'message': '', + 'levelname': 'CRITICAL', + } + for attr in self.RECORD_ATTRIBUTES: + value = getattr(record, attr, None) + if value: + data[attr] = value + return json.dumps(data) diff --git a/src/borg/lrucache.py b/src/borg/lrucache.py index 4d3ba73b..492e18b6 100644 --- a/src/borg/lrucache.py +++ b/src/borg/lrucache.py @@ -1,3 +1,6 @@ +sentinel = object() + + class LRUCache: def __init__(self, capacity, dispose): self._cache = {} @@ -28,6 +31,14 @@ class LRUCache: def __contains__(self, key): return key in self._cache + def get(self, key, default=None): + value = self._cache.get(key, sentinel) + if value is sentinel: + return default + self._lru.remove(key) + self._lru.append(key) + return value + def clear(self): for value in self._cache.values(): self._dispose(value) diff --git a/src/borg/nanorst.py b/src/borg/nanorst.py new file mode 100644 index 00000000..33a5e541 --- /dev/null +++ b/src/borg/nanorst.py @@ -0,0 +1,213 @@ +import io +import sys + +from .helpers import is_terminal + + +class TextPecker: + def __init__(self, s): + self.str = s + self.i = 0 + + def read(self, n): + self.i += n + return self.str[self.i - n:self.i] + + def peek(self, n): + if n >= 0: + return self.str[self.i:self.i + n] + else: + return self.str[self.i + n - 1:self.i - 1] + + def peekline(self): + out = '' + i = self.i + while i < len(self.str) and self.str[i] != '\n': + out += self.str[i] + i += 1 + return out + + def readline(self): + out = self.peekline() + self.i += len(out) + return out + + +def process_directive(directive, arguments, out, state_hook): + if directive == 'container' and arguments == 'experimental': + state_hook('text', '**', out) + out.write('++ Experimental ++') + state_hook('**', 'text', out) + else: + state_hook('text', '**', out) + out.write(directive.title()) + out.write(':\n') + state_hook('**', 'text', out) + if arguments: + out.write(arguments) + out.write('\n') + + +def rst_to_text(text, state_hook=None, references=None): + """ + Convert rST to a more human text form. + + This is a very loose conversion. No advanced rST features are supported. + The generated output directly depends on the input (e.g. indentation of + admonitions). 
+ """ + state_hook = state_hook or (lambda old_state, new_state, out: None) + references = references or {} + state = 'text' + inline_mode = 'replace' + text = TextPecker(text) + out = io.StringIO() + + inline_single = ('*', '`') + + while True: + char = text.read(1) + if not char: + break + next = text.peek(1) # type: str + + if state == 'text': + if char == '\\' and text.peek(1) in inline_single: + continue + if text.peek(-1) != '\\': + if char in inline_single and next != char: + state_hook(state, char, out) + state = char + continue + if char == next == '*': + state_hook(state, '**', out) + state = '**' + text.read(1) + continue + if char == next == '`': + state_hook(state, '``', out) + state = '``' + text.read(1) + continue + if text.peek(-1).isspace() and char == ':' and text.peek(5) == 'ref:`': + # translate reference + text.read(5) + ref = '' + while True: + char = text.peek(1) + if char == '`': + text.read(1) + break + if char == '\n': + text.read(1) + continue # merge line breaks in :ref:`...\n...` + ref += text.read(1) + try: + out.write(references[ref]) + except KeyError: + raise ValueError("Undefined reference in Archiver help: %r — please add reference substitution" + "to 'rst_plain_text_references'" % ref) + continue + if char == ':' and text.peek(2) == ':\n': # End of line code block + text.read(2) + state_hook(state, 'code-block', out) + state = 'code-block' + out.write(':\n') + continue + if text.peek(-2) in ('\n\n', '') and char == next == '.': + text.read(2) + directive, is_directive, arguments = text.readline().partition('::') + text.read(1) + if not is_directive: + # partition: if the separator is not in the text, the leftmost output is the entire input + if directive == 'nanorst: inline-fill': + inline_mode = 'fill' + elif directive == 'nanorst: inline-replace': + inline_mode = 'replace' + continue + process_directive(directive, arguments.strip(), out, state_hook) + continue + if state in inline_single and char == state: + state_hook(state, 'text', out) + state = 'text' + if inline_mode == 'fill': + out.write(2 * ' ') + continue + if state == '``' and char == next == '`': + state_hook(state, 'text', out) + state = 'text' + text.read(1) + if inline_mode == 'fill': + out.write(4 * ' ') + continue + if state == '**' and char == next == '*': + state_hook(state, 'text', out) + state = 'text' + text.read(1) + continue + if state == 'code-block' and char == next == '\n' and text.peek(5)[1:] != ' ': + # Foo:: + # + # *stuff* *code* *ignore .. all markup* + # + # More arcane stuff + # + # Regular text... 
+ state_hook(state, 'text', out) + state = 'text' + out.write(char) + + assert state == 'text', 'Invalid final state %r (This usually indicates unmatched */**)' % state + return out.getvalue() + + +class RstToTextLazy: + def __init__(self, str, state_hook=None, references=None): + self.str = str + self.state_hook = state_hook + self.references = references + self._rst = None + + @property + def rst(self): + if self._rst is None: + self._rst = rst_to_text(self.str, self.state_hook, self.references) + return self._rst + + def __getattr__(self, item): + return getattr(self.rst, item) + + def __str__(self): + return self.rst + + def __add__(self, other): + return self.rst + other + + def __iter__(self): + return iter(self.rst) + + def __contains__(self, item): + return item in self.rst + + +def ansi_escapes(old_state, new_state, out): + if old_state == 'text' and new_state in ('*', '`', '``'): + out.write('\033[4m') + if old_state == 'text' and new_state == '**': + out.write('\033[1m') + if old_state in ('*', '`', '``', '**') and new_state == 'text': + out.write('\033[0m') + + +def rst_to_terminal(rst, references=None, destination=sys.stdout): + """ + Convert *rst* to a lazy string. + + If *destination* is a file-like object connected to a terminal, + enrich text with suitable ANSI escapes. Otherwise return plain text. + """ + if is_terminal(destination): + rst_state_hook = ansi_escapes + else: + rst_state_hook = None + return RstToTextLazy(rst, rst_state_hook, references) diff --git a/src/borg/paperkey.html b/src/borg/paperkey.html new file mode 100644 index 00000000..4e1e859b --- /dev/null +++ b/src/borg/paperkey.html @@ -0,0 +1,2441 @@ + + + + + + +BorgBackup Printable Key Template + + + + + + +
To create a printable key, either paste the contents of your keyfile or a key export in the text field below, or select a key export file.

To create a key export use:

    borg key export /path/to/repository exportfile.txt

If you are using keyfile mode, keyfiles are usually stored in $HOME/.config/borg/keys/

You can edit the parts with a light blue border in the print preview below by clicking into them.

Key security: This print template will never send anything to remote servers. But keep in mind that printing might involve computers that can store the printed image, for example with cloud printing services or networked printers.

Template settings: QR error correction, QR code size, text size, text columns.

BorgBackup Printable Key Backup

To restore, either scan the QR code below, decode it and import it using

    borg key import /path/to/repo scannedfile

or run

    borg key import --paper /path/to/repo

and type in the text below.

Notes:
+ + + + + \ No newline at end of file diff --git a/src/borg/patterns.py b/src/borg/patterns.py new file mode 100644 index 00000000..897c75e2 --- /dev/null +++ b/src/borg/patterns.py @@ -0,0 +1,396 @@ +import argparse +import fnmatch +import os.path +import re +import sys +import unicodedata +from collections import namedtuple +from enum import Enum + +from . import shellpattern +from .helpers import clean_lines + + +def parse_patternfile_line(line, roots, ie_commands, fallback): + """Parse a pattern-file line and act depending on which command it represents.""" + ie_command = parse_inclexcl_command(line, fallback=fallback) + if ie_command.cmd is IECommand.RootPath: + roots.append(ie_command.val) + elif ie_command.cmd is IECommand.PatternStyle: + fallback = ie_command.val + else: + # it is some kind of include/exclude command + ie_commands.append(ie_command) + return fallback + + +def load_pattern_file(fileobj, roots, ie_commands, fallback=None): + if fallback is None: + fallback = ShellPattern # ShellPattern is defined later in this module + for line in clean_lines(fileobj): + fallback = parse_patternfile_line(line, roots, ie_commands, fallback) + + +def load_exclude_file(fileobj, patterns): + for patternstr in clean_lines(fileobj): + patterns.append(parse_exclude_pattern(patternstr)) + + +class ArgparsePatternAction(argparse.Action): + def __init__(self, nargs=1, **kw): + super().__init__(nargs=nargs, **kw) + + def __call__(self, parser, args, values, option_string=None): + parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern) + + +class ArgparsePatternFileAction(argparse.Action): + def __init__(self, nargs=1, **kw): + super().__init__(nargs=nargs, **kw) + + def __call__(self, parser, args, values, option_string=None): + """Load and parse patterns from a file. + Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. + """ + filename = values[0] + with open(filename) as f: + self.parse(f, args) + + def parse(self, fobj, args): + load_pattern_file(fobj, args.paths, args.patterns) + + +class ArgparseExcludeFileAction(ArgparsePatternFileAction): + def parse(self, fobj, args): + load_exclude_file(fobj, args.patterns) + + +class PatternMatcher: + """Represents a collection of pattern objects to match paths against. + + *fallback* is a boolean value that *match()* returns if no matching patterns are found. + + """ + def __init__(self, fallback=None): + self._items = [] + + # Value to return from match function when none of the patterns match. + self.fallback = fallback + + # optimizations + self._path_full_patterns = {} # full path -> return value + + # indicates whether the last match() call ended on a pattern for which + # we should recurse into any matching folder. Will be set to True or + # False when calling match(). + self.recurse_dir = None + + # whether to recurse into directories when no match is found + # TODO: allow modification as a config option? + self.recurse_dir_default = True + + self.include_patterns = [] + + # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass? + self.is_include_cmd = { + IECommand.Exclude: False, + IECommand.ExcludeNoRecurse: False, + IECommand.Include: True + } + + def empty(self): + return not len(self._items) and not len(self._path_full_patterns) + + def _add(self, pattern, cmd): + """*cmd* is an IECommand value. 
+ """ + if isinstance(pattern, PathFullPattern): + key = pattern.pattern # full, normalized path + self._path_full_patterns[key] = cmd + else: + self._items.append((pattern, cmd)) + + def add(self, patterns, cmd): + """Add list of patterns to internal list. *cmd* indicates whether the + pattern is an include/exclude pattern, and whether recursion should be + done on excluded folders. + """ + for pattern in patterns: + self._add(pattern, cmd) + + def add_includepaths(self, include_paths): + """Used to add inclusion-paths from args.paths (from commandline). + """ + include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths] + self.add(include_patterns, IECommand.Include) + self.fallback = not include_patterns + self.include_patterns = include_patterns + + def get_unmatched_include_patterns(self): + "Note that this only returns patterns added via *add_includepaths*." + return [p for p in self.include_patterns if p.match_count == 0] + + def add_inclexcl(self, patterns): + """Add list of patterns (of type CmdTuple) to internal list. + """ + for pattern, cmd in patterns: + self._add(pattern, cmd) + + def match(self, path): + """Return True or False depending on whether *path* is matched. + + If no match is found among the patterns in this matcher, then the value + in self.fallback is returned (defaults to None). + + """ + path = normalize_path(path) + # do a fast lookup for full path matches (note: we do not count such matches): + non_existent = object() + value = self._path_full_patterns.get(path, non_existent) + + if value is not non_existent: + # we have a full path match! + self.recurse_dir = command_recurses_dir(value) + return self.is_include_cmd[value] + + # this is the slow way, if we have many patterns in self._items: + for (pattern, cmd) in self._items: + if pattern.match(path, normalize=False): + self.recurse_dir = pattern.recurse_dir + return self.is_include_cmd[cmd] + + # by default we will recurse if there is no match + self.recurse_dir = self.recurse_dir_default + return self.fallback + + +def normalize_path(path): + """normalize paths for MacOS (but do nothing on other platforms)""" + # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match. + # Windows and Unix filesystems allow different forms, so users always have to enter an exact match. + return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path + + +class PatternBase: + """Shared logic for inclusion/exclusion patterns. + """ + PREFIX = NotImplemented + + def __init__(self, pattern, recurse_dir=False): + self.pattern_orig = pattern + self.match_count = 0 + pattern = normalize_path(pattern) + self._prepare(pattern) + self.recurse_dir = recurse_dir + + def match(self, path, normalize=True): + """Return a boolean indicating whether *path* is matched by this pattern. + + If normalize is True (default), the path will get normalized using normalize_path(), + otherwise it is assumed that it already is normalized using that function. 
+ """ + if normalize: + path = normalize_path(path) + matches = self._match(path) + if matches: + self.match_count += 1 + return matches + + def __repr__(self): + return '%s(%s)' % (type(self), self.pattern) + + def __str__(self): + return self.pattern_orig + + def _prepare(self, pattern): + "Should set the value of self.pattern" + raise NotImplementedError + + def _match(self, path): + raise NotImplementedError + + +class PathFullPattern(PatternBase): + """Full match of a path.""" + PREFIX = "pf" + + def _prepare(self, pattern): + self.pattern = os.path.normpath(pattern) + + def _match(self, path): + return path == self.pattern + + +# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path +# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path +# separator to the end of the path before matching. + + +class PathPrefixPattern(PatternBase): + """Literal files or directories listed on the command line + for some operations (e.g. extract, but not create). + If a directory is specified, all paths that start with that + path match as well. A trailing slash makes no difference. + """ + PREFIX = "pp" + + def _prepare(self, pattern): + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + + def _match(self, path): + return (path + os.path.sep).startswith(self.pattern) + + +class FnmatchPattern(PatternBase): + """Shell glob patterns to exclude. A trailing slash means to + exclude the contents of a directory, but not the directory itself. + """ + PREFIX = "fm" + + def _prepare(self, pattern): + if pattern.endswith(os.path.sep): + pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep + else: + pattern = os.path.normpath(pattern) + os.path.sep + '*' + + self.pattern = pattern + + # fnmatch and re.match both cache compiled regular expressions. + # Nevertheless, this is about 10 times faster. + self.regex = re.compile(fnmatch.translate(self.pattern)) + + def _match(self, path): + return (self.regex.match(path + os.path.sep) is not None) + + +class ShellPattern(PatternBase): + """Shell glob patterns to exclude. A trailing slash means to + exclude the contents of a directory, but not the directory itself. + """ + PREFIX = "sh" + + def _prepare(self, pattern): + sep = os.path.sep + + if pattern.endswith(sep): + pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep + else: + pattern = os.path.normpath(pattern) + sep + "**" + sep + "*" + + self.pattern = pattern + self.regex = re.compile(shellpattern.translate(self.pattern)) + + def _match(self, path): + return (self.regex.match(path + os.path.sep) is not None) + + +class RegexPattern(PatternBase): + """Regular expression to exclude. + """ + PREFIX = "re" + + def _prepare(self, pattern): + self.pattern = pattern + self.regex = re.compile(pattern) + + def _match(self, path): + # Normalize path separators + if os.path.sep != '/': + path = path.replace(os.path.sep, '/') + + return (self.regex.search(path) is not None) + + +_PATTERN_CLASSES = { + FnmatchPattern, + PathFullPattern, + PathPrefixPattern, + RegexPattern, + ShellPattern, +} + +_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES) + +CmdTuple = namedtuple('CmdTuple', 'val cmd') + + +class IECommand(Enum): + """A command that an InclExcl file line can represent. 
+ """ + RootPath = 1 + PatternStyle = 2 + Include = 3 + Exclude = 4 + ExcludeNoRecurse = 5 + + +def command_recurses_dir(cmd): + # TODO?: raise error or return None if *cmd* is RootPath or PatternStyle + return cmd not in [IECommand.ExcludeNoRecurse] + + +def get_pattern_class(prefix): + try: + return _PATTERN_CLASS_BY_PREFIX[prefix] + except KeyError: + raise ValueError("Unknown pattern style: {}".format(prefix)) from None + + +def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True): + """Read pattern from string and return an instance of the appropriate implementation class. + + """ + if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): + (style, pattern) = (pattern[:2], pattern[3:]) + cls = get_pattern_class(style) + else: + cls = fallback + return cls(pattern, recurse_dir) + + +def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern): + """Read pattern from string and return an instance of the appropriate implementation class. + """ + epattern_obj = parse_pattern(pattern_str, fallback, recurse_dir=False) + return CmdTuple(epattern_obj, IECommand.ExcludeNoRecurse) + + +def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern): + """Read a --patterns-from command from string and return a CmdTuple object.""" + + cmd_prefix_map = { + '-': IECommand.Exclude, + '!': IECommand.ExcludeNoRecurse, + '+': IECommand.Include, + 'R': IECommand.RootPath, + 'r': IECommand.RootPath, + 'P': IECommand.PatternStyle, + 'p': IECommand.PatternStyle, + } + + try: + cmd = cmd_prefix_map[cmd_line_str[0]] + + # remaining text on command-line following the command character + remainder_str = cmd_line_str[1:].lstrip() + + if not remainder_str: + raise ValueError("Missing pattern/information!") + except (IndexError, KeyError, ValueError): + raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str)) + + if cmd is IECommand.RootPath: + # TODO: validate string? + val = remainder_str + elif cmd is IECommand.PatternStyle: + # then remainder_str is something like 're' or 'sh' + try: + val = get_pattern_class(remainder_str) + except ValueError: + raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str)) + else: + # determine recurse_dir based on command type + recurse_dir = command_recurses_dir(cmd) + val = parse_pattern(remainder_str, fallback, recurse_dir) + + return CmdTuple(val, cmd) diff --git a/src/borg/platform/__init__.py b/src/borg/platform/__init__.py index 05934895..b8bc58bd 100644 --- a/src/borg/platform/__init__.py +++ b/src/borg/platform/__init__.py @@ -8,20 +8,29 @@ Public APIs are documented in platform.base. 
from .base import acl_get, acl_set from .base import set_flags, get_flags -from .base import SyncFile, sync_dir, fdatasync +from .base import SaveFile, SyncFile, sync_dir, fdatasync, safe_fadvise from .base import swidth, API_VERSION +from .base import process_alive, get_process_id, local_pid_alive + +OS_API_VERSION = API_VERSION + +if not sys.platform.startswith(('win32', )): + from .posix import process_alive, get_process_id, local_pid_alive if sys.platform.startswith('linux'): # pragma: linux only + from .linux import API_VERSION as OS_API_VERSION from .linux import acl_get, acl_set from .linux import set_flags, get_flags from .linux import SyncFile - from .linux import swidth, API_VERSION + from .linux import swidth elif sys.platform.startswith('freebsd'): # pragma: freebsd only + from .freebsd import API_VERSION as OS_API_VERSION from .freebsd import acl_get, acl_set - from .freebsd import swidth, API_VERSION + from .freebsd import swidth elif sys.platform == 'darwin': # pragma: darwin only + from .darwin import API_VERSION as OS_API_VERSION from .darwin import acl_get, acl_set - from .darwin import swidth, API_VERSION + from .darwin import swidth elif sys.platform == 'win32': # pragma: windows only from .windows import acl_get, acl_set from .windows import API_VERSION @@ -29,3 +38,5 @@ elif sys.platform == 'win32': # pragma: windows only from .windows import get_owner, set_owner from .windows import get_ads from .windows import select + from .windows import get_process_id + from .windows import process_alive diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index ef8853e3..f56c03be 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -1,6 +1,8 @@ import errno import os +from borg.helpers import truncate_and_unlink + """ platform base module ==================== @@ -13,7 +15,7 @@ platform API: that way platform APIs provided by the platform-specific support m are correctly composed into the base functionality. """ -API_VERSION = 3 +API_VERSION = '1.1_02' fdatasync = getattr(os, 'fdatasync', os.fsync) @@ -34,6 +36,7 @@ def acl_set(path, item, numeric_owner=False): of the user/group names """ + try: from os import lchflags @@ -62,6 +65,22 @@ def sync_dir(path): os.close(fd) +def safe_fadvise(fd, offset, len, advice): + if hasattr(os, 'posix_fadvise'): + advice = getattr(os, 'POSIX_FADV_' + advice) + try: + os.posix_fadvise(fd, offset, len, advice) + except OSError: + # usually, posix_fadvise can't fail for us, but there seem to + # be failures when running borg under docker on ARM, likely due + # to a bug outside of borg. + # also, there is a python wrapper bug, always giving errno = 0. + # https://github.com/borgbackup/borg/issues/2095 + # as this call is not critical for correct function (just to + # optimize cache usage), we ignore these errors. + pass + + class SyncFile: """ A file class that is supposed to enable write ordering (one way or another) and data durability after close(). @@ -80,8 +99,9 @@ class SyncFile: TODO: A Windows implementation should use CreateFile with FILE_FLAG_WRITE_THROUGH. """ - def __init__(self, path): - self.fd = open(path, 'xb') + def __init__(self, path, binary=False): + mode = 'xb' if binary else 'x' + self.fd = open(path, mode) self.fileno = self.fd.fileno() def __enter__(self): @@ -101,15 +121,58 @@ class SyncFile: from .. 
import platform self.fd.flush() platform.fdatasync(self.fileno) - if hasattr(os, 'posix_fadvise'): - os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED) + # tell the OS that it does not need to cache what we just wrote, + # avoids spoiling the cache for the OS and other processes. + safe_fadvise(self.fileno, 0, 0, 'DONTNEED') def close(self): """sync() and close.""" from .. import platform - self.sync() + dirname = None + try: + dirname = os.path.dirname(self.fd.name) + self.sync() + finally: + self.fd.close() + if dirname: + platform.sync_dir(dirname) + + +class SaveFile: + """ + Update file contents atomically. + + Must be used as a context manager (defining the scope of the transaction). + + On a journaling file system the file contents are always updated + atomically and won't become corrupted, even on power failures or + crashes (for caveats see SyncFile). + """ + + SUFFIX = '.tmp' + + def __init__(self, path, binary=False): + self.binary = binary + self.path = path + self.tmppath = self.path + self.SUFFIX + + def __enter__(self): + from .. import platform + try: + truncate_and_unlink(self.tmppath) + except FileNotFoundError: + pass + self.fd = platform.SyncFile(self.tmppath, self.binary) + return self.fd + + def __exit__(self, exc_type, exc_val, exc_tb): + from .. import platform self.fd.close() - platform.sync_dir(os.path.dirname(self.fd.name)) + if exc_type is not None: + truncate_and_unlink(self.tmppath) + return + os.replace(self.tmppath, self.path) + platform.sync_dir(os.path.dirname(self.path)) def swidth(s): @@ -118,3 +181,23 @@ def swidth(s): For western scripts, this is just len(s), but for cjk glyphs, 2 cells are used. """ return len(s) + + +def get_process_id(): + """ + Return identification tuple (hostname, pid, thread_id) for 'us'. If this is a FUSE process, then the PID will be + that of the parent, not the forked FUSE child. + """ + raise NotImplementedError + + +def process_alive(host, pid, thread): + """ + Check if the (host, pid, thread_id) combination corresponds to a potentially alive process. 
+ """ + raise NotImplementedError + + +def local_pid_alive(pid): + """Return whether *pid* is alive.""" + raise NotImplementedError diff --git a/src/borg/platform/darwin.pyx b/src/borg/platform/darwin.pyx index 188e5f4f..b7e439ab 100644 --- a/src/borg/platform/darwin.pyx +++ b/src/borg/platform/darwin.pyx @@ -4,7 +4,7 @@ from ..helpers import user2uid, group2gid from ..helpers import safe_decode, safe_encode from .posix import swidth -API_VERSION = 3 +API_VERSION = '1.1_02' cdef extern from "sys/acl.h": ctypedef struct _acl_t: diff --git a/src/borg/platform/freebsd.pyx b/src/borg/platform/freebsd.pyx index 0a02ed8b..3344de16 100644 --- a/src/borg/platform/freebsd.pyx +++ b/src/borg/platform/freebsd.pyx @@ -4,7 +4,7 @@ from ..helpers import posix_acl_use_stored_uid_gid from ..helpers import safe_encode, safe_decode from .posix import swidth -API_VERSION = 3 +API_VERSION = '1.1_02' cdef extern from "errno.h": int errno diff --git a/src/borg/platform/linux.pyx b/src/borg/platform/linux.pyx index 4bbdcc35..25f71fa1 100644 --- a/src/borg/platform/linux.pyx +++ b/src/borg/platform/linux.pyx @@ -1,18 +1,19 @@ import os import re -import resource import stat +import subprocess from ..helpers import posix_acl_use_stored_uid_gid from ..helpers import user2uid, group2gid from ..helpers import safe_decode, safe_encode from .base import SyncFile as BaseSyncFile +from .base import safe_fadvise from .posix import swidth from libc cimport errno from libc.stdint cimport int64_t -API_VERSION = 3 +API_VERSION = '1.1_02' cdef extern from "sys/types.h": int ACL_TYPE_ACCESS @@ -52,6 +53,10 @@ cdef extern from "linux/fs.h": cdef extern from "sys/ioctl.h": int ioctl(int fildes, int request, ...) +cdef extern from "unistd.h": + int _SC_PAGESIZE + long sysconf(int name) + cdef extern from "string.h": char *strerror(int errnum) @@ -67,8 +72,11 @@ BSD_TO_LINUX_FLAGS = { def set_flags(path, bsd_flags, fd=None): - if fd is None and stat.S_ISLNK(os.lstat(path).st_mode): - return + if fd is None: + st = os.stat(path, follow_symlinks=False) + if stat.S_ISBLK(st.st_mode) or stat.S_ISCHR(st.st_mode) or stat.S_ISLNK(st.st_mode): + # see comment in get_flags() + return cdef int flags = 0 for bsd_flag, linux_flag in BSD_TO_LINUX_FLAGS.items(): if bsd_flags & bsd_flag: @@ -87,6 +95,10 @@ def set_flags(path, bsd_flags, fd=None): def get_flags(path, st): + if stat.S_ISBLK(st.st_mode) or stat.S_ISCHR(st.st_mode) or stat.S_ISLNK(st.st_mode): + # avoid opening devices files - trying to open non-present devices can be rather slow. + # avoid opening symlinks, O_NOFOLLOW would make the open() fail anyway. + return 0 cdef int linux_flags try: fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) @@ -215,9 +227,9 @@ cdef _sync_file_range(fd, offset, length, flags): assert length & PAGE_MASK == 0, "length %d not page-aligned" % length if sync_file_range(fd, offset, length, flags) != 0: raise OSError(errno.errno, os.strerror(errno.errno)) - os.posix_fadvise(fd, offset, length, os.POSIX_FADV_DONTNEED) + safe_fadvise(fd, offset, length, 'DONTNEED') -cdef unsigned PAGE_MASK = resource.getpagesize() - 1 +cdef unsigned PAGE_MASK = sysconf(_SC_PAGESIZE) - 1 class SyncFile(BaseSyncFile): @@ -228,8 +240,8 @@ class SyncFile(BaseSyncFile): disk in the immediate future. 
""" - def __init__(self, path): - super().__init__(path) + def __init__(self, path, binary=False): + super().__init__(path, binary) self.offset = 0 self.write_window = (16 * 1024 ** 2) & ~PAGE_MASK self.last_sync = 0 @@ -250,4 +262,6 @@ class SyncFile(BaseSyncFile): def sync(self): self.fd.flush() os.fdatasync(self.fileno) - os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED) + # tell the OS that it does not need to cache what we just wrote, + # avoids spoiling the cache for the OS and other processes. + safe_fadvise(self.fileno, 0, 0, 'DONTNEED') diff --git a/src/borg/platform/posix.pyx b/src/borg/platform/posix.pyx index 8d74f19e..0144e1aa 100644 --- a/src/borg/platform/posix.pyx +++ b/src/borg/platform/posix.pyx @@ -1,6 +1,14 @@ +import errno +import os +import uuid +import socket +import subprocess + + cdef extern from "wchar.h": cdef int wcswidth(const Py_UNICODE *str, size_t n) - + + def swidth(s): str_len = len(s) terminal_width = wcswidth(s, str_len) @@ -8,3 +16,57 @@ def swidth(s): return terminal_width else: return str_len + + +# for performance reasons, only determine the hostname once. +# XXX this sometimes requires live internet access for issuing a DNS query in the background. +_hostname = '%s@%s' % (socket.getfqdn(), uuid.getnode()) + + +def get_process_id(): + """ + Return identification tuple (hostname, pid, thread_id) for 'us'. + This always returns the current pid, which might be different from before, e.g. if daemonize() was used. + + Note: Currently thread_id is *always* zero. + """ + thread_id = 0 + pid = os.getpid() + return _hostname, pid, thread_id + + +def process_alive(host, pid, thread): + """ + Check if the (host, pid, thread_id) combination corresponds to a potentially alive process. + + If the process is local, then this will be accurate. If the process is not local, then this + returns always True, since there is no real way to check. + """ + from . import local_pid_alive + + if host != _hostname: + return True + + if thread != 0: + # Currently thread is always 0, if we ever decide to set this to a non-zero value, + # this code needs to be revisited, too, to do a sensible thing + return True + + return local_pid_alive(pid) + + +def local_pid_alive(pid): + """Return whether *pid* is alive.""" + try: + # This doesn't work on Windows. + # This does not kill anything, 0 means "see if we can send a signal to this process or not". + # Possible errors: No such process (== stale lock) or permission denied (not a stale lock). + # If the exception is not raised that means such a pid is valid and we can send a signal to it. + os.kill(pid, 0) + return True + except OSError as err: + if err.errno == errno.ESRCH: + # ESRCH = no such process + return False + # Any other error (eg. permissions) means that the process ID refers to a live process. 
+ return True diff --git a/src/borg/platform/windows.pyx b/src/borg/platform/windows.pyx index 1cf9004b..1babd58c 100644 --- a/src/borg/platform/windows.pyx +++ b/src/borg/platform/windows.pyx @@ -99,6 +99,8 @@ cdef extern from 'windows.h': BOOL OpenThreadToken(HANDLE, DWORD, BOOL, HANDLE*) BOOL LookupPrivilegeValueW(wchar_t*, wchar_t*, _LUID*) BOOL AdjustTokenPrivileges(HANDLE, BOOL, _TOKEN_PRIVILEGES*, DWORD, _TOKEN_PRIVILEGES*, DWORD*) + + HANDLE OpenProcess(DWORD dwDesiredAccess, BOOL bInheritHandle, DWORD dwProcessId) HANDLE GetCurrentThread() HANDLE GetCurrentProcess() @@ -134,6 +136,8 @@ cdef extern from 'windows.h': cdef extern int TOKEN_ADJUST_PRIVILEGES cdef extern int TOKEN_QUERY + + cdef extern int PROCESS_QUERY_INFORMATION cdef extern from 'accctrl.h': ctypedef enum _SE_OBJECT_TYPE: @@ -567,3 +571,16 @@ def select(rlist, wlist, xlist, timeout=0): if size.value > 0: retRlist.append(pipe) return retRlist, wlist, retXlist + + +def get_process_id(): + #return hostname, pid, thread_id + return platform.node().lower(), os.getpid(), 0 + + +def process_alive(host, pid, thread): + if host.lower() != platform.node().lower(): + return True + return (OpenProcess(PROCESS_QUERY_INFORMATION, False, pid) != NULL) + + \ No newline at end of file diff --git a/src/borg/remote.py b/src/borg/remote.py index 4f2bfbc1..ac4ec961 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -4,35 +4,78 @@ if sys.platform != 'win32': import fcntl else: import shutil +import functools +import inspect +import json import logging import os import select import shlex +import shutil +import struct import sys import tempfile +import textwrap +import time +import traceback from subprocess import Popen, PIPE import msgpack from . import __version__ +from .compress import LZ4 +from .constants import * # NOQA from .helpers import Error, IntegrityError -from .helpers import get_home_dir -from .helpers import sysinfo from .helpers import bin_to_hex +from .helpers import get_home_dir +from .helpers import get_limited_unpacker +from .helpers import hostname_is_unique +from .helpers import replace_placeholders +from .helpers import sysinfo +from .helpers import format_file_size +from .helpers import truncate_and_unlink +from .helpers import prepare_subprocess_env +from .logger import create_logger, setup_logging from .repository import Repository +from .version import parse_version, format_version +from .algorithms.checksums import xxh64 if sys.platform == 'win32': from .platform import select as windowsSelect select.select = windowsSelect + class NoSSHClient(Error): """Could not find supported ssh client. Supported clients are {}.""" -RPC_PROTOCOL_VERSION = 2 +logger = create_logger(__name__) -BUFSIZE = 10 * 1024 * 1024 +RPC_PROTOCOL_VERSION = 2 +BORG_VERSION = parse_version(__version__) +MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r' MAX_INFLIGHT = 100 +RATELIMIT_PERIOD = 0.1 + + +def os_write(fd, data): + """os.write wrapper so we do not lose data for partial writes.""" + # TODO: this issue is fixed in cygwin since at least 2.8.0, remove this + # wrapper / workaround when this version is considered ancient. + # This is happening frequently on cygwin due to its small pipe buffer size of only 64kiB + # and also due to its different blocking pipe behaviour compared to Linux/*BSD. + # Neither Linux nor *BSD ever do partial writes on blocking pipes, unless interrupted by a + # signal, in which case serve() would terminate. 
+ amount = remaining = len(data) + while remaining: + count = os.write(fd, data) + remaining -= count + if not remaining: + break + data = data[count:] + time.sleep(count * 1e-09) + return amount + class ConnectionClosed(Error): """Connection closed by remote host""" @@ -43,13 +86,75 @@ class ConnectionClosedWithHint(ConnectionClosed): class PathNotAllowed(Error): - """Repository path not allowed""" + """Repository path not allowed: {}""" class InvalidRPCMethod(Error): """RPC method {} is not valid""" +class UnexpectedRPCDataFormatFromClient(Error): + """Borg {}: Got unexpected RPC data format from client.""" + + +class UnexpectedRPCDataFormatFromServer(Error): + """Got unexpected RPC data format from server:\n{}""" + + def __init__(self, data): + try: + data = data.decode()[:128] + except UnicodeDecodeError: + data = data[:128] + data = ['%02X' % byte for byte in data] + data = textwrap.fill(' '.join(data), 16 * 3) + super().__init__(data) + + +# Protocol compatibility: +# In general the server is responsible for rejecting too old clients and the client it responsible for rejecting +# too old servers. This ensures that the knowledge what is compatible is always held by the newer component. +# +# The server can do checks for the client version in RepositoryServer.negotiate. If the client_data is 2 then +# client is in the version range [0.29.0, 1.0.x] inclusive. For newer clients client_data is a dict which contains +# client_version. +# +# For the client the return of the negotiate method is either 2 if the server is in the version range [0.29.0, 1.0.x] +# inclusive, or it is a dict which includes the server version. +# +# All method calls on the remote repository object must be whitelisted in RepositoryServer.rpc_methods and have api +# stubs in RemoteRepository. The @api decorator on these stubs is used to set server version requirements. +# +# Method parameters are identified only by name and never by position. Unknown parameters are ignored by the server side. +# If a new parameter is important and may not be ignored, on the client a parameter specific version requirement needs +# to be added. +# When parameters are removed, they need to be preserved as defaulted parameters on the client stubs so that older +# servers still get compatible input. 
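To make the naming rules above concrete: the server translates the old positional protocol into named arguments via the per-method compatMap defined just below. A trimmed-down standalone illustration of that translation (the compat dict here is a shortened copy, not the real map):

    # shortened copy of the compatMap entry for open(); the full map is defined below
    compat = {'open': ('path', 'create', 'lock_wait', 'lock', 'exclusive', 'append_only')}

    def positional_to_named(method, argv):
        # same idea as RepositoryServer.positional_to_named: pair old positional args with names
        return {name: argv[pos] for pos, name in enumerate(compat[method])}

    # an old (1.0.x) client sends open() arguments positionally:
    argv = [b'/path/to/repo', False, 1.0, True, None, False]
    print(positional_to_named('open', argv))
    # {'path': b'/path/to/repo', 'create': False, 'lock_wait': 1.0, 'lock': True,
    #  'exclusive': None, 'append_only': False}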
+ + +compatMap = { + 'check': ('repair', 'save_space', ), + 'commit': ('save_space', ), + 'rollback': (), + 'destroy': (), + '__len__': (), + 'list': ('limit', 'marker', ), + 'put': ('id', 'data', ), + 'get': ('id', ), + 'delete': ('id', ), + 'save_key': ('keydata', ), + 'load_key': (), + 'break_lock': (), + 'negotiate': ('client_data', ), + 'open': ('path', 'create', 'lock_wait', 'lock', 'exclusive', 'append_only', ), + 'get_free_nonce': (), + 'commit_nonce_reservation': ('next_unreserved', 'start_nonce', ), +} + + +def decode_keys(d): + return {k.decode(): d[k] for k in d} + + class RepositoryServer: # pragma: no cover rpc_methods = ( '__len__', @@ -59,6 +164,7 @@ class RepositoryServer: # pragma: no cover 'destroy', 'get', 'list', + 'scan', 'negotiate', 'open', 'put', @@ -66,41 +172,76 @@ class RepositoryServer: # pragma: no cover 'save_key', 'load_key', 'break_lock', + 'get_free_nonce', + 'commit_nonce_reservation', + 'inject_exception', ) - def __init__(self, restrict_to_paths, append_only): + def __init__(self, restrict_to_paths, restrict_to_repositories, append_only, storage_quota): self.repository = None self.restrict_to_paths = restrict_to_paths + self.restrict_to_repositories = restrict_to_repositories + # This flag is parsed from the serve command line via Archiver.do_serve, + # i.e. it reflects local system policy and generally ranks higher than + # whatever the client wants, except when initializing a new repository + # (see RepositoryServer.open below). self.append_only = append_only + self.storage_quota = storage_quota + self.client_version = parse_version('1.0.8') # fallback version if client is too old to send version information + + def positional_to_named(self, method, argv): + """Translate from positional protocol to named protocol.""" + try: + return {name: argv[pos] for pos, name in enumerate(compatMap[method])} + except IndexError: + if method == 'open' and len(argv) == 4: + # borg clients < 1.0.7 use open() with 4 args + mapping = compatMap[method][:4] + else: + raise + return {name: argv[pos] for pos, name in enumerate(mapping)} + + def filter_args(self, f, kwargs): + """Remove unknown named parameters from call, because client did (implicitly) say it's ok.""" + known = set(inspect.signature(f).parameters) + return {name: kwargs[name] for name in kwargs if name in known} def serve(self): stdin_fd = sys.stdin.fileno() stdout_fd = sys.stdout.fileno() stderr_fd = sys.stdout.fileno() - # Make stdin non-blocking - fl = fcntl.fcntl(stdin_fd, fcntl.F_GETFL) - fcntl.fcntl(stdin_fd, fcntl.F_SETFL, fl | os.O_NONBLOCK) - # Make stdout blocking - fl = fcntl.fcntl(stdout_fd, fcntl.F_GETFL) - fcntl.fcntl(stdout_fd, fcntl.F_SETFL, fl & ~os.O_NONBLOCK) - # Make stderr blocking - fl = fcntl.fcntl(stderr_fd, fcntl.F_GETFL) - fcntl.fcntl(stderr_fd, fcntl.F_SETFL, fl & ~os.O_NONBLOCK) - unpacker = msgpack.Unpacker(use_list=False) + os.set_blocking(stdin_fd, False) + os.set_blocking(stdout_fd, True) + os.set_blocking(stderr_fd, True) + unpacker = get_limited_unpacker('server') while True: r, w, es = select.select([stdin_fd], [], [], 10) if r: data = os.read(stdin_fd, BUFSIZE) if not data: - self.repository.close() + if self.repository is not None: + self.repository.close() + else: + os_write(stderr_fd, 'Borg {}: Got connection close before repository was opened.\n' + .format(__version__).encode()) return unpacker.feed(data) for unpacked in unpacker: - if not (isinstance(unpacked, tuple) and len(unpacked) == 4): - self.repository.close() - raise Exception("Unexpected RPC data format.") 
- type, msgid, method, args = unpacked - method = method.decode('ascii') + if isinstance(unpacked, dict): + dictFormat = True + msgid = unpacked[MSGID] + method = unpacked[MSG].decode() + args = decode_keys(unpacked[ARGS]) + elif isinstance(unpacked, tuple) and len(unpacked) == 4: + dictFormat = False + # The first field 'type' was always 1 and has always been ignored + _, msgid, method, args = unpacked + method = method.decode() + args = self.positional_to_named(method, args) + else: + if self.repository is not None: + self.repository.close() + raise UnexpectedRPCDataFormatFromClient(__version__) try: if method not in self.rpc_methods: raise InvalidRPCMethod(method) @@ -108,94 +249,379 @@ class RepositoryServer: # pragma: no cover f = getattr(self, method) except AttributeError: f = getattr(self.repository, method) - res = f(*args) + args = self.filter_args(f, args) + res = f(**args) except BaseException as e: - # These exceptions are reconstructed on the client end in RemoteRepository.call_many(), - # and will be handled just like locally raised exceptions. Suppress the remote traceback - # for these, except ErrorWithTraceback, which should always display a traceback. - if not isinstance(e, (Repository.DoesNotExist, Repository.AlreadyExists, PathNotAllowed)): - logging.exception('Borg %s: exception in RPC call:', __version__) - logging.error(sysinfo()) - exc = "Remote Exception (see remote log for the traceback)" - os.write(stdout_fd, msgpack.packb((1, msgid, e.__class__.__name__, exc))) + if dictFormat: + ex_short = traceback.format_exception_only(e.__class__, e) + ex_full = traceback.format_exception(*sys.exc_info()) + ex_trace = True + if isinstance(e, Error): + ex_short = [e.get_message()] + ex_trace = e.traceback + if isinstance(e, (Repository.DoesNotExist, Repository.AlreadyExists, PathNotAllowed)): + # These exceptions are reconstructed on the client end in RemoteRepository.call_many(), + # and will be handled just like locally raised exceptions. Suppress the remote traceback + # for these, except ErrorWithTraceback, which should always display a traceback. + pass + else: + logging.debug('\n'.join(ex_full)) + + try: + msg = msgpack.packb({MSGID: msgid, + b'exception_class': e.__class__.__name__, + b'exception_args': e.args, + b'exception_full': ex_full, + b'exception_short': ex_short, + b'exception_trace': ex_trace, + b'sysinfo': sysinfo()}) + except TypeError: + msg = msgpack.packb({MSGID: msgid, + b'exception_class': e.__class__.__name__, + b'exception_args': [x if isinstance(x, (str, bytes, int)) else None + for x in e.args], + b'exception_full': ex_full, + b'exception_short': ex_short, + b'exception_trace': ex_trace, + b'sysinfo': sysinfo()}) + + os_write(stdout_fd, msg) + else: + if isinstance(e, (Repository.DoesNotExist, Repository.AlreadyExists, PathNotAllowed)): + # These exceptions are reconstructed on the client end in RemoteRepository.call_many(), + # and will be handled just like locally raised exceptions. Suppress the remote traceback + # for these, except ErrorWithTraceback, which should always display a traceback. 
+ pass + else: + if isinstance(e, Error): + tb_log_level = logging.ERROR if e.traceback else logging.DEBUG + msg = e.get_message() + else: + tb_log_level = logging.ERROR + msg = '%s Exception in RPC call' % e.__class__.__name__ + tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) + logging.error(msg) + logging.log(tb_log_level, tb) + exc = 'Remote Exception (see remote log for the traceback)' + os_write(stdout_fd, msgpack.packb((1, msgid, e.__class__.__name__, exc))) else: - os.write(stdout_fd, msgpack.packb((1, msgid, None, res))) + if dictFormat: + os_write(stdout_fd, msgpack.packb({MSGID: msgid, RESULT: res})) + else: + os_write(stdout_fd, msgpack.packb((1, msgid, None, res))) if es: self.repository.close() return - def negotiate(self, versions): - return RPC_PROTOCOL_VERSION + def negotiate(self, client_data): + # old format used in 1.0.x + if client_data == RPC_PROTOCOL_VERSION: + return RPC_PROTOCOL_VERSION + # clients since 1.1.0b3 use a dict as client_data + # clients since 1.1.0b6 support json log format from server + if isinstance(client_data, dict): + self.client_version = client_data[b'client_version'] + level = logging.getLevelName(logging.getLogger('').level) + setup_logging(is_serve=True, json=True, level=level) + logger.debug('Initialized logging system for JSON-based protocol') + else: + self.client_version = BORG_VERSION # seems to be newer than current version (no known old format) - def open(self, path, create=False, lock_wait=None, lock=True): - path = os.fsdecode(path) - if path.startswith('/~'): + # not a known old format, send newest negotiate this version knows + return {'server_version': BORG_VERSION} + + def _resolve_path(self, path): + if isinstance(path, bytes): + path = os.fsdecode(path) + # Leading slash is always present with URI (ssh://), but not with short-form (who@host:path). + if path.startswith('/~/'): # /~/x = path x relative to home dir + path = os.path.join(get_home_dir(), path[3:]) + elif path.startswith('~/'): path = os.path.join(get_home_dir(), path[2:]) - path = os.path.realpath(path) + elif path.startswith('/~'): # /~username/x = relative to "user" home dir + path = os.path.expanduser(path[1:]) + elif path.startswith('~'): + path = os.path.expanduser(path) + elif path.startswith('/./'): # /./x = path x relative to cwd + path = path[3:] + return os.path.realpath(path) + + def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, append_only=False): + logging.debug('Resolving repository path %r', path) + path = self._resolve_path(path) + logging.debug('Resolved repository path to %r', path) + path_with_sep = os.path.join(path, '') # make sure there is a trailing slash (os.sep) if self.restrict_to_paths: + # if --restrict-to-path P is given, we make sure that we only operate in/below path P. + # for the prefix check, it is important that the compared paths both have trailing slashes, + # so that a path /foobar will NOT be accepted with --restrict-to-path /foo option. 
for restrict_to_path in self.restrict_to_paths: - if path.startswith(os.path.realpath(restrict_to_path)): + restrict_to_path_with_sep = os.path.join(os.path.realpath(restrict_to_path), '') # trailing slash + if path_with_sep.startswith(restrict_to_path_with_sep): break else: raise PathNotAllowed(path) - self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, append_only=self.append_only) + if self.restrict_to_repositories: + for restrict_to_repository in self.restrict_to_repositories: + restrict_to_repository_with_sep = os.path.join(os.path.realpath(restrict_to_repository), '') + if restrict_to_repository_with_sep == path_with_sep: + break + else: + raise PathNotAllowed(path) + # "borg init" on "borg serve --append-only" (=self.append_only) does not create an append only repo, + # while "borg init --append-only" (=append_only) does, regardless of the --append-only (self.append_only) + # flag for serve. + append_only = (not create and self.append_only) or append_only + self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, + append_only=append_only, + storage_quota=self.storage_quota, + exclusive=exclusive) self.repository.__enter__() # clean exit handled by serve() method return self.repository.id + def inject_exception(self, kind): + kind = kind.decode() + s1 = 'test string' + s2 = 'test string2' + if kind == 'DoesNotExist': + raise Repository.DoesNotExist(s1) + elif kind == 'AlreadyExists': + raise Repository.AlreadyExists(s1) + elif kind == 'CheckNeeded': + raise Repository.CheckNeeded(s1) + elif kind == 'IntegrityError': + raise IntegrityError(s1) + elif kind == 'PathNotAllowed': + raise PathNotAllowed('foo') + elif kind == 'ObjectNotFound': + raise Repository.ObjectNotFound(s1, s2) + elif kind == 'InvalidRPCMethod': + raise InvalidRPCMethod(s1) + elif kind == 'divide': + 0 // 0 + + +class SleepingBandwidthLimiter: + def __init__(self, limit): + if limit: + self.ratelimit = int(limit * RATELIMIT_PERIOD) + self.ratelimit_last = time.monotonic() + self.ratelimit_quota = self.ratelimit + else: + self.ratelimit = None + + def write(self, fd, to_send): + if self.ratelimit: + now = time.monotonic() + if self.ratelimit_last + RATELIMIT_PERIOD <= now: + self.ratelimit_quota += self.ratelimit + if self.ratelimit_quota > 2 * self.ratelimit: + self.ratelimit_quota = 2 * self.ratelimit + self.ratelimit_last = now + if self.ratelimit_quota == 0: + tosleep = self.ratelimit_last + RATELIMIT_PERIOD - now + time.sleep(tosleep) + self.ratelimit_quota += self.ratelimit + self.ratelimit_last = time.monotonic() + if len(to_send) > self.ratelimit_quota: + to_send = to_send[:self.ratelimit_quota] + written = os.write(fd, to_send) + if self.ratelimit: + self.ratelimit_quota -= written + return written + + +def api(*, since, **kwargs_decorator): + """Check version requirements and use self.call to do the remote method call. + + specifies the version in which borg introduced this method, + calling this method when connected to an older version will fail without transmiting + anything to the server. + + Further kwargs can be used to encode version specific restrictions. + If a previous hardcoded behaviour is parameterized in a version, this allows calls that + use the previously hardcoded behaviour to pass through and generates an error if another + behaviour is requested by the client. + + e.g. when 'append_only' was introduced in 1.0.7 the previous behaviour was what now is append_only=False. 
+ Thus @api(..., append_only={'since': parse_version('1.0.7'), 'previously': False}) allows calls + with append_only=False for all version but rejects calls using append_only=True on versions older than 1.0.7. + """ + def decorator(f): + @functools.wraps(f) + def do_rpc(self, *args, **kwargs): + sig = inspect.signature(f) + bound_args = sig.bind(self, *args, **kwargs) + named = {} # Arguments for the remote process + extra = {} # Arguments for the local process + for name, param in sig.parameters.items(): + if name == 'self': + continue + if name in bound_args.arguments: + if name == 'wait': + extra[name] = bound_args.arguments[name] + else: + named[name] = bound_args.arguments[name] + else: + if param.default is not param.empty: + named[name] = param.default + + if self.server_version < since: + raise self.RPCServerOutdated(f.__name__, format_version(since)) + + for name, restriction in kwargs_decorator.items(): + if restriction['since'] <= self.server_version: + continue + if 'previously' in restriction and named[name] == restriction['previously']: + continue + + raise self.RPCServerOutdated("{0} {1}={2!s}".format(f.__name__, name, named[name]), + format_version(restriction['since'])) + + return self.call(f.__name__, named, **extra) + return do_rpc + return decorator + class RemoteRepository: extra_test_args = [] class RPCError(Exception): - def __init__(self, name): - self.name = name + def __init__(self, unpacked): + # for borg < 1.1: unpacked only has b'exception_class' as key + # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo' + self.unpacked = unpacked - def __init__(self, location, create=False, lock_wait=None, lock=True, args=None): + def get_message(self): + if b'exception_short' in self.unpacked: + return b'\n'.join(self.unpacked[b'exception_short']).decode() + else: + return self.exception_class + + @property + def traceback(self): + return self.unpacked.get(b'exception_trace', True) + + @property + def exception_class(self): + return self.unpacked[b'exception_class'].decode() + + @property + def exception_full(self): + if b'exception_full' in self.unpacked: + return b'\n'.join(self.unpacked[b'exception_full']).decode() + else: + return self.get_message() + '\nRemote Exception (see remote log for the traceback)' + + @property + def sysinfo(self): + if b'sysinfo' in self.unpacked: + return self.unpacked[b'sysinfo'].decode() + else: + return '' + + class RPCServerOutdated(Error): + """Borg server is too old for {}. 
Required version {}""" + + @property + def method(self): + return self.args[0] + + @property + def required_version(self): + return self.args[1] + + # If compatibility with 1.0.x is not longer needed, replace all checks of this with True and simplify the code + dictFormat = False # outside of __init__ for testing of legacy free protocol + + def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False, args=None): self.location = self._location = location self.preload_ids = [] self.msgid = 0 + self.rx_bytes = 0 + self.tx_bytes = 0 self.to_send = b'' - self.cache = {} + self.stderr_received = b'' # incomplete stderr line bytes received (no \n yet) + self.chunkid_to_msgids = {} self.ignore_responses = set() self.responses = {} - self.unpacker = msgpack.Unpacker(use_list=False) + self.async_responses = {} + self.shutdown_time = None + self.ratelimit = SleepingBandwidthLimiter(args.remote_ratelimit * 1024 if args and args.remote_ratelimit else 0) + self.unpacker = get_limited_unpacker('client') + self.server_version = parse_version('1.0.8') # fallback version if server is too old to send version information self.p = None testing = location.host == '__testsuite__' + # when testing, we invoke and talk to a borg process directly (no ssh). + # when not testing, we invoke the system-installed ssh binary to talk to a remote borg. + env = prepare_subprocess_env(system=not testing) borg_cmd = self.borg_cmd(args, testing) - env = dict(os.environ) if not testing: borg_cmd = self.ssh_cmd(location) + borg_cmd - # pyinstaller binary adds LD_LIBRARY_PATH=/tmp/_ME... but we do not want - # that the system's ssh binary picks up (non-matching) libraries from there - env.pop('LD_LIBRARY_PATH', None) - env.pop('BORG_PASSPHRASE', None) # security: do not give secrets to subprocess + logger.debug('SSH command line: %s', borg_cmd) self.p = Popen(borg_cmd, bufsize=0, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() self.stderr_fd = self.p.stderr.fileno() if sys.platform != 'win32': - fcntl.fcntl(self.stdin_fd, fcntl.F_SETFL, fcntl.fcntl(self.stdin_fd, fcntl.F_GETFL) | os.O_NONBLOCK) - fcntl.fcntl(self.stdout_fd, fcntl.F_SETFL, fcntl.fcntl(self.stdout_fd, fcntl.F_GETFL) | os.O_NONBLOCK) - fcntl.fcntl(self.stderr_fd, fcntl.F_SETFL, fcntl.fcntl(self.stderr_fd, fcntl.F_GETFL) | os.O_NONBLOCK) + os.set_blocking(self.stdin_fd, False) + os.set_blocking(self.stdout_fd, False) + os.set_blocking(self.stderr_fd, False) self.r_fds = [self.stdout_fd, self.stderr_fd] self.x_fds = [self.stdin_fd, self.stdout_fd, self.stderr_fd] try: - version = self.call('negotiate', RPC_PROTOCOL_VERSION) - except ConnectionClosed: - raise ConnectionClosedWithHint('Is borg working on the server?') from None - if version != RPC_PROTOCOL_VERSION: - raise Exception('Server insisted on using unsupported protocol version %d' % version) - try: - self.id = self.call('open', self.location.path, create, lock_wait, lock) + try: + version = self.call('negotiate', {'client_data': { + b'client_version': BORG_VERSION, + }}) + except ConnectionClosed: + raise ConnectionClosedWithHint('Is borg working on the server?') from None + if version == RPC_PROTOCOL_VERSION: + self.dictFormat = False + elif isinstance(version, dict) and b'server_version' in version: + self.dictFormat = True + self.server_version = version[b'server_version'] + else: + raise Exception('Server insisted on using unsupported protocol version %s' % version) + + def do_open(): + 
self.id = self.open(path=self.location.path, create=create, lock_wait=lock_wait, + lock=lock, exclusive=exclusive, append_only=append_only) + + if self.dictFormat: + do_open() + else: + # Ugly detection of versions prior to 1.0.7: If open throws it has to be 1.0.6 or lower + try: + do_open() + except self.RPCError as err: + if err.exception_class != 'TypeError': + raise + msg = """\ +Please note: +If you see a TypeError complaining about the number of positional arguments +given to open(), you can ignore it if it comes from a borg version < 1.0.7. +This TypeError is a cosmetic side effect of the compatibility code borg +clients >= 1.0.7 have to support older borg servers. +This problem will go away as soon as the server has been upgraded to 1.0.7+. +""" + # emit this msg in the same way as the 'Remote: ...' lines that show the remote TypeError + sys.stderr.write(msg) + self.server_version = parse_version('1.0.6') + compatMap['open'] = ('path', 'create', 'lock_wait', 'lock', ), + # try again with corrected version and compatMap + do_open() except Exception: self.close() raise def __del__(self): + if len(self.responses): + logging.debug('still %d cached responses left in RemoteRepository' % (len(self.responses),)) if self.p: self.close() - assert False, "cleanup happened in Repository.__del__" + assert False, 'cleanup happened in Repository.__del__' def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path()) @@ -206,11 +632,14 @@ class RemoteRepository: def __exit__(self, exc_type, exc_val, exc_tb): try: if exc_type is not None: + self.shutdown_time = time.monotonic() + 30 self.rollback() finally: # in any case, we want to cleanly close the repo, even if the # rollback can not succeed (e.g. because the connection was # already closed) and raised another exception: + logger.debug('RemoteRepository: %s bytes sent, %s bytes received, %d messages sent', + format_file_size(self.tx_bytes), format_file_size(self.rx_bytes), self.msgid) self.close() @property @@ -219,7 +648,7 @@ class RemoteRepository: def borg_cmd(self, args, testing): """return a borg serve command line""" - # give some args/options to "borg serve" process as they were given to us + # give some args/options to 'borg serve' process as they were given to us opts = [] if args is not None: opts.append('--umask=%03o' % args.umask) @@ -236,11 +665,35 @@ class RemoteRepository: opts.append('--critical') else: raise ValueError('log level missing, fix this code') + + # Tell the remote server about debug topics it may need to consider. + # Note that debug topics are usable for "spew" or "trace" logs which would + # be too plentiful to transfer for normal use, so the server doesn't send + # them unless explicitly enabled. + # + # Needless to say, if you do --debug-topic=repository.compaction, for example, + # with a 1.0.x server it won't work, because the server does not recognize the + # option. + # + # This is not considered a problem, since this is a debugging feature that + # should not be used for regular use. + for topic in args.debug_topics: + if '.' not in topic: + topic = 'borg.debug.' 
+ topic + if 'repository' in topic: + opts.append('--debug-topic=%s' % topic) + + if 'storage_quota' in args and args.storage_quota: + opts.append('--storage-quota=%s' % args.storage_quota) + env_vars = [] + if not hostname_is_unique(): + env_vars.append('BORG_HOSTNAME_IS_UNIQUE=no') if testing: - return [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args + return env_vars + [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args else: # pragma: no cover remote_path = args.remote_path or os.environ.get('BORG_REMOTE_PATH', 'borg') - return [remote_path, 'serve'] + opts + remote_path = replace_placeholders(remote_path) + return env_vars + [remote_path, 'serve'] + opts def ssh_cmd(self, location): """return a ssh command line that can be prefixed to a borg command line""" @@ -268,53 +721,99 @@ class RemoteRepository: args.append('%s' % location.host) return args - def call(self, cmd, *args, **kw): + def named_to_positional(self, method, kwargs): + return [kwargs[name] for name in compatMap[method]] + + def call(self, cmd, args, **kw): for resp in self.call_many(cmd, [args], **kw): return resp - def call_many(self, cmd, calls, wait=True, is_preloaded=False): - if not calls: + def call_many(self, cmd, calls, wait=True, is_preloaded=False, async_wait=True): + if not calls and cmd != 'async_responses': return - def fetch_from_cache(args): - msgid = self.cache[args].pop(0) - if not self.cache[args]: - del self.cache[args] + def pop_preload_msgid(chunkid): + msgid = self.chunkid_to_msgids[chunkid].pop(0) + if not self.chunkid_to_msgids[chunkid]: + del self.chunkid_to_msgids[chunkid] return msgid - def handle_error(error, res): - if error == b'DoesNotExist': + def handle_error(unpacked): + error = unpacked[b'exception_class'].decode() + old_server = b'exception_args' not in unpacked + args = unpacked.get(b'exception_args') + + if error == 'DoesNotExist': raise Repository.DoesNotExist(self.location.orig) - elif error == b'AlreadyExists': + elif error == 'AlreadyExists': raise Repository.AlreadyExists(self.location.orig) - elif error == b'CheckNeeded': + elif error == 'CheckNeeded': raise Repository.CheckNeeded(self.location.orig) - elif error == b'IntegrityError': - raise IntegrityError(res) - elif error == b'PathNotAllowed': - raise PathNotAllowed(*res) - elif error == b'ObjectNotFound': - raise Repository.ObjectNotFound(res[0], self.location.orig) - elif error == b'InvalidRPCMethod': - raise InvalidRPCMethod(*res) + elif error == 'IntegrityError': + if old_server: + raise IntegrityError('(not available)') + else: + raise IntegrityError(args[0].decode()) + elif error == 'AtticRepository': + if old_server: + raise Repository.AtticRepository('(not available)') + else: + raise Repository.AtticRepository(args[0].decode()) + elif error == 'PathNotAllowed': + if old_server: + raise PathNotAllowed('(unknown)') + else: + raise PathNotAllowed(args[0].decode()) + elif error == 'ObjectNotFound': + if old_server: + raise Repository.ObjectNotFound('(not available)', self.location.orig) + else: + raise Repository.ObjectNotFound(args[0].decode(), self.location.orig) + elif error == 'InvalidRPCMethod': + if old_server: + raise InvalidRPCMethod('(not available)') + else: + raise InvalidRPCMethod(args[0].decode()) else: - raise self.RPCError(res.decode('utf-8')) + raise self.RPCError(unpacked) calls = list(calls) waiting_for = [] while wait or calls: + if self.shutdown_time and time.monotonic() > self.shutdown_time: + # we are shutting this RemoteRepository down 
already, make sure we do not waste + # a lot of time in case a lot of async stuff is coming in or remote is gone or slow. + logger.debug('shutdown_time reached, shutting down with %d waiting_for and %d async_responses.', + len(waiting_for), len(self.async_responses)) + return while waiting_for: try: - error, res = self.responses.pop(waiting_for[0]) + unpacked = self.responses.pop(waiting_for[0]) waiting_for.pop(0) - if error: - handle_error(error, res) + if b'exception_class' in unpacked: + handle_error(unpacked) else: - yield res + yield unpacked[RESULT] if not waiting_for and not calls: return except KeyError: break + if cmd == 'async_responses': + while True: + try: + msgid, unpacked = self.async_responses.popitem() + except KeyError: + # there is nothing left what we already have received + if async_wait and self.ignore_responses: + # but do not return if we shall wait and there is something left to wait for: + break + else: + return + else: + if b'exception_class' in unpacked: + handle_error(unpacked) + else: + yield unpacked[RESULT] if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT): w_fds = [self.stdin_fd] else: @@ -327,94 +826,156 @@ class RemoteRepository: data = os.read(fd, BUFSIZE) if not data: raise ConnectionClosed() + self.rx_bytes += len(data) self.unpacker.feed(data) for unpacked in self.unpacker: - if not (isinstance(unpacked, tuple) and len(unpacked) == 4): - raise Exception("Unexpected RPC data format.") - type, msgid, error, res = unpacked + if isinstance(unpacked, dict): + msgid = unpacked[MSGID] + elif isinstance(unpacked, tuple) and len(unpacked) == 4: + # The first field 'type' was always 1 and has always been ignored + _, msgid, error, res = unpacked + if error: + # ignore res, because it is only a fixed string anyway. + unpacked = {MSGID: msgid, b'exception_class': error} + else: + unpacked = {MSGID: msgid, RESULT: res} + else: + raise UnexpectedRPCDataFormatFromServer(data) if msgid in self.ignore_responses: self.ignore_responses.remove(msgid) - if error: - handle_error(error, res) + # async methods never return values, but may raise exceptions. + if b'exception_class' in unpacked: + self.async_responses[msgid] = unpacked + else: + # we currently do not have async result values except "None", + # so we do not add them into async_responses. + if unpacked[RESULT] is not None: + self.async_responses[msgid] = unpacked else: - self.responses[msgid] = error, res + self.responses[msgid] = unpacked elif fd is self.stderr_fd: data = os.read(fd, 32768) if not data: raise ConnectionClosed() - data = data.decode('utf-8') - for line in data.splitlines(keepends=True): - handle_remote_line(line) + self.rx_bytes += len(data) + # deal with incomplete lines (may appear due to block buffering) + if self.stderr_received: + data = self.stderr_received + data + self.stderr_received = b'' + lines = data.splitlines(keepends=True) + if lines and not lines[-1].endswith((b'\r', b'\n')): + self.stderr_received = lines.pop() + # now we have complete lines in and any partial line in self.stderr_received. 
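A self-contained sketch of the partial-line buffering used here, assuming the same splitlines(keepends=True) approach (the function name is illustrative):

    def split_complete_lines(buffered, data):
        # Return (complete_lines, leftover) so that partial lines and split
        # multi-byte UTF-8 sequences are never decoded or logged prematurely.
        data = buffered + data
        lines = data.splitlines(keepends=True)
        if lines and not lines[-1].endswith((b'\r', b'\n')):
            return lines[:-1], lines[-1]
        return lines, b''

    lines, rest = split_complete_lines(b'', b'first li')
    assert lines == [] and rest == b'first li'
    lines, rest = split_complete_lines(rest, b'ne\nsecond')
    assert lines == [b'first line\n'] and rest == b'second'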
+ for line in lines: + handle_remote_line(line.decode('utf-8')) # decode late, avoid partial utf-8 sequences if w: while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT: if calls: if is_preloaded: - if calls[0] in self.cache: - waiting_for.append(fetch_from_cache(calls.pop(0))) + assert cmd == 'get', "is_preload is only supported for 'get'" + if calls[0]['id'] in self.chunkid_to_msgids: + waiting_for.append(pop_preload_msgid(calls.pop(0)['id'])) else: args = calls.pop(0) - if cmd == 'get' and args in self.cache: - waiting_for.append(fetch_from_cache(args)) + if cmd == 'get' and args['id'] in self.chunkid_to_msgids: + waiting_for.append(pop_preload_msgid(args['id'])) else: self.msgid += 1 waiting_for.append(self.msgid) - self.to_send = msgpack.packb((1, self.msgid, cmd, args)) + if self.dictFormat: + self.to_send = msgpack.packb({MSGID: self.msgid, MSG: cmd, ARGS: args}) + else: + self.to_send = msgpack.packb((1, self.msgid, cmd, self.named_to_positional(cmd, args))) if not self.to_send and self.preload_ids: - args = (self.preload_ids.pop(0),) + chunk_id = self.preload_ids.pop(0) + args = {'id': chunk_id} self.msgid += 1 - self.cache.setdefault(args, []).append(self.msgid) - self.to_send = msgpack.packb((1, self.msgid, cmd, args)) + self.chunkid_to_msgids.setdefault(chunk_id, []).append(self.msgid) + if self.dictFormat: + self.to_send = msgpack.packb({MSGID: self.msgid, MSG: 'get', ARGS: args}) + else: + self.to_send = msgpack.packb((1, self.msgid, 'get', self.named_to_positional(cmd, args))) if self.to_send: try: - self.to_send = self.to_send[os.write(self.stdin_fd, self.to_send):] + written = self.ratelimit.write(self.stdin_fd, self.to_send) + self.tx_bytes += written + self.to_send = self.to_send[written:] except OSError as e: # io.write might raise EAGAIN even though select indicates # that the fd should be writable if e.errno != errno.EAGAIN: raise - self.ignore_responses |= set(waiting_for) + self.ignore_responses |= set(waiting_for) # we lose order here + @api(since=parse_version('1.0.0'), + append_only={'since': parse_version('1.0.7'), 'previously': False}) + def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, append_only=False): + """actual remoting is done via self.call in the @api decorator""" + + @api(since=parse_version('1.0.0')) def check(self, repair=False, save_space=False): - return self.call('check', repair, save_space) + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) def commit(self, save_space=False): - return self.call('commit', save_space) + """actual remoting is done via self.call in the @api decorator""" - def rollback(self, *args): - return self.call('rollback') + @api(since=parse_version('1.0.0')) + def rollback(self): + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) def destroy(self): - return self.call('destroy') + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) def __len__(self): - return self.call('__len__') + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) def list(self, limit=None, marker=None): - return self.call('list', limit, marker) + """actual remoting is done via self.call in the @api decorator""" - def get(self, id_): - for resp in self.get_many([id_]): + @api(since=parse_version('1.1.0b3')) + def scan(self, limit=None, marker=None): + """actual remoting is done via 
self.call in the @api decorator""" + + def get(self, id): + for resp in self.get_many([id]): return resp def get_many(self, ids, is_preloaded=False): - for resp in self.call_many('get', [(id_,) for id_ in ids], is_preloaded=is_preloaded): + for resp in self.call_many('get', [{'id': id} for id in ids], is_preloaded=is_preloaded): yield resp - def put(self, id_, data, wait=True): - return self.call('put', id_, data, wait=wait) + @api(since=parse_version('1.0.0')) + def put(self, id, data, wait=True): + """actual remoting is done via self.call in the @api decorator""" - def delete(self, id_, wait=True): - return self.call('delete', id_, wait=wait) + @api(since=parse_version('1.0.0')) + def delete(self, id, wait=True): + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) def save_key(self, keydata): - return self.call('save_key', keydata) + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) def load_key(self): - return self.call('load_key') + """actual remoting is done via self.call in the @api decorator""" + @api(since=parse_version('1.0.0')) + def get_free_nonce(self): + """actual remoting is done via self.call in the @api decorator""" + + @api(since=parse_version('1.0.0')) + def commit_nonce_reservation(self, next_unreserved, start_nonce): + """actual remoting is done via self.call in the @api decorator""" + + @api(since=parse_version('1.0.0')) def break_lock(self): - return self.call('break_lock') + """actual remoting is done via self.call in the @api decorator""" def close(self): if self.p: @@ -423,12 +984,67 @@ class RemoteRepository: self.p.wait() self.p = None + def async_response(self, wait=True): + for resp in self.call_many('async_responses', calls=[], wait=True, async_wait=wait): + return resp + def preload(self, ids): self.preload_ids += ids def handle_remote_line(line): - if line.startswith('$LOG '): + """ + Handle a remote log line. + + This function is remarkably complex because it handles multiple wire formats. + """ + assert line.endswith(('\r', '\n')) + if line.startswith('{'): + # This format is used by Borg since 1.1.0b6 for new-protocol clients. + # It is the same format that is exposed by --log-json. + msg = json.loads(line) + + if msg['type'] not in ('progress_message', 'progress_percent', 'log_message'): + logger.warning('Dropped remote log message with unknown type %r: %s', msg['type'], line) + return + + if msg['type'] == 'log_message': + # Re-emit log messages on the same level as the remote to get correct log suppression and verbosity. + level = getattr(logging, msg['levelname'], logging.CRITICAL) + assert isinstance(level, int) + target_logger = logging.getLogger(msg['name']) + msg['message'] = 'Remote: ' + msg['message'] + # In JSON mode, we manually check whether the log message should be propagated. + if logging.getLogger('borg').json and level >= target_logger.getEffectiveLevel(): + sys.stderr.write(json.dumps(msg) + '\n') + else: + target_logger.log(level, '%s', msg['message']) + elif msg['type'].startswith('progress_'): + # Progress messages are a bit more complex. + # First of all, we check whether progress output is enabled. This is signalled + # through the effective level of the borg.output.progress logger + # (also see ProgressIndicatorBase in borg.helpers). 
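As a sketch of the log_message path handled above, here is one JSON record of the kind a 1.1.0b6+ server emits, re-dispatched to the local logging tree; the sample values are made up and the non-JSON branch is omitted:

    import json
    import logging

    line = '{"type": "log_message", "levelname": "WARNING", "name": "borg.output.stats", "message": "example"}\n'
    msg = json.loads(line)
    if msg['type'] == 'log_message':
        level = getattr(logging, msg['levelname'], logging.CRITICAL)
        logging.getLogger(msg['name']).log(level, 'Remote: %s', msg['message'])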
+ progress_logger = logging.getLogger('borg.output.progress') + if progress_logger.getEffectiveLevel() == logging.INFO: + # When progress output is enabled, we check whether the client is in + # --log-json mode, as signalled by the "json" attribute on the "borg" logger. + if logging.getLogger('borg').json: + # In --log-json mode we re-emit the progress JSON line as sent by the server, + # with the message, if any, prefixed with "Remote: ". + if 'message' in msg: + msg['message'] = 'Remote: ' + msg['message'] + sys.stderr.write(json.dumps(msg) + '\n') + elif 'message' in msg: + # In text log mode we write only the message to stderr and terminate with \r + # (carriage return, i.e. move the write cursor back to the beginning of the line) + # so that the next message, progress or not, overwrites it. This mirrors the behaviour + # of local progress displays. + sys.stderr.write('Remote: ' + msg['message'] + '\r') + elif line.startswith('$LOG '): + # This format is used by borg serve 0.xx, 1.0.x and 1.1.0b1..b5. + # It prefixed log lines with $LOG as a marker, followed by the log level + # and optionally a logger name, then "Remote:" as a separator followed by the original + # message. _, level, msg = line.split(' ', 2) level = getattr(logging, level, logging.CRITICAL) # str -> int if msg.startswith('Remote:'): @@ -439,16 +1055,29 @@ def handle_remote_line(line): logname, msg = msg.split(' ', 1) logging.getLogger(logname).log(level, msg.rstrip()) else: - sys.stderr.write("Remote: " + line) + # Plain 1.0.x and older format - re-emit to stderr (mirroring what the 1.0.x + # client did) or as a generic log message. + # We don't know what priority the line had. + if logging.getLogger('borg').json: + logging.getLogger('').warning('Remote: ' + line.strip()) + else: + # In non-JSON mode we circumvent logging to preserve carriage returns (\r) + # which are generated by remote progress displays. + sys.stderr.write('Remote: ' + line) class RepositoryNoCache: """A not caching Repository wrapper, passes through to repository. Just to have same API (including the context manager) as RepositoryCache. + + *transform* is a callable taking two arguments, key and raw repository data. + The return value is returned from get()/get_many(). By default, the raw + repository data is returned. """ - def __init__(self, repository): + def __init__(self, repository, transform=None): self.repository = repository + self.transform = transform or (lambda key, data: data) def close(self): pass @@ -460,52 +1089,166 @@ class RepositoryNoCache: self.close() def get(self, key): - return next(self.get_many([key])) + return next(self.get_many([key], cache=False)) - def get_many(self, keys): - for data in self.repository.get_many(keys): - yield data + def get_many(self, keys, cache=True): + for key, data in zip(keys, self.repository.get_many(keys)): + yield self.transform(key, data) + + def log_instrumentation(self): + pass class RepositoryCache(RepositoryNoCache): - """A caching Repository wrapper - - Caches Repository GET operations using a local temporary Repository. """ - # maximum object size that will be cached, 64 kiB. - THRESHOLD = 2**16 + A caching Repository wrapper. - def __init__(self, repository): - super().__init__(repository) - tmppath = tempfile.mkdtemp(prefix='borg-tmp') - self.caching_repo = Repository(tmppath, create=True, exclusive=True) - self.caching_repo.__enter__() # handled by context manager in base class + Caches Repository GET operations locally. 
+ + *pack* and *unpack* complement *transform* of the base class. + *pack* receives the output of *transform* and should return bytes, + which are stored in the cache. *unpack* receives these bytes and + should return the initial data (as returned by *transform*). + """ + + def __init__(self, repository, pack=None, unpack=None, transform=None): + super().__init__(repository, transform) + self.pack = pack or (lambda data: data) + self.unpack = unpack or (lambda data: data) + self.cache = set() + self.basedir = tempfile.mkdtemp(prefix='borg-cache-') + self.query_size_limit() + self.size = 0 + # Instrumentation + self.hits = 0 + self.misses = 0 + self.slow_misses = 0 + self.slow_lat = 0.0 + self.evictions = 0 + self.enospc = 0 + + def query_size_limit(self): + stat_fs = os.statvfs(self.basedir) + available_space = stat_fs.f_bsize * stat_fs.f_bavail + self.size_limit = int(min(available_space * 0.25, 2**31)) + + def key_filename(self, key): + return os.path.join(self.basedir, bin_to_hex(key)) + + def backoff(self): + self.query_size_limit() + target_size = int(0.9 * self.size_limit) + while self.size > target_size and self.cache: + key = self.cache.pop() + file = self.key_filename(key) + self.size -= os.stat(file).st_size + os.unlink(file) + self.evictions += 1 + + def add_entry(self, key, data, cache): + transformed = self.transform(key, data) + if not cache: + return transformed + packed = self.pack(transformed) + file = self.key_filename(key) + try: + with open(file, 'wb') as fd: + fd.write(packed) + except OSError as os_error: + try: + truncate_and_unlink(file) + except FileNotFoundError: + pass # open() could have failed as well + if os_error.errno == errno.ENOSPC: + self.enospc += 1 + self.backoff() + else: + raise + else: + self.size += len(packed) + self.cache.add(key) + if self.size > self.size_limit: + self.backoff() + return transformed + + def log_instrumentation(self): + logger.debug('RepositoryCache: current items %d, size %s / %s, %d hits, %d misses, %d slow misses (+%.1fs), ' + '%d evictions, %d ENOSPC hit', + len(self.cache), format_file_size(self.size), format_file_size(self.size_limit), + self.hits, self.misses, self.slow_misses, self.slow_lat, + self.evictions, self.enospc) def close(self): - if self.caching_repo is not None: - self.caching_repo.destroy() - self.caching_repo = None + self.log_instrumentation() + self.cache.clear() + shutil.rmtree(self.basedir) - def get_many(self, keys): - unknown_keys = [key for key in keys if key not in self.caching_repo] + def get_many(self, keys, cache=True): + unknown_keys = [key for key in keys if key not in self.cache] repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys)) for key in keys: - try: - yield self.caching_repo.get(key) - except Repository.ObjectNotFound: + if key in self.cache: + file = self.key_filename(key) + with open(file, 'rb') as fd: + self.hits += 1 + yield self.unpack(fd.read()) + else: for key_, data in repository_iterator: if key_ == key: - if len(data) <= self.THRESHOLD: - self.caching_repo.put(key, data) - yield data + transformed = self.add_entry(key, data, cache) + self.misses += 1 + yield transformed break + else: + # slow path: eviction during this get_many removed this key from the cache + t0 = time.perf_counter() + data = self.repository.get(key) + self.slow_lat += time.perf_counter() - t0 + transformed = self.add_entry(key, data, cache) + self.slow_misses += 1 + yield transformed # Consume any pending requests for _ in repository_iterator: pass -def cache_if_remote(repository): 
- if isinstance(repository, RemoteRepository): - return RepositoryCache(repository) +def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None, transform=None, force_cache=False): + """ + Return a Repository(No)Cache for *repository*. + + If *decrypted_cache* is a key object, then get and get_many will return a tuple + (csize, plaintext) instead of the actual data in the repository. The cache will + store decrypted data, which increases CPU efficiency (by avoiding repeatedly decrypting + and more importantly MAC and ID checking cached objects). + Internally, objects are compressed with LZ4. + """ + if decrypted_cache and (pack or unpack or transform): + raise ValueError('decrypted_cache and pack/unpack/transform are incompatible') + elif decrypted_cache: + key = decrypted_cache + # 32 bit csize, 64 bit (8 byte) xxh64 + cache_struct = struct.Struct('=I8s') + compressor = LZ4() + + def pack(data): + csize, decrypted = data + compressed = compressor.compress(decrypted) + return cache_struct.pack(csize, xxh64(compressed)) + compressed + + def unpack(data): + data = memoryview(data) + csize, checksum = cache_struct.unpack(data[:cache_struct.size]) + compressed = data[cache_struct.size:] + if checksum != xxh64(compressed): + raise IntegrityError('detected corrupted data in metadata cache') + return csize, compressor.decompress(compressed) + + def transform(id_, data): + csize = len(data) + decrypted = key.decrypt(id_, data) + return csize, decrypted + + if isinstance(repository, RemoteRepository) or force_cache: + return RepositoryCache(repository, pack, unpack, transform) else: - return RepositoryNoCache(repository) + return RepositoryNoCache(repository, transform) diff --git a/src/borg/repository.py b/src/borg/repository.py index 6af3f87d..42235573 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -1,33 +1,39 @@ import errno +import mmap import os +import sys import shutil import struct -from binascii import unhexlify +from binascii import hexlify, unhexlify from collections import defaultdict from configparser import ConfigParser from datetime import datetime from functools import partial from itertools import islice -from zlib import crc32 import msgpack -import logging -logger = logging.getLogger(__name__) - from .constants import * # NOQA from .hashindex import NSIndex -from .helpers import Error, ErrorWithTraceback, IntegrityError +from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size from .helpers import Location from .helpers import ProgressIndicatorPercent from .helpers import bin_to_hex -from .locking import UpgradableLock, LockError, LockErrorT +from .helpers import hostname_is_unique +from .helpers import secure_erase, truncate_and_unlink +from .locking import Lock, LockError, LockErrorT +from .logger import create_logger from .lrucache import LRUCache -from .platform import SyncFile, sync_dir +from .platform import SaveFile, SyncFile, sync_dir, safe_fadvise +from .algorithms.checksums import crc32 +from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError + +logger = create_logger(__name__) -MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' MAGIC_LEN = len(MAGIC) +ATTIC_MAGIC = b'ATTICSEG' +assert len(ATTIC_MAGIC) == MAGIC_LEN TAG_PUT = 0 TAG_DELETE = 1 TAG_COMMIT = 2 @@ -79,35 +85,83 @@ class Repository: dir/data// dir/index.X dir/hints.X + + File system interaction + ----------------------- + + LoggedIO generally tries to rely on common behaviours across transactional file 
systems. + + Segments that are deleted are truncated first, which avoids problems if the FS needs to + allocate space to delete the dirent of the segment. This mostly affects CoW file systems, + traditional journaling file systems have a fairly good grip on this problem. + + Note that deletion, i.e. unlink(2), is atomic on every file system that uses inode reference + counts, which includes pretty much all of them. To remove a dirent the inodes refcount has + to be decreased, but you can't decrease the refcount before removing the dirent nor can you + decrease the refcount after removing the dirent. File systems solve this with a lock, + and by ensuring it all stays within the same FS transaction. + + Truncation is generally not atomic in itself, and combining truncate(2) and unlink(2) is of + course never guaranteed to be atomic. Truncation in a classic extent-based FS is done in + roughly two phases, first the extents are removed then the inode is updated. (In practice + this is of course way more complex). + + LoggedIO gracefully handles truncate/unlink splits as long as the truncate resulted in + a zero length file. Zero length segments are considered to not exist, while LoggedIO.cleanup() + will still get rid of them. """ class DoesNotExist(Error): """Repository {} does not exist.""" class AlreadyExists(Error): - """Repository {} already exists.""" + """A repository already exists at {}.""" class InvalidRepository(Error): """{} is not a valid repository. Check repo config.""" + class AtticRepository(Error): + """Attic repository detected. Please run "borg upgrade {}".""" + class CheckNeeded(ErrorWithTraceback): """Inconsistency detected. Please run "borg check {}".""" class ObjectNotFound(ErrorWithTraceback): """Object with key {} not found in repository {}.""" - def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False): + def __init__(self, id, repo): + if isinstance(id, bytes): + id = bin_to_hex(id) + super().__init__(id, repo) + + class InsufficientFreeSpaceError(Error): + """Insufficient free space to complete transaction (required: {}, available: {}).""" + + class StorageQuotaExceeded(Error): + """The storage quota ({}) has been exceeded ({}). Try deleting some archives.""" + + def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, + append_only=False, storage_quota=None, check_segment_magic=True): self.path = os.path.abspath(path) self._location = Location('file://%s' % self.path) - self.io = None + self.io = None # type: LoggedIO self.lock = None self.index = None + # This is an index of shadowed log entries during this transaction. Consider the following sequence: + # segment_n PUT A, segment_x DELETE A + # After the "DELETE A" in segment_x the shadow index will contain "A -> [n]". 
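A tiny sketch of the bookkeeping described in the comment above, assuming an index that maps key -> (segment, offset); the helper name and sample values are illustrative:

    shadow_index = {}                  # key -> [segments holding superseded PUTs]
    index = {b'A': (5, 1234)}          # the current PUT of key A lives in segment 5

    def note_delete(key):
        # "segment_n PUT A, segment_x DELETE A" leaves "A -> [n]" behind
        segment, _offset = index.pop(key)
        shadow_index.setdefault(key, []).append(segment)

    note_delete(b'A')
    assert shadow_index == {b'A': [5]}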
+ self.shadow_index = {} self._active_txn = False self.lock_wait = lock_wait self.do_lock = lock self.do_create = create + self.created = False self.exclusive = exclusive self.append_only = append_only + self.storage_quota = storage_quota + self.storage_quota_use = 0 + self.transaction_doomed = None + self.check_segment_magic = check_segment_magic def __del__(self): if self.lock: @@ -121,7 +175,8 @@ class Repository: if self.do_create: self.do_create = False self.create(self.path) - self.open(self.path, self.exclusive, lock_wait=self.lock_wait, lock=self.do_lock) + self.created = True + self.open(self.path, bool(self.exclusive), lock_wait=self.lock_wait, lock=self.do_lock) return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -131,38 +186,98 @@ class Repository: # EIO or FS corruption ensues, which is why we specifically check for ENOSPC. if self._active_txn and no_space_left_on_device: logger.warning('No space left on device, cleaning up partial transaction to free space.') - self.io.cleanup(self.io.get_segments_transaction_id()) - self.rollback() + cleanup = True + else: + cleanup = False + self._rollback(cleanup=cleanup) self.close() @property def id_str(self): return bin_to_hex(self.id) - def create(self, path): - """Create a new empty repository at `path` + def check_can_create_repository(self, path): + """ + Raise self.AlreadyExists if a repository already exists at *path* or any parent directory. + + Checking parent directories is done for two reasons: + (1) It's just a weird thing to do, and usually not intended. A Borg using the "parent" repository + may be confused, or we may accidentally put stuff into the "data/" or "data//" directories. + (2) When implementing repository quotas (which we currently don't), it's important to prohibit + folks from creating quota-free repositories. Since no one can create a repository within another + repository, user's can only use the quota'd repository, when their --restrict-to-path points + at the user's repository. """ if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)): raise self.AlreadyExists(path) + + while True: + # Check all parent directories for Borg's repository README + previous_path = path + # Thus, path = previous_path/.. + path = os.path.abspath(os.path.join(previous_path, os.pardir)) + if path == previous_path: + # We reached the root of the directory hierarchy (/.. = / and C:\.. = C:\). + break + try: + # Use binary mode to avoid troubles if a README contains some stuff not in our locale + with open(os.path.join(path, 'README'), 'rb') as fd: + # Read only the first ~100 bytes (if any), in case some README file we stumble upon is large. + readme_head = fd.read(100) + # The first comparison captures our current variant (REPOSITORY_README), the second comparison + # is an older variant of the README file (used by 1.0.x). + if b'Borg Backup repository' in readme_head or b'Borg repository' in readme_head: + raise self.AlreadyExists(path) + except OSError: + # Ignore FileNotFound, PermissionError, ... 
+ pass + + def create(self, path): + """Create a new empty repository at `path` + """ + self.check_can_create_repository(path) if not os.path.exists(path): os.mkdir(path) with open(os.path.join(path, 'README'), 'w') as fd: - fd.write('This is a Borg repository\n') + fd.write(REPOSITORY_README) os.mkdir(os.path.join(path, 'data')) config = ConfigParser(interpolation=None) config.add_section('repository') config.set('repository', 'version', '1') config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR)) config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE)) - config.set('repository', 'append_only', '0') + config.set('repository', 'append_only', str(int(self.append_only))) + if self.storage_quota: + config.set('repository', 'storage_quota', str(self.storage_quota)) + else: + config.set('repository', 'storage_quota', '0') + config.set('repository', 'additional_free_space', '0') config.set('repository', 'id', bin_to_hex(os.urandom(32))) self.save_config(path, config) def save_config(self, path, config): config_path = os.path.join(path, 'config') - with open(config_path, 'w') as fd: + old_config_path = os.path.join(path, 'config.old') + + if os.path.isfile(old_config_path): + logger.warning("Old config file not securely erased on previous config update") + secure_erase(old_config_path) + + if os.path.isfile(config_path): + try: + os.link(config_path, old_config_path) + except OSError as e: + if e.errno in (errno.EMLINK, errno.ENOSYS, errno.EPERM, errno.ENOTSUP): + logger.warning("Hardlink failed, cannot securely erase old config file") + else: + raise + + with SaveFile(config_path) as fd: config.write(fd) + if os.path.isfile(old_config_path): + secure_erase(old_config_path) + def save_key(self, keydata): assert self.config keydata = keydata.decode('utf-8') # remote repo: msgpack issue #99, getting bytes @@ -173,6 +288,27 @@ class Repository: keydata = self.config.get('repository', 'key') return keydata.encode('utf-8') # remote repo: msgpack issue #99, returning bytes + def get_free_nonce(self): + if not self.lock.got_exclusive_lock(): + raise AssertionError("bug in code, exclusive lock should exist here") + + nonce_path = os.path.join(self.path, 'nonce') + try: + with open(nonce_path, 'r') as fd: + return int.from_bytes(unhexlify(fd.read()), byteorder='big') + except FileNotFoundError: + return None + + def commit_nonce_reservation(self, next_unreserved, start_nonce): + if not self.lock.got_exclusive_lock(): + raise AssertionError("bug in code, exclusive lock should exist here") + + if self.get_free_nonce() != start_nonce: + raise Exception("nonce space reservation with mismatched previous state") + nonce_path = os.path.join(self.path, 'nonce') + with SaveFile(nonce_path, binary=False) as fd: + fd.write(bin_to_hex(next_unreserved.to_bytes(8, byteorder='big'))) + def destroy(self): """Destroy the repository at `self.path` """ @@ -195,7 +331,13 @@ class Repository: index_transaction_id = self.get_index_transaction_id() segments_transaction_id = self.io.get_segments_transaction_id() if index_transaction_id is not None and segments_transaction_id is None: - raise self.CheckNeeded(self.path) + # we have a transaction id from the index, but we did not find *any* + # commit in the segment files (thus no segments transaction id). + # this can happen if a lot of segment files are lost, e.g. due to a + # filesystem or hardware malfunction. it means we have no identifiable + # valid (committed) state of the repo which we could use. 
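The nonce file handled a bit earlier in this hunk stores a 64-bit counter as hex-encoded big-endian bytes; a quick standard-library round trip of that encoding (helper names are illustrative, and the real commit_nonce_reservation additionally verifies the previously read value before writing):

    from binascii import hexlify, unhexlify

    def encode_nonce(value):
        return hexlify(value.to_bytes(8, byteorder='big')).decode('ascii')

    def decode_nonce(text):
        return int.from_bytes(unhexlify(text), byteorder='big')

    assert encode_nonce(0x1122) == '0000000000001122'
    assert decode_nonce('0000000000001122') == 0x1122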
+ msg = '%s" - although likely this is "beyond repair' % self.path # dirty hack + raise self.CheckNeeded(msg) # Attempt to automatically rebuild index if we crashed between commit # tag write and index save if index_transaction_id != segments_transaction_id: @@ -210,27 +352,43 @@ class Repository: return self.get_index_transaction_id() def break_lock(self): - UpgradableLock(os.path.join(self.path, 'lock')).break_lock() + Lock(os.path.join(self.path, 'lock')).break_lock() + + def migrate_lock(self, old_id, new_id): + # note: only needed for local repos + if self.lock is not None: + self.lock.migrate_lock(old_id, new_id) def open(self, path, exclusive, lock_wait=None, lock=True): self.path = path if not os.path.isdir(path): raise self.DoesNotExist(path) if lock: - self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait).acquire() + self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait, kill_stale_locks=hostname_is_unique()).acquire() else: self.lock = None self.config = ConfigParser(interpolation=None) self.config.read(os.path.join(self.path, 'config')) if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: + self.close() raise self.InvalidRepository(path) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') + self.additional_free_space = parse_file_size(self.config.get('repository', 'additional_free_space', fallback=0)) # append_only can be set in the constructor # it shouldn't be overridden (True -> False) here self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False) + if self.storage_quota is None: + # self.storage_quota is None => no explicit storage_quota was specified, use repository setting. + self.storage_quota = self.config.getint('repository', 'storage_quota', fallback=0) self.id = unhexlify(self.config.get('repository', 'id').strip()) self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir) + if self.check_segment_magic: + # read a segment and check whether we are dealing with a non-upgraded Attic repository + segment = self.io.get_latest_segment() + if segment is not None and self.io.get_segment_magic(segment) == ATTIC_MAGIC: + self.close() + raise self.AtticRepository(path) def close(self): if self.lock: @@ -243,21 +401,42 @@ class Repository: def commit(self, save_space=False): """Commit transaction """ + # save_space is not used anymore, but stays for RPC/API compatibility. 
+ if self.transaction_doomed: + exception = self.transaction_doomed + self.rollback() + raise exception + self.check_free_space() + self.log_storage_quota() self.io.write_commit() if not self.append_only: - self.compact_segments(save_space=save_space) + self.compact_segments() self.write_index() self.rollback() + def _read_integrity(self, transaction_id, key): + integrity_file = 'integrity.%d' % transaction_id + integrity_path = os.path.join(self.path, integrity_file) + try: + with open(integrity_path, 'rb') as fd: + integrity = msgpack.unpack(fd) + except FileNotFoundError: + return + if integrity.get(b'version') != 2: + logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file) + return + return integrity[key].decode() + def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() - index_path = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8') + index_path = os.path.join(self.path, 'index.%d' % transaction_id) + integrity_data = self._read_integrity(transaction_id, b'index') try: - return NSIndex.read(index_path) - except RuntimeError as error: - assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble - logger.warning('Repository index missing or corrupted, trying to recover') + with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd: + return NSIndex.read(fd) + except (ValueError, OSError, FileIntegrityError) as exc: + logger.warning('Repository index missing or corrupted, trying to recover from: %s', exc) os.unlink(index_path) if not auto_recover: raise @@ -268,33 +447,46 @@ class Repository: def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True - try: - self.lock.upgrade() - except (LockError, LockErrorT): - # if upgrading the lock to exclusive fails, we do not have an - # active transaction. this is important for "serve" mode, where - # the repository instance lives on - even if exceptions happened. - self._active_txn = False - raise + if not self.lock.got_exclusive_lock(): + if self.exclusive is not None: + # self.exclusive is either True or False, thus a new client is active here. + # if it is False and we get here, the caller did not use exclusive=True although + # it is needed for a write operation. if it is True and we get here, something else + # went very wrong, because we should have a exclusive lock, but we don't. + raise AssertionError("bug in code, exclusive lock should exist here") + # if we are here, this is an old client talking to a new server (expecting lock upgrade). + # or we are replaying segments and might need a lock upgrade for that. + try: + self.lock.upgrade() + except (LockError, LockErrorT): + # if upgrading the lock to exclusive fails, we do not have an + # active transaction. this is important for "serve" mode, where + # the repository instance lives on - even if exceptions happened. 
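For reference, a sketch of the msgpack structure _read_integrity above expects in an integrity.<transaction_id> file; the payload bytes are placeholders, the real values come from IntegrityCheckedFile (see write_index further down):

    import msgpack

    integrity = {
        b'version': 2,                       # integrity version == hints version, for now
        b'hints': b'<hints checksum data>',  # placeholder payload
        b'index': b'<index checksum data>',  # placeholder payload
    }
    packed = msgpack.packb(integrity)
    assert msgpack.unpackb(packed).get(b'version') == 2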
+ self._active_txn = False + raise if not self.index or transaction_id is None: try: - self.index = self.open_index(transaction_id, False) - except RuntimeError: + self.index = self.open_index(transaction_id, auto_recover=False) + except (ValueError, OSError, FileIntegrityError) as exc: + logger.warning('Checking repository transaction due to previous error: %s', exc) self.check_transaction() - self.index = self.open_index(transaction_id, False) + self.index = self.open_index(transaction_id, auto_recover=False) if transaction_id is None: self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x] self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x] + self.storage_quota_use = 0 + self.shadow_index.clear() else: if do_cleanup: self.io.cleanup(transaction_id) hints_path = os.path.join(self.path, 'hints.%d' % transaction_id) index_path = os.path.join(self.path, 'index.%d' % transaction_id) + integrity_data = self._read_integrity(transaction_id, b'hints') try: - with open(hints_path, 'rb') as fd: + with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd: hints = msgpack.unpack(fd) - except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: - logger.warning('Repository hints file missing or corrupted, trying to recover') + except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError, FileIntegrityError) as e: + logger.warning('Repository hints file missing or corrupted, trying to recover: %s', e) if not isinstance(e, FileNotFoundError): os.unlink(hints_path) # index must exist at this point @@ -306,6 +498,7 @@ class Repository: logger.debug('Upgrading from v1 hints.%d', transaction_id) self.segments = hints[b'segments'] self.compact = FreeSpace() + self.storage_quota_use = 0 for segment in sorted(hints[b'compact']): logger.debug('Rebuilding sparse info for segment %d', segment) self._rebuild_sparse(segment) @@ -315,35 +508,143 @@ class Repository: else: self.segments = hints[b'segments'] self.compact = FreeSpace(hints[b'compact']) + self.storage_quota_use = hints.get(b'storage_quota_use', 0) + self.log_storage_quota() + # Drop uncommitted segments in the shadow index + for key, shadowed_segments in self.shadow_index.items(): + for segment in list(shadowed_segments): + if segment > transaction_id: + shadowed_segments.remove(segment) def write_index(self): - hints = {b'version': 2, - b'segments': self.segments, - b'compact': self.compact} - transaction_id = self.io.get_segments_transaction_id() - hints_file = os.path.join(self.path, 'hints.%d' % transaction_id) - with open(hints_file + '.tmp', 'wb') as fd: - msgpack.pack(hints, fd) + def flush_and_sync(fd): fd.flush() os.fsync(fd.fileno()) - os.rename(hints_file + '.tmp', hints_file) - self.index.write(os.path.join(self.path, 'index.tmp')) - os.rename(os.path.join(self.path, 'index.tmp'), - os.path.join(self.path, 'index.%d' % transaction_id)) + + def rename_tmp(file): + os.rename(file + '.tmp', file) + + hints = { + b'version': 2, + b'segments': self.segments, + b'compact': self.compact, + b'storage_quota_use': self.storage_quota_use, + } + integrity = { + # Integrity version started at 2, the current hints version. + # Thus, integrity version == hints version, for now. 
+ b'version': 2, + } + transaction_id = self.io.get_segments_transaction_id() + assert transaction_id is not None + + # Log transaction in append-only mode if self.append_only: with open(os.path.join(self.path, 'transactions'), 'a') as log: - print('transaction %d, UTC time %s' % (transaction_id, datetime.utcnow().isoformat()), file=log) + print('transaction %d, UTC time %s' % ( + transaction_id, datetime.utcnow().strftime(ISO_FORMAT)), file=log) + + # Write hints file + hints_name = 'hints.%d' % transaction_id + hints_file = os.path.join(self.path, hints_name) + with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd: + msgpack.pack(hints, fd) + flush_and_sync(fd) + integrity[b'hints'] = fd.integrity_data + + # Write repository index + index_name = 'index.%d' % transaction_id + index_file = os.path.join(self.path, index_name) + with IntegrityCheckedFile(index_file + '.tmp', filename=index_name, write=True) as fd: + # XXX: Consider using SyncFile for index write-outs. + self.index.write(fd) + flush_and_sync(fd) + integrity[b'index'] = fd.integrity_data + + # Write integrity file, containing checksums of the hints and index files + integrity_name = 'integrity.%d' % transaction_id + integrity_file = os.path.join(self.path, integrity_name) + with open(integrity_file + '.tmp', 'wb') as fd: + msgpack.pack(integrity, fd) + flush_and_sync(fd) + + # Rename the integrity file first + rename_tmp(integrity_file) + sync_dir(self.path) + # Rename the others after the integrity file is hypothetically on disk + rename_tmp(hints_file) + rename_tmp(index_file) + sync_dir(self.path) + # Remove old auxiliary files current = '.%d' % transaction_id for name in os.listdir(self.path): - if not name.startswith(('index.', 'hints.')): + if not name.startswith(('index.', 'hints.', 'integrity.')): continue if name.endswith(current): continue os.unlink(os.path.join(self.path, name)) self.index = None - def compact_segments(self, save_space=False): + def check_free_space(self): + """Pre-commit check for sufficient free space to actually perform the commit.""" + # As a baseline we take four times the current (on-disk) index size. + # At this point the index may only be updated by compaction, which won't resize it. + # We still apply a factor of four so that a later, separate invocation can free space + # (journaling all deletes for all chunks is one index size) or still make minor additions + # (which may grow the index up to twice its current size). + # Note that in a subsequent operation the committed index is still on-disk, therefore we + # arrive at index_size * (1 + 2 + 1). + # In that order: journaled deletes (1), hashtable growth (2), persisted index (1). + required_free_space = self.index.size() * 4 + + # Conservatively estimate hints file size: + # 10 bytes for each segment-refcount pair, 10 bytes for each segment-space pair + # Assume maximum of 5 bytes per integer. Segment numbers will usually be packed more densely (1-3 bytes), + # as will refcounts and free space integers. For 5 MiB segments this estimate is good to ~20 PB repo size. + # Add 4K to generously account for constant format overhead. + hints_size = len(self.segments) * 10 + len(self.compact) * 10 + 4096 + required_free_space += hints_size + + required_free_space += self.additional_free_space + if not self.append_only: + full_segment_size = self.max_segment_size + MAX_OBJECT_SIZE + if len(self.compact) < 10: + # This is mostly for the test suite to avoid overestimated free space needs. 
This can be annoying + # if TMP is a small-ish tmpfs. + compact_working_space = sum(self.io.segment_size(segment) - free for segment, free in self.compact.items()) + logger.debug('check_free_space: few segments, not requiring a full free segment') + compact_working_space = min(compact_working_space, full_segment_size) + logger.debug('check_free_space: calculated working space for compact as %d bytes', compact_working_space) + required_free_space += compact_working_space + else: + # Keep one full worst-case segment free in non-append-only mode + required_free_space += full_segment_size + if sys.platform != 'win32': + try: + st_vfs = os.statvfs(self.path) + except OSError as os_error: + logger.warning('Failed to check free space before committing: ' + str(os_error)) + return + # f_bavail: even as root - don't touch the Federal Block Reserve! + free_space = st_vfs.f_bavail * st_vfs.f_bsize + logger.debug('check_free_space: required bytes {}, free bytes {}'.format(required_free_space, free_space)) + if free_space < required_free_space: + if self.created: + logger.error('Not enough free space to initialize repository at this location.') + self.destroy() + else: + self._rollback(cleanup=True) + formatted_required = format_file_size(required_free_space) + formatted_free = format_file_size(free_space) + raise self.InsufficientFreeSpaceError(formatted_required, formatted_free) + + def log_storage_quota(self): + if self.storage_quota: + logger.info('Storage quota: %s out of %s used.', + format_file_size(self.storage_quota_use), format_file_size(self.storage_quota)) + + def compact_segments(self): """Compact sparse segments by copying data into new segments """ if not self.compact: @@ -351,34 +652,48 @@ class Repository: index_transaction_id = self.get_index_transaction_id() segments = self.segments unused = [] # list of segments, that are not used anymore + logger = create_logger('borg.debug.compact_segments') - def complete_xfer(): - # complete the transfer (usually exactly when some target segment - # is full, or at the very end when everything is processed) + def complete_xfer(intermediate=True): + # complete the current transfer (when some target segment is full) nonlocal unused # commit the new, compact, used segments - self.io.write_commit() + segment = self.io.write_commit(intermediate=intermediate) + logger.debug('complete_xfer: wrote %scommit at segment %d', 'intermediate ' if intermediate else '', segment) # get rid of the old, sparse, unused segments. free space. 
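Stepping back to check_free_space() above for a moment: the estimate it builds is plain arithmetic, and a tiny self-contained sketch may make it easier to follow. All numbers below are invented for illustration; max_segment_size and MAX_OBJECT_SIZE merely stand in for the real configuration value and constant, and additional_free_space is left out.

# Illustrative only - rough reconstruction of the estimate made in check_free_space().
index_size = 40 * 1024 * 1024            # on-disk size of the committed repository index
num_segments = 5000                      # len(self.segments)
num_compactable = 3                      # len(self.compact), here: the "few segments" case
max_segment_size = 500 * 1024 * 1024     # assumed configured segment size
MAX_OBJECT_SIZE = 20 * 1024 * 1024       # assumed stand-in for the real constant

required = index_size * 4                                        # journaled deletes (1) + growth (2) + old index (1)
required += num_segments * 10 + num_compactable * 10 + 4096      # hints file estimate
full_segment_size = max_segment_size + MAX_OBJECT_SIZE
compact_working_space = 42 * 1024 * 1024                         # assumed non-free bytes in compactable segments
required += min(compact_working_space, full_segment_size)        # < 10 compactable segments: capped working space
print('estimated free space required: %d bytes' % required)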
for segment in unused: - assert self.segments.pop(segment) == 0 + logger.debug('complete_xfer: deleting unused segment %d', segment) + count = self.segments.pop(segment) + assert count == 0, 'Corrupted segment reference count - corrupted index or hints' self.io.delete_segment(segment) del self.compact[segment] unused = [] + logger.debug('compaction started.') + pi = ProgressIndicatorPercent(total=len(self.compact), msg='Compacting segments %3.0f%%', step=1, + msgid='repository.compact_segments') for segment, freeable_space in sorted(self.compact.items()): if not self.io.segment_exists(segment): + logger.warning('segment %d not found, but listed in compaction data', segment) del self.compact[segment] + pi.show() continue segment_size = self.io.segment_size(segment) if segment_size > 0.2 * self.max_segment_size and freeable_space < 0.15 * segment_size: - logger.debug('not compacting segment %d for later (only %d bytes are sparse)', - segment, freeable_space) + logger.debug('not compacting segment %d (only %d bytes are sparse)', segment, freeable_space) + pi.show() continue segments.setdefault(segment, 0) + logger.debug('compacting segment %d with usage count %d and %d freeable bytes', + segment, segments[segment], freeable_space) for tag, key, offset, data in self.io.iter_objects(segment, include_data=True): - if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset): + if tag == TAG_COMMIT: + continue + in_index = self.index.get(key) + is_index_object = in_index == (segment, offset) + if tag == TAG_PUT and is_index_object: try: - new_segment, offset = self.io.write_put(key, data, raise_full=save_space) + new_segment, offset = self.io.write_put(key, data, raise_full=True) except LoggedIO.SegmentFull: complete_xfer() new_segment, offset = self.io.write_put(key, data) @@ -386,22 +701,79 @@ class Repository: segments.setdefault(new_segment, 0) segments[new_segment] += 1 segments[segment] -= 1 - elif tag == TAG_DELETE: - if index_transaction_id is None or segment > index_transaction_id: + elif tag == TAG_PUT and not is_index_object: + # If this is a PUT shadowed by a later tag, then it will be gone when this segment is deleted after + # this loop. Therefore it is removed from the shadow index. + try: + self.shadow_index[key].remove(segment) + except (KeyError, ValueError): + pass + elif tag == TAG_DELETE and not in_index: + # If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag, + # therefore we do not drop the delete, but write it to a current segment. + shadowed_put_exists = key not in self.shadow_index or any( + # If the key is in the shadow index and there is any segment with an older PUT of this + # key, we have a shadowed put. + shadowed < segment for shadowed in self.shadow_index[key]) + delete_is_not_stable = index_transaction_id is None or segment > index_transaction_id + + if shadowed_put_exists or delete_is_not_stable: + # (introduced in 6425d16aa84be1eaaf88) + # This is needed to avoid object un-deletion if we crash between the commit and the deletion + # of old segments in complete_xfer(). + # + # However, this only happens if the crash also affects the FS to the effect that file deletions + # did not materialize consistently after journal recovery. If they always materialize in-order + # then this is not a problem, because the old segment containing a deleted object would be deleted + # before the segment containing the delete. + # + # Consider the following series of operations if we would not do this, ie. 
this entire if: + # would be removed. + # Columns are segments, lines are different keys (line 1 = some key, line 2 = some other key) + # Legend: P=TAG_PUT, D=TAG_DELETE, c=commit, i=index is written for latest commit + # + # Segment | 1 | 2 | 3 + # --------+-------+-----+------ + # Key 1 | P | D | + # Key 2 | P | | P + # commits | c i | c | c i + # --------+-------+-----+------ + # ^- compact_segments starts + # ^- complete_xfer commits, after that complete_xfer deletes + # segments 1 and 2 (and then the index would be written). + # + # Now we crash. But only segment 2 gets deleted, while segment 1 is still around. Now key 1 + # is suddenly undeleted (because the delete in segment 2 is now missing). + # Again, note the requirement here. We delete these in the correct order that this doesn't happen, + # and only if the FS materialization of these deletes is reordered or parts dropped this can happen. + # In this case it doesn't cause outright corruption, 'just' an index count mismatch, which will be + # fixed by borg-check --repair. + # + # Note that in this check the index state is the proxy for a "most definitely settled" repository state, + # ie. the assumption is that *all* operations on segments <= index state are completed and stable. try: - self.io.write_delete(key, raise_full=save_space) + new_segment, size = self.io.write_delete(key, raise_full=True) except LoggedIO.SegmentFull: complete_xfer() - self.io.write_delete(key) - assert segments[segment] == 0 + new_segment, size = self.io.write_delete(key) + self.compact[new_segment] += size + segments.setdefault(new_segment, 0) + assert segments[segment] == 0, 'Corrupted segment reference count - corrupted index or hints' unused.append(segment) - complete_xfer() + pi.show() + pi.finish() + complete_xfer(intermediate=False) + logger.debug('compaction completed.') def replay_segments(self, index_transaction_id, segments_transaction_id): + # fake an old client, so that in case we do not have an exclusive lock yet, prepare_txn will upgrade the lock: + remember_exclusive = self.exclusive + self.exclusive = None self.prepare_txn(index_transaction_id, do_cleanup=False) try: segment_count = sum(1 for _ in self.io.segment_iterator()) - pi = ProgressIndicatorPercent(total=segment_count, msg="Replaying segments %3.0f%%", same_line=True) + pi = ProgressIndicatorPercent(total=segment_count, msg='Replaying segments %3.0f%%', + msgid='repository.replay_segments') for i, (segment, filename) in enumerate(self.io.segment_iterator()): pi.show(i) if index_transaction_id is not None and segment <= index_transaction_id: @@ -413,6 +785,7 @@ class Repository: pi.finish() self.write_index() finally: + self.exclusive = remember_exclusive self.rollback() def _update_index(self, segment, objects, report=None): @@ -429,6 +802,7 @@ class Repository: pass self.index[key] = segment, offset self.segments[segment] += 1 + self.storage_quota_use += size elif tag == TAG_DELETE: try: # if the deleted PUT is not in the index, there is nothing to clean up @@ -441,6 +815,7 @@ class Repository: # is already gone, then it was already compacted. 
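To make the rule above concrete, here is a stand-alone toy sketch of the decision for an un-indexed TAG_DELETE during compaction. The shadow_index contents, segment numbers and index_transaction_id are invented for the example; the predicate itself mirrors the check in compact_segments().

def must_keep_delete(key, segment, shadow_index, index_transaction_id):
    """Decide whether an un-indexed TAG_DELETE must be carried into a new segment."""
    shadowed_put_exists = key not in shadow_index or any(
        shadowed < segment for shadowed in shadow_index[key])
    delete_is_not_stable = index_transaction_id is None or segment > index_transaction_id
    return shadowed_put_exists or delete_is_not_stable

# toy data: key was PUT in segment 1, DELETEd in segment 2, index written up to segment 1
print(must_keep_delete(b'key1', 2, {b'key1': [1]}, index_transaction_id=1))   # True - keep the delete
# no older shadowed PUT and the delete is already covered by the committed index
print(must_keep_delete(b'key1', 2, {b'key1': []}, index_transaction_id=2))    # False - delete can be dropped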
self.segments[s] -= 1 size = self.io.read(s, offset, key, read_data=False) + self.storage_quota_use -= size self.compact[s] += size elif tag == TAG_COMMIT: continue @@ -488,19 +863,30 @@ class Repository: try: transaction_id = self.get_transaction_id() current_index = self.open_index(transaction_id) - except Exception: + logger.debug('Read committed index of transaction %d', transaction_id) + except Exception as exc: transaction_id = self.io.get_segments_transaction_id() current_index = None + logger.debug('Failed to read committed index (%s)', exc) if transaction_id is None: + logger.debug('No segments transaction found') transaction_id = self.get_index_transaction_id() if transaction_id is None: + logger.debug('No index transaction found, trying latest segment') transaction_id = self.io.get_latest_segment() + if transaction_id is None: + report_error('This repository contains no valid data.') + return False if repair: self.io.cleanup(transaction_id) segments_transaction_id = self.io.get_segments_transaction_id() + logger.debug('Segment transaction is %s', segments_transaction_id) + logger.debug('Determined transaction is %s', transaction_id) self.prepare_txn(None) # self.index, self.compact, self.segments all empty now! segment_count = sum(1 for _ in self.io.segment_iterator()) - pi = ProgressIndicatorPercent(total=segment_count, msg="Checking segments %3.1f%%", step=0.1, same_line=True) + logger.debug('Found %d segments', segment_count) + pi = ProgressIndicatorPercent(total=segment_count, msg='Checking segments %3.1f%%', step=0.1, + msgid='repository.check') for i, (segment, filename) in enumerate(self.io.segment_iterator()): pi.show(i) if segment > transaction_id: @@ -521,17 +907,34 @@ class Repository: report_error('Adding commit tag to segment {}'.format(transaction_id)) self.io.segment = transaction_id + 1 self.io.write_commit() + logger.info('Starting repository index check') if current_index and not repair: # current_index = "as found on disk" # self.index = "as rebuilt in-memory from segments" if len(current_index) != len(self.index): - report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index))) + report_error('Index object count mismatch.') + logger.error('committed index: %d objects', len(current_index)) + logger.error('rebuilt index: %d objects', len(self.index)) + + line_format = '%-64s %-16s %-16s' + not_found = '' + logger.warning(line_format, 'ID', 'rebuilt index', 'committed index') + for key, value in self.index.iteritems(): + current_value = current_index.get(key, not_found) + if current_value != value: + logger.warning(line_format, bin_to_hex(key), value, current_value) + for key, current_value in current_index.iteritems(): + if key in self.index: + continue + value = self.index.get(key, not_found) + if current_value != value: + logger.warning(line_format, bin_to_hex(key), value, current_value) elif current_index: for key, value in self.index.iteritems(): if current_index.get(key, (-1, -1)) != value: report_error('Index mismatch for key {}. 
{} != {}'.format(key, value, current_index.get(key, (-1, -1)))) if repair: - self.compact_segments(save_space=save_space) + self.compact_segments() self.write_index() self.rollback() if error_found: @@ -543,11 +946,18 @@ class Repository: logger.info('Completed repository check, no problems found.') return not error_found or repair - def rollback(self): + def _rollback(self, *, cleanup): """ """ + if cleanup: + self.io.cleanup(self.io.get_segments_transaction_id()) self.index = None self._active_txn = False + self.transaction_doomed = None + + def rollback(self): + # note: when used in remote mode, this is time limited, see RemoteRepository.shutdown_time. + self._rollback(cleanup=False) def __len__(self): if not self.index: @@ -560,24 +970,75 @@ class Repository: return id in self.index def list(self, limit=None, marker=None): + """ + list IDs starting from after id - in index (pseudo-random) order. + """ if not self.index: self.index = self.open_index(self.get_transaction_id()) return [id_ for id_, _ in islice(self.index.iteritems(marker=marker), limit)] - def get(self, id_): + def scan(self, limit=None, marker=None): + """ + list IDs starting from after id - in on-disk order, so that a client + fetching data in this order does linear reads and reuses stuff from disk cache. + + We rely on repository.check() has run already (either now or some time before) and that: + + - if we are called from a borg check command, self.index is a valid, fresh, in-sync repo index. + - if we are called from elsewhere, either self.index or the on-disk index is valid and in-sync. + - the repository segments are valid (no CRC errors). + if we encounter CRC errors in segment entry headers, rest of segment is skipped. + """ + if limit is not None and limit < 1: + raise ValueError('please use limit > 0 or limit = None') + if not self.index: + transaction_id = self.get_transaction_id() + self.index = self.open_index(transaction_id) + at_start = marker is None + # smallest valid seg is 0, smallest valid offs is 8 + start_segment, start_offset = (0, 0) if at_start else self.index[marker] + result = [] + for segment, filename in self.io.segment_iterator(start_segment): + obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False, include_data=False) + while True: + try: + tag, id, offset, size = next(obj_iterator) + except (StopIteration, IntegrityError): + # either end-of-segment or an error - we can not seek to objects at + # higher offsets than one that has an error in the header fields + break + if start_offset > 0: + # we are using a marker and the marker points to the last object we have already + # returned in the previous scan() call - thus, we need to skip this one object. + # also, for the next segment, we need to start at offset 0. 
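As a usage illustration for scan() (not code from this patch), a caller might page through the repository in on-disk order roughly as follows; repo is assumed to be an already opened Repository and the batch size of 1000 is arbitrary.

# Hypothetical pagination loop over repo.scan().
marker = None
while True:
    ids = repo.scan(limit=1000, marker=marker)   # IDs in on-disk order -> mostly linear reads
    if not ids:
        break
    for id in ids:
        data = repo.get(id)                      # benefits from the disk cache due to scan order
        # ... process data ...
    marker = ids[-1]                             # continue right after the last returned ID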
+ start_offset = 0 + continue + if tag == TAG_PUT and (segment, offset) == self.index.get(id): + # we have found an existing and current object + result.append(id) + if len(result) == limit: + return result + return result + + def get(self, id): if not self.index: self.index = self.open_index(self.get_transaction_id()) try: - segment, offset = self.index[id_] - return self.io.read(segment, offset, id_) + segment, offset = self.index[id] + return self.io.read(segment, offset, id) except KeyError: - raise self.ObjectNotFound(id_, self.path) from None + raise self.ObjectNotFound(id, self.path) from None def get_many(self, ids, is_preloaded=False): for id_ in ids: yield self.get(id_) def put(self, id, data, wait=True): + """put a repo object + + Note: when doing calls with wait=False this gets async and caller must + deal with async results / exceptions later. + """ if not self._active_txn: self.prepare_txn(self.get_transaction_id()) try: @@ -587,29 +1048,53 @@ class Repository: else: self.segments[segment] -= 1 size = self.io.read(segment, offset, id, read_data=False) + self.storage_quota_use -= size self.compact[segment] += size segment, size = self.io.write_delete(id) self.compact[segment] += size self.segments.setdefault(segment, 0) segment, offset = self.io.write_put(id, data) + self.storage_quota_use += len(data) + self.io.put_header_fmt.size self.segments.setdefault(segment, 0) self.segments[segment] += 1 self.index[id] = segment, offset + if self.storage_quota and self.storage_quota_use > self.storage_quota: + self.transaction_doomed = self.StorageQuotaExceeded( + format_file_size(self.storage_quota), format_file_size(self.storage_quota_use)) + raise self.transaction_doomed def delete(self, id, wait=True): + """delete a repo object + + Note: when doing calls with wait=False this gets async and caller must + deal with async results / exceptions later. + """ if not self._active_txn: self.prepare_txn(self.get_transaction_id()) try: segment, offset = self.index.pop(id) except KeyError: raise self.ObjectNotFound(id, self.path) from None + self.shadow_index.setdefault(id, []).append(segment) self.segments[segment] -= 1 size = self.io.read(segment, offset, id, read_data=False) + self.storage_quota_use -= size self.compact[segment] += size segment, size = self.io.write_delete(id) self.compact[segment] += size self.segments.setdefault(segment, 0) + def async_response(self, wait=True): + """Get one async result (only applies to remote repositories). + + async commands (== calls with wait=False, e.g. delete and put) have no results, + but may raise exceptions. These async exceptions must get collected later via + async_response() calls. Repeat the call until it returns None. + The previous calls might either return one (non-None) result or raise an exception. + If wait=True is given and there are outstanding responses, it will wait for them + to arrive. With wait=False, it will only return already received responses. 
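Purely as an illustration of the calling pattern this docstring describes (hypothetical caller code; repo is assumed to be an open RemoteRepository and chunks_to_store some iterable of (id, data) pairs):

# Hypothetical caller using the async interface described above.
for id, data in chunks_to_store:
    repo.put(id, data, wait=False)        # queued; an exception may only surface later
    repo.async_response(wait=False)       # drain any responses/exceptions already received
while repo.async_response(wait=True) is not None:
    pass                                  # collect all remaining outstanding responses
repo.commit()                             # assuming the caller commits afterwards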
+ """ + def preload(self, ids): """Preload objects (only applies to remote repositories) """ @@ -648,18 +1133,30 @@ class LoggedIO: self.fds = None # Just to make sure we're disabled def close_fd(self, fd): - if hasattr(os, 'posix_fadvise'): # only on UNIX - os.posix_fadvise(fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) + safe_fadvise(fd.fileno(), 0, 0, 'DONTNEED') fd.close() - def segment_iterator(self, reverse=False): + def segment_iterator(self, segment=None, reverse=False): + if segment is None: + segment = 0 if not reverse else 2 ** 32 - 1 data_path = os.path.join(self.path, 'data') - dirs = sorted((dir for dir in os.listdir(data_path) if dir.isdigit()), key=int, reverse=reverse) + start_segment_dir = segment // self.segments_per_dir + dirs = os.listdir(data_path) + if not reverse: + dirs = [dir for dir in dirs if dir.isdigit() and int(dir) >= start_segment_dir] + else: + dirs = [dir for dir in dirs if dir.isdigit() and int(dir) <= start_segment_dir] + dirs = sorted(dirs, key=int, reverse=reverse) for dir in dirs: filenames = os.listdir(os.path.join(data_path, dir)) - sorted_filenames = sorted((filename for filename in filenames - if filename.isdigit()), key=int, reverse=reverse) - for filename in sorted_filenames: + if not reverse: + filenames = [filename for filename in filenames if filename.isdigit() and int(filename) >= segment] + else: + filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment] + filenames = sorted(filenames, key=int, reverse=reverse) + for filename in filenames: + # Note: Do not filter out logically deleted segments (see "File system interaction" above), + # since this is used by cleanup and txn state detection as well. yield int(filename), os.path.join(data_path, dir, filename) def get_latest_segment(self): @@ -679,11 +1176,17 @@ class LoggedIO: """Delete segment files left by aborted transactions """ self.segment = transaction_id + 1 + count = 0 for segment, filename in self.segment_iterator(reverse=True): if segment > transaction_id: - os.unlink(filename) + if segment in self.fds: + del self.fds[segment] + truncate_and_unlink(filename) + count += 1 else: break + logger.debug('Cleaned up %d uncommitted segment files (== everything after segment %d).', + count, transaction_id) def is_committed_segment(self, segment): """Check if segment ends with a COMMIT_TAG tag @@ -731,9 +1234,15 @@ class LoggedIO: if not os.path.exists(dirname): os.mkdir(dirname) sync_dir(os.path.join(self.path, 'data')) - self._write_fd = SyncFile(self.segment_filename(self.segment)) + self._write_fd = SyncFile(self.segment_filename(self.segment), binary=True) self._write_fd.write(MAGIC) self.offset = MAGIC_LEN + if self.segment in self.fds: + # we may have a cached fd for a segment file we already deleted and + # we are writing now a new segment file to same file name. get rid of + # of the cached fd that still refers to the old file, so it will later + # get repopulated (on demand) with a fd that refers to the new file. 
+ del self.fds[self.segment] return self._write_fd def get_fd(self, segment): @@ -745,27 +1254,36 @@ class LoggedIO: return fd def close_segment(self): - if self._write_fd: + # set self._write_fd to None early to guard against reentry from error handling code paths: + fd, self._write_fd = self._write_fd, None + if fd is not None: self.segment += 1 self.offset = 0 - self._write_fd.close() - self._write_fd = None + fd.close() def delete_segment(self, segment): if segment in self.fds: del self.fds[segment] try: - os.unlink(self.segment_filename(segment)) + truncate_and_unlink(self.segment_filename(segment)) except FileNotFoundError: pass def segment_exists(self, segment): - return os.path.exists(self.segment_filename(segment)) + filename = self.segment_filename(segment) + # When deleting segments, they are first truncated. If truncate(2) and unlink(2) are split + # across FS transactions, then logically deleted segments will show up as truncated. + return os.path.exists(filename) and os.path.getsize(filename) def segment_size(self, segment): return os.path.getsize(self.segment_filename(segment)) - def iter_objects(self, segment, include_data=False, read_data=True): + def get_segment_magic(self, segment): + fd = self.get_fd(segment) + fd.seek(0) + return fd.read(MAGIC_LEN) + + def iter_objects(self, segment, offset=0, include_data=False, read_data=True): """ Return object iterator for *segment*. @@ -775,10 +1293,14 @@ class LoggedIO: The iterator returns four-tuples of (tag, key, offset, data|size). """ fd = self.get_fd(segment) - fd.seek(0) - if fd.read(MAGIC_LEN) != MAGIC: - raise IntegrityError('Invalid segment magic [segment {}, offset {}]'.format(segment, 0)) - offset = MAGIC_LEN + fd.seek(offset) + if offset == 0: + # we are touching this segment for the first time, check the MAGIC. + # Repository.scan() calls us with segment > 0 when it continues an ongoing iteration + # from a marker position - but then we have checked the magic before already. + if fd.read(MAGIC_LEN) != MAGIC: + raise IntegrityError('Invalid segment magic [segment {}, offset {}]'.format(segment, 0)) + offset = MAGIC_LEN header = fd.read(self.header_fmt.size) while header: size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset, @@ -789,27 +1311,38 @@ class LoggedIO: else: yield tag, key, offset, size offset += size + # we must get the fd via get_fd() here again as we yielded to our caller and it might + # have triggered closing of the fd we had before (e.g. by calling io.read() for + # different segment(s)). + # by calling get_fd() here again we also make our fd "recently used" so it likely + # does not get kicked out of self.fds LRUcache. 
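The "logically deleted" state that segment_exists() above checks for comes from truncating a segment file before unlinking it. The real truncate_and_unlink helper is not part of this hunk; the following is only a simplified sketch of what such a helper plausibly does.

import os

def truncate_and_unlink_sketch(path):
    # Truncate first: even if the unlink gets lost in a crash, the leftover
    # file is zero-sized, and segment_exists() (which also requires a non-zero
    # size) will treat it as already deleted.
    with open(path, 'r+b') as fd:
        fd.truncate(0)
        fd.flush()
        os.fsync(fd.fileno())
    os.unlink(path)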
+ fd = self.get_fd(segment) + fd.seek(offset) header = fd.read(self.header_fmt.size) def recover_segment(self, segment, filename): + logger.info('attempting to recover ' + filename) if segment in self.fds: del self.fds[segment] - with open(filename, 'rb') as fd: - data = memoryview(fd.read()) - os.rename(filename, filename + '.beforerecover') - logger.info('attempting to recover ' + filename) - with open(filename, 'wb') as fd: - fd.write(MAGIC) - while len(data) >= self.header_fmt.size: - crc, size, tag = self.header_fmt.unpack(data[:self.header_fmt.size]) - if size < self.header_fmt.size or size > len(data): - data = data[1:] - continue - if crc32(data[4:size]) & 0xffffffff != crc: - data = data[1:] - continue - fd.write(data[:size]) - data = data[size:] + backup_filename = filename + '.beforerecover' + os.rename(filename, backup_filename) + with open(backup_filename, 'rb') as backup_fd: + # note: file must not be 0 size (windows can't create 0 size mapping) + with mmap.mmap(backup_fd.fileno(), 0, access=mmap.ACCESS_READ) as mm: + data = memoryview(mm) + with open(filename, 'wb') as fd: + fd.write(MAGIC) + while len(data) >= self.header_fmt.size: + crc, size, tag = self.header_fmt.unpack(data[:self.header_fmt.size]) + if size < self.header_fmt.size or size > len(data): + data = data[1:] + continue + if crc32(data[4:size]) & 0xffffffff != crc: + data = data[1:] + continue + fd.write(data[:size]) + data = data[size:] + data.release() def read(self, segment, offset, id, read_data=True): """ @@ -843,9 +1376,14 @@ class LoggedIO: key = None else: raise TypeError("_read called with unsupported format") - if size > MAX_OBJECT_SIZE or size < fmt.size: - raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format( - segment, offset)) + if size > MAX_OBJECT_SIZE: + # if you get this on an archive made with borg < 1.0.7 and millions of files and + # you need to restore it, you can disable this check by using "if False:" above. + raise IntegrityError('Invalid segment entry size {} - too big [segment {}, offset {}]'.format( + size, segment, offset)) + if size < fmt.size: + raise IntegrityError('Invalid segment entry size {} - too small [segment {}, offset {}]'.format( + size, segment, offset)) length = size - fmt.size if read_data: data = fd.read(length) @@ -876,8 +1414,12 @@ class LoggedIO: return size, tag, key, data def write_put(self, id, data, raise_full=False): + data_size = len(data) + if data_size > MAX_DATA_SIZE: + # this would push the segment entry size beyond MAX_OBJECT_SIZE. + raise IntegrityError('More than allowed put data [{} > {}]'.format(data_size, MAX_DATA_SIZE)) fd = self.get_write_fd(raise_full=raise_full) - size = len(data) + self.put_header_fmt.size + size = data_size + self.put_header_fmt.size offset = self.offset header = self.header_no_crc_fmt.pack(size, TAG_PUT) crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff) @@ -893,10 +1435,20 @@ class LoggedIO: self.offset += self.put_header_fmt.size return self.segment, self.put_header_fmt.size - def write_commit(self): - self.close_segment() - fd = self.get_write_fd() + def write_commit(self, intermediate=False): + if intermediate: + # Intermediate commits go directly into the current segment - this makes checking their validity more + # expensive, but is faster and reduces clobber. 
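To make the on-disk entry format handled by write_put(), _read() and recover_segment() explicit, here is a small self-contained sketch that packs and CRC-checks one PUT entry in the same way. The struct layouts and TAG_PUT value are assumptions consistent with the 41-byte put header asserted at the end of this file (4-byte CRC, 4-byte size, 1-byte tag, 32-byte key), not code taken from the patch.

import struct
from zlib import crc32

TAG_PUT = 0                                   # assumed tag value
header_no_crc_fmt = struct.Struct('<IB')      # size, tag
crc_fmt = struct.Struct('<I')                 # crc32 over header + key + data
put_header_fmt = struct.Struct('<IIB32s')     # crc, size, tag, key -> 41 bytes

key = bytes(32)
data = b'example chunk data'
size = put_header_fmt.size + len(data)
header = header_no_crc_fmt.pack(size, TAG_PUT)
crc = crc_fmt.pack(crc32(data, crc32(key, crc32(header))) & 0xffffffff)
entry = b''.join((crc, header, key, data))

# verification, the way recover_segment() scans for valid entries:
crc_read, size_read, tag_read = struct.unpack('<IIB', entry[:9])
assert size_read >= 9 and crc32(entry[4:size_read]) & 0xffffffff == crc_read
print('entry ok: tag %d, %d bytes' % (tag_read, size_read))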
+ fd = self.get_write_fd() + fd.sync() + else: + self.close_segment() + fd = self.get_write_fd() header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT) crc = self.crc_fmt.pack(crc32(header) & 0xffffffff) fd.write(b''.join((crc, header))) self.close_segment() + return self.segment - 1 # close_segment() increments it + + +assert LoggedIO.put_header_fmt.size == 41 # see constants.MAX_OBJECT_SIZE diff --git a/src/borg/selftest.py b/src/borg/selftest.py index 139ed7e8..e8605f7c 100644 --- a/src/borg/selftest.py +++ b/src/borg/selftest.py @@ -30,7 +30,7 @@ SELFTEST_CASES = [ ChunkerTestCase, ] -SELFTEST_COUNT = 29 +SELFTEST_COUNT = 37 class SelfTestResult(TestResult): diff --git a/src/borg/shellpattern.py b/src/borg/shellpattern.py index e0ead38c..7c3d76b2 100644 --- a/src/borg/shellpattern.py +++ b/src/borg/shellpattern.py @@ -2,7 +2,7 @@ import os import re -def translate(pat): +def translate(pat, match_end=r"\Z"): """Translate a shell-style pattern to a regular expression. The pattern may include ``**`` ( stands for the platform-specific path separator; "/" on POSIX systems) for @@ -10,9 +10,12 @@ def translate(pat): any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character "?"). + Using match_end=regex one can give a regular expression that is used to match after the regex that is generated from + the pattern. The default is to match the end of the string. + This function is derived from the "fnmatch" module distributed with the Python standard library. - Copyright (C) 2001-2016 Python Software Foundation. All rights reserved. + Copyright (C) 2001-2017 Python Software Foundation. All rights reserved. TODO: support {alt1,alt2} shell-style alternatives @@ -59,4 +62,4 @@ def translate(pat): else: res += re.escape(c) - return res + r"\Z(?ms)" + return res + match_end + "(?ms)" diff --git a/src/borg/testsuite/__init__.py b/src/borg/testsuite/__init__.py index 6f64ed02..2be65276 100644 --- a/src/borg/testsuite/__init__.py +++ b/src/borg/testsuite/__init__.py @@ -1,5 +1,6 @@ from contextlib import contextmanager import filecmp +import functools import os import sys if sys.platform != 'win32': @@ -8,10 +9,12 @@ import stat import sysconfig import tempfile import time +import uuid import unittest from ..xattr import get_all from ..platform import get_flags +from ..helpers import umount from .. import platform # Note: this is used by borg.selftest, do not use or import py.test functionality here. @@ -29,13 +32,11 @@ except ImportError: raises = None has_lchflags = hasattr(os, 'lchflags') or sys.platform.startswith('linux') -no_lchlfags_because = '' if has_lchflags else '(not supported on this platform)' try: with tempfile.NamedTemporaryFile() as file: platform.set_flags(file.name, stat.UF_NODUMP) except OSError: has_lchflags = False - no_lchlfags_because = '(the file system at %s does not support flags)' % tempfile.gettempdir() try: import llfuse @@ -55,6 +56,78 @@ if sys.platform.startswith('netbsd'): st_mtime_ns_round = -4 # only >1 microsecond resolution here? 
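As a usage sketch for the match_end parameter added to shellpattern.translate() above (illustrative only; a POSIX path separator is assumed, and the second call's match_end value is an example choice, not necessarily what borg itself passes):

import re
from borg.shellpattern import translate

# default: the generated regex must match the whole string
regex = re.compile(translate('foo/**/*.py'))
print(bool(regex.match('foo/bar/baz.py')))        # True
print(bool(regex.match('foo/bar/baz.py.orig')))   # False

# custom match_end: also accept a match that stops at a path separator,
# so a pattern can match a directory and thereby everything below it
regex = re.compile(translate('foo/ba?', match_end=r'(\Z|/)'))
print(bool(regex.match('foo/bar/baz.py')))        # True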
+@contextmanager +def unopened_tempfile(): + with tempfile.TemporaryDirectory() as tempdir: + yield os.path.join(tempdir, "file") + + +@functools.lru_cache() +def are_symlinks_supported(): + with unopened_tempfile() as filepath: + try: + os.symlink('somewhere', filepath) + if os.stat(filepath, follow_symlinks=False) and os.readlink(filepath) == 'somewhere': + return True + except OSError: + pass + return False + + +@functools.lru_cache() +def are_hardlinks_supported(): + with unopened_tempfile() as file1path, unopened_tempfile() as file2path: + open(file1path, 'w').close() + try: + os.link(file1path, file2path) + stat1 = os.stat(file1path) + stat2 = os.stat(file2path) + if stat1.st_nlink == stat2.st_nlink == 2 and stat1.st_ino == stat2.st_ino: + return True + except OSError: + pass + return False + + +@functools.lru_cache() +def are_fifos_supported(): + with unopened_tempfile() as filepath: + try: + os.mkfifo(filepath) + return True + except OSError: + return False + + +@functools.lru_cache() +def is_utime_fully_supported(): + if sys.platform == 'win32': + return False + with unopened_tempfile() as filepath: + # Some filesystems (such as SSHFS) don't support utime on symlinks + if are_symlinks_supported(): + os.symlink('something', filepath) + else: + open(filepath, 'w').close() + try: + os.utime(filepath, (1000, 2000), follow_symlinks=False) + new_stats = os.stat(filepath, follow_symlinks=False) + if new_stats.st_atime == 1000 and new_stats.st_mtime == 2000: + return True + except OSError as err: + pass + return False + + +def no_selinux(x): + # selinux fails our FUSE tests, thus ignore selinux xattrs + SELINUX_KEY = 'security.selinux' + if isinstance(x, dict): + return {k: v for k, v in x.items() if k != SELINUX_KEY} + if isinstance(x, list): + return [k for k in x if k != SELINUX_KEY] + + class BaseTestCase(unittest.TestCase): """ """ @@ -75,11 +148,11 @@ class BaseTestCase(unittest.TestCase): yield self.assert_true(os.path.exists(path), '{} should exist'.format(path)) - def assert_dirs_equal(self, dir1, dir2): + def assert_dirs_equal(self, dir1, dir2, **kwargs): diff = filecmp.dircmp(dir1, dir2) - self._assert_dirs_equal_cmp(diff) + self._assert_dirs_equal_cmp(diff, **kwargs) - def _assert_dirs_equal_cmp(self, diff): + def _assert_dirs_equal_cmp(self, diff, ignore_bsdflags=False, ignore_xattrs=False, ignore_ns=False): self.assert_equal(diff.left_only, []) self.assert_equal(diff.right_only, []) self.assert_equal(diff.diff_files, []) @@ -87,50 +160,54 @@ class BaseTestCase(unittest.TestCase): for filename in diff.common: path1 = os.path.join(diff.left, filename) path2 = os.path.join(diff.right, filename) - s1 = os.lstat(path1) - s2 = os.lstat(path2) + s1 = os.stat(path1, follow_symlinks=False) + s2 = os.stat(path2, follow_symlinks=False) # Assume path2 is on FUSE if st_dev is different fuse = s1.st_dev != s2.st_dev - attrs = ['st_mode', 'st_uid', 'st_gid', 'st_rdev'] + attrs = ['st_uid', 'st_gid', 'st_rdev'] if not fuse or not os.path.isdir(path1): - # dir nlink is always 1 on our fuse filesystem + # dir nlink is always 1 on our FUSE filesystem attrs.append('st_nlink') d1 = [filename] + [getattr(s1, a) for a in attrs] d2 = [filename] + [getattr(s2, a) for a in attrs] - d1.append(get_flags(path1, s1)) - d2.append(get_flags(path2, s2)) + d1.insert(1, oct(s1.st_mode)) + d2.insert(1, oct(s2.st_mode)) + if not ignore_bsdflags: + d1.append(get_flags(path1, s1)) + d2.append(get_flags(path2, s2)) # ignore st_rdev if file is not a block/char device, fixes #203 - if not stat.S_ISCHR(d1[1]) and not 
stat.S_ISBLK(d1[1]): + if not stat.S_ISCHR(s1.st_mode) and not stat.S_ISBLK(s1.st_mode): d1[4] = None - if not stat.S_ISCHR(d2[1]) and not stat.S_ISBLK(d2[1]): + if not stat.S_ISCHR(s2.st_mode) and not stat.S_ISBLK(s2.st_mode): d2[4] = None - # Older versions of llfuse do not support ns precision properly - if fuse and not have_fuse_mtime_ns: - d1.append(round(s1.st_mtime_ns, -4)) - d2.append(round(s2.st_mtime_ns, -4)) - else: - d1.append(round(s1.st_mtime_ns, st_mtime_ns_round)) - d2.append(round(s2.st_mtime_ns, st_mtime_ns_round)) - d1.append(get_all(path1, follow_symlinks=False)) - d2.append(get_all(path2, follow_symlinks=False)) + # If utime isn't fully supported, borg can't set mtime. + # Therefore, we shouldn't test it in that case. + if is_utime_fully_supported(): + # Older versions of llfuse do not support ns precision properly + if ignore_ns: + d1.append(int(s1.st_mtime_ns / 1e9)) + d2.append(int(s2.st_mtime_ns / 1e9)) + elif fuse and not have_fuse_mtime_ns: + d1.append(round(s1.st_mtime_ns, -4)) + d2.append(round(s2.st_mtime_ns, -4)) + else: + d1.append(round(s1.st_mtime_ns, st_mtime_ns_round)) + d2.append(round(s2.st_mtime_ns, st_mtime_ns_round)) + if not ignore_xattrs: + d1.append(no_selinux(get_all(path1, follow_symlinks=False))) + d2.append(no_selinux(get_all(path2, follow_symlinks=False))) self.assert_equal(d1, d2) for sub_diff in diff.subdirs.values(): - self._assert_dirs_equal_cmp(sub_diff) + self._assert_dirs_equal_cmp(sub_diff, ignore_bsdflags=ignore_bsdflags, ignore_xattrs=ignore_xattrs, ignore_ns=ignore_ns) @contextmanager - def fuse_mount(self, location, mountpoint, mount_options=None): + def fuse_mount(self, location, mountpoint, *options): os.mkdir(mountpoint) - args = ['mount', location, mountpoint] - if mount_options: - args += '-o', mount_options + args = ['mount', location, mountpoint] + list(options) self.cmd(*args, fork=True) self.wait_for_mount(mountpoint) yield - if sys.platform.startswith('linux'): - cmd = 'fusermount -u %s' % mountpoint - else: - cmd = 'umount %s' % mountpoint - os.system(cmd) + umount(mountpoint) os.rmdir(mountpoint) # Give the daemon some time to exit time.sleep(.2) diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 527f7bde..bc113352 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -1,17 +1,17 @@ -import os +from collections import OrderedDict from datetime import datetime, timezone from io import StringIO from unittest.mock import Mock -import pytest import msgpack +import pytest +from . import BaseTestCase +from ..crypto.key import PlaintextKey from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics from ..archive import BackupOSError, backup_io, backup_io_iter -from ..item import Item -from ..key import PlaintextKey from ..helpers import Manifest -from . 
import BaseTestCase +from ..item import Item, ArchiveItem @pytest.fixture() @@ -30,8 +30,8 @@ def test_stats_basic(stats): assert stats.usize == 10 -def tests_stats_progress(stats, columns=80): - os.environ['COLUMNS'] = str(columns) +def tests_stats_progress(stats, monkeypatch, columns=80): + monkeypatch.setenv('COLUMNS', str(columns)) out = StringIO() stats.show_progress(stream=out) s = '20 B O 10 B C 10 B D 0 N ' @@ -53,7 +53,6 @@ def tests_stats_progress(stats, columns=80): def test_stats_format(stats): assert str(stats) == """\ - Original size Compressed size Deduplicated size This archive: 20 B 10 B 10 B""" s = "{0.osize_fmt}".format(stats) assert s == "20 B" @@ -63,12 +62,17 @@ This archive: 20 B 10 B 10 B"" class MockCache: + class MockRepo: + def async_response(self, wait=True): + pass + def __init__(self): self.objects = {} + self.repository = self.MockRepo() - def add_chunk(self, id, chunk, stats=None): - self.objects[id] = chunk.data - return id, len(chunk.data), len(chunk.data) + def add_chunk(self, id, chunk, stats=None, wait=True): + self.objects[id] = chunk + return id, len(chunk), len(chunk) class ArchiveTimestampTestCase(BaseTestCase): @@ -78,7 +82,7 @@ class ArchiveTimestampTestCase(BaseTestCase): key = PlaintextKey(repository) manifest = Manifest(repository, key) a = Archive(repository, key, manifest, 'test', create=True) - a.metadata = {b'time': isoformat} + a.metadata = ArchiveItem(time=isoformat) self.assert_equal(a.ts, expected) def test_with_microseconds(self): @@ -110,7 +114,7 @@ class ChunkBufferTestCase(BaseTestCase): self.assert_equal(data, [Item(internal_dict=d) for d in unpacker]) def test_partial(self): - big = "0123456789" * 10000 + big = "0123456789abcdefghijklmnopqrstuvwxyz" * 25000 data = [Item(path='full', source=big), Item(path='partial', source=big)] cache = MockCache() key = PlaintextKey(None) @@ -202,11 +206,15 @@ def test_invalid_msgpacked_item(packed, item_keys_serialized): assert not valid_msgpacked_dict(packed, item_keys_serialized) +# pytest-xdist requires always same order for the keys and dicts: +IK = sorted(list(ITEM_KEYS)) + + @pytest.mark.parametrize('packed', [msgpack.packb(o) for o in [ {b'path': b'/a/b/c'}, # small (different msgpack mapping type!) 
- dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets - dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets + OrderedDict((k, b'') for k in IK), # as big (key count) as it gets + OrderedDict((k, b'x' * 1000) for k in IK), # as big (key count and volume) as it gets ]]) def test_valid_msgpacked_items(packed, item_keys_serialized): assert valid_msgpacked_dict(packed, item_keys_serialized) @@ -221,7 +229,7 @@ def test_key_length_msgpacked_items(): def test_backup_io(): with pytest.raises(BackupOSError): - with backup_io(): + with backup_io: raise OSError(123) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index c5ebd130..6d7d7024 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1,46 +1,68 @@ -from configparser import ConfigParser +import argparse import errno -import os -import inspect -from io import StringIO +import io +import json import logging +import os +import pstats import random +import shutil import socket import stat import subprocess import sys -import shutil import tempfile import time import unittest -from unittest.mock import patch +from binascii import unhexlify, b2a_base64 +from configparser import ConfigParser +from datetime import datetime +from datetime import timedelta from hashlib import sha256 +from io import BytesIO, StringIO +from unittest.mock import patch +import msgpack import pytest + try: import llfuse except ImportError: pass +import borg from .. import xattr, helpers, platform -from ..archive import Archive, ChunkBuffer, ArchiveRecreater, flags_noatime, flags_normal -from ..archiver import Archiver -from ..cache import Cache +from ..archive import Archive, ChunkBuffer, flags_noatime, flags_normal +from ..archiver import Archiver, parse_storage_quota +from ..cache import Cache, LocalCache from ..constants import * # NOQA -from ..crypto import bytes_to_long, num_aes_blocks -from ..helpers import Chunk, Manifest +from ..crypto.low_level import bytes_to_long, num_cipher_blocks +from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError +from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile +from ..crypto.file_integrity import FileIntegrityError +from ..helpers import Location, get_security_dir +from ..helpers import Manifest, MandatoryFeatureUnsupported from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR from ..helpers import bin_to_hex -from ..key import KeyfileKeyBase +from ..helpers import MAX_S +from ..nanorst import RstToTextLazy, rst_to_terminal +from ..patterns import IECommand, PatternMatcher, parse_pattern +from ..item import Item, ItemDiff +from ..logger import setup_logging from ..remote import RemoteRepository, PathNotAllowed from ..repository import Repository from . import has_lchflags, has_llfuse -from . import BaseTestCase, changedir, environment_variable +from . import BaseTestCase, changedir, environment_variable, no_selinux +from . import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported, is_utime_fully_supported +from .platform import fakeroot_detected +from .upgrader import attic_repo +from . 
import key + src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw): +def exec_cmd(*args, archiver=None, fork=False, exe=None, input=b'', binary_output=False, **kw): if fork: try: if exe is None: @@ -49,29 +71,55 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw): borg = (exe, ) elif not isinstance(exe, tuple): raise ValueError('exe must be None, a tuple or a str') - output = subprocess.check_output(borg + args, stderr=subprocess.STDOUT) + output = subprocess.check_output(borg + args, stderr=subprocess.STDOUT, input=input) ret = 0 except subprocess.CalledProcessError as e: output = e.output ret = e.returncode - return ret, os.fsdecode(output) + except SystemExit as e: # possibly raised by argparse + output = '' + ret = e.code + if binary_output: + return ret, output + else: + return ret, os.fsdecode(output) else: stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr try: - sys.stdin = StringIO() - sys.stdout = sys.stderr = output = StringIO() + sys.stdin = StringIO(input.decode()) + sys.stdin.buffer = BytesIO(input) + output = BytesIO() + # Always use utf-8 here, to simply .decode() below + output_text = sys.stdout = sys.stderr = io.TextIOWrapper(output, encoding='utf-8') if archiver is None: archiver = Archiver() archiver.prerun_checks = lambda *args: None archiver.exit_code = EXIT_SUCCESS - args = archiver.parse_args(list(args)) + helpers.exit_code = EXIT_SUCCESS + try: + args = archiver.parse_args(list(args)) + # argparse parsing may raise SystemExit when the command line is bad or + # actions that abort early (eg. --help) where given. Catch this and return + # the error code as-if we invoked a Borg binary. + except SystemExit as e: + output_text.flush() + return e.code, output.getvalue() if binary_output else output.getvalue().decode() ret = archiver.run(args) - return ret, output.getvalue() + output_text.flush() + return ret, output.getvalue() if binary_output else output.getvalue().decode() finally: sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr -# check if the binary "borg.exe" is available +def have_gnutar(): + if not shutil.which('tar'): + return False + popen = subprocess.Popen(['tar', '--version'], stdout=subprocess.PIPE) + stdout, stderr = popen.communicate() + return b'GNU tar' in stdout + + +# check if the binary "borg.exe" is available (for local testing a symlink to virtualenv/bin/borg should do) try: exec_cmd('help', exe='borg.exe', fork=True) BORG_EXES = ['python', 'binary', ] @@ -212,14 +260,18 @@ class ArchiverTestCaseBase(BaseTestCase): self.keys_path = os.path.join(self.tmpdir, 'keys') self.cache_path = os.path.join(self.tmpdir, 'cache') self.exclude_file_path = os.path.join(self.tmpdir, 'excludes') + self.patterns_file_path = os.path.join(self.tmpdir, 'patterns') os.environ['BORG_KEYS_DIR'] = self.keys_path os.environ['BORG_CACHE_DIR'] = self.cache_path os.mkdir(self.input_path) + os.chmod(self.input_path, 0o777) # avoid troubles with fakeroot / FUSE os.mkdir(self.output_path) os.mkdir(self.keys_path) os.mkdir(self.cache_path) with open(self.exclude_file_path, 'wb') as fd: fd.write(b'input/file2\n# A comment line, then a blank line\n\n') + with open(self.patterns_file_path, 'wb') as fd: + fd.write(b'+input/file_important\n- input/file*\n# A comment line, then a blank line\n\n') self._old_wd = os.getcwd() os.chdir(self.tmpdir) @@ -227,6 +279,9 @@ class ArchiverTestCaseBase(BaseTestCase): os.chdir(self._old_wd) # note: ignore_errors=True as workaround for 
issue #862 shutil.rmtree(self.tmpdir, ignore_errors=True) + # destroy logging configuration + logging.Logger.manager.loggerDict.clear() + setup_logging() def cmd(self, *args, **kw): exit_code = kw.pop('exit_code', 0) @@ -240,15 +295,18 @@ class ArchiverTestCaseBase(BaseTestCase): return output def create_src_archive(self, name): - self.cmd('create', self.repository_location + '::' + name, src_dir) + self.cmd('create', '--compression=lz4', self.repository_location + '::' + name, src_dir) def open_archive(self, name): - repository = Repository(self.repository_path) + repository = Repository(self.repository_path, exclusive=True) with repository: - manifest, key = Manifest.load(repository) + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) archive = Archive(repository, key, manifest, name) return archive, repository + def open_repository(self): + return Repository(self.repository_path, exclusive=True) + def create_regular_file(self, name, size=0, contents=None): filename = os.path.join(self.input_path, name) if not os.path.exists(os.path.dirname(filename)): @@ -262,13 +320,6 @@ class ArchiverTestCaseBase(BaseTestCase): """Create a minimal test case including all supported file types """ # File - self.create_regular_file('empty', size=0) - # next code line raises OverflowError on 32bit cpu (raspberry pi 2): - # 2600-01-01 > 2**64 ns - # os.utime('input/empty', (19880895600, 19880895600)) - # thus, we better test with something not that far in future: - # 2038-01-19 (1970 + 2^31 - 1 seconds) is the 32bit "deadline": - os.utime('input/empty', (2**31 - 1, 2**31 - 1)) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('flagfile', size=1024) # Directory @@ -276,13 +327,21 @@ class ArchiverTestCaseBase(BaseTestCase): # File mode os.chmod('input/file1', 0o4755) # Hard link - os.link(os.path.join(self.input_path, 'file1'), - os.path.join(self.input_path, 'hardlink')) + if are_hardlinks_supported(): + os.link(os.path.join(self.input_path, 'file1'), + os.path.join(self.input_path, 'hardlink')) # Symlink - if sys.platform != 'win32': + if are_symlinks_supported(): os.symlink('somewhere', os.path.join(self.input_path, 'link1')) - if xattr.is_enabled(self.input_path): - xattr.setxattr(os.path.join(self.input_path, 'file1'), 'user.foo', b'bar') + self.create_regular_file('fusexattr', size=1) + if not xattr.XATTR_FAKEROOT and xattr.is_enabled(self.input_path): + # ironically, due to the way how fakeroot works, comparing FUSE file xattrs to orig file xattrs + # will FAIL if fakeroot supports xattrs, thus we only set the xattr if XATTR_FAKEROOT is False. + # This is because fakeroot with xattr-support does not propagate xattrs of the underlying file + # into "fakeroot space". Because the xattrs exposed by borgfs are these of an underlying file + # (from fakeroots point of view) they are invisible to the test process inside the fakeroot. + xattr.setxattr(os.path.join(self.input_path, 'fusexattr'), 'user.foo', b'bar') + xattr.setxattr(os.path.join(self.input_path, 'fusexattr'), 'user.empty', b'') # XXX this always fails for me # ubuntu 14.04, on a TMP dir filesystem with user_xattr, using fakeroot # same for newer ubuntu and centos. @@ -290,40 +349,42 @@ class ArchiverTestCaseBase(BaseTestCase): # so that the test setup for all tests using it does not fail here always for others. 
# xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False) # FIFO node - if sys.platform != 'win32': + + if are_fifos_supported(): os.mkfifo(os.path.join(self.input_path, 'fifo1')) - if has_lchflags: - platform.set_flags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP) - try: - # Block device - os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) - # Char device - os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40)) - # File mode - os.chmod('input/dir2', 0o555) # if we take away write perms, we need root to remove contents - # File owner - os.chown('input/file1', 100, 200) # raises OSError invalid argument on cygwin - have_root = True # we have (fake)root - except PermissionError: - have_root = False - except OSError as e: - if e.errno != errno.EINVAL: - raise - have_root = False - return have_root - else: - return False + if has_lchflags: + platform.set_flags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP) + try: + # Block device + os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) + # Char device + os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40)) + # File mode + os.chmod('input/dir2', 0o555) # if we take away write perms, we need root to remove contents + # File owner + os.chown('input/file1', 100, 200) # raises OSError invalid argument on cygwin + have_root = True # we have (fake)root + except PermissionError: + have_root = False + except OSError as e: + # Note: ENOSYS "Function not implemented" happens as non-root on Win 10 Linux Subsystem. + if e.errno not in (errno.EINVAL, errno.ENOSYS): + raise + have_root = False + time.sleep(1) # "empty" must have newer timestamp than other files + self.create_regular_file('empty', size=0) + return have_root class ArchiverTestCase(ArchiverTestCaseBase): def test_basic_functionality(self): have_root = self.create_test_files() # fork required to test show-rc output - output = self.cmd('init', '--show-version', '--show-rc', self.repository_location, fork=True) + output = self.cmd('init', '--encryption=repokey', '--show-version', '--show-rc', self.repository_location, fork=True) self.assert_in('borgbackup version', output) self.assert_in('terminating with success status, rc 0', output) - self.cmd('create', self.repository_location + '::test', 'input') - output = self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') + self.cmd('create', '--exclude-nodump', self.repository_location + '::test', 'input') + output = self.cmd('create', '--exclude-nodump', '--stats', self.repository_location + '::test.2', 'input') self.assert_in('Archive name: test.2', output) self.assert_in('This archive: ', output) with changedir('output'): @@ -338,12 +399,15 @@ class ArchiverTestCase(ArchiverTestCaseBase): 'input/dir2', 'input/dir2/file2', 'input/empty', - 'input/fifo1', 'input/file1', 'input/flagfile', - 'input/hardlink', - 'input/link1', ] + if are_fifos_supported(): + expected.append('input/fifo1') + if are_symlinks_supported(): + expected.append('input/link1') + if are_hardlinks_supported(): + expected.append('input/hardlink') if not have_root: # we could not create these device files without (fake)root expected.remove('input/bdev') @@ -360,11 +424,10 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in(name, list_output) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') - item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP + 
item_count = 4 if has_lchflags else 5 # one file is UF_NODUMP self.assert_in('Number of files: %d' % item_count, info_output) shutil.rmtree(self.cache_path) - with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='yes'): - info_output2 = self.cmd('info', self.repository_location + '::test') + info_output2 = self.cmd('info', self.repository_location + '::test') def filter(output): # filter for interesting "info" output, ignore cache rebuilding related stuff @@ -382,29 +445,37 @@ class ArchiverTestCase(ArchiverTestCaseBase): @pytest.mark.skipif(sys.platform == 'win32', reason='Can not test on Windows.') def test_unix_socket(self): - self.cmd('init', self.repository_location) - sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - sock.bind(os.path.join(self.input_path, 'unix-socket')) + self.cmd('init', '--encryption=repokey', self.repository_location) + try: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.bind(os.path.join(self.input_path, 'unix-socket')) + except PermissionError as err: + if err.errno == errno.EPERM: + pytest.skip('unix sockets disabled or not supported') + elif err.errno == errno.EACCES: + pytest.skip('permission denied to create unix sockets') self.cmd('create', self.repository_location + '::test', 'input') sock.close() with changedir('output'): self.cmd('extract', self.repository_location + '::test') assert not os.path.exists('input/unix-socket') - @pytest.mark.skipif(sys.platform == 'win32', reason='Can not test on Windows.') + @pytest.mark.skipif(not are_symlinks_supported(), reason='symlinks not supported') def test_symlink_extract(self): self.create_test_files() - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') assert os.readlink('input/link1') == 'somewhere' + @pytest.mark.skipif(not is_utime_fully_supported(), reason='cannot properly setup and execute test without utime') def test_atime(self): def has_noatime(some_file): atime_before = os.stat(some_file).st_atime_ns try: - os.close(os.open(some_file, flags_noatime)) + with open(os.open(some_file, flags_noatime)) as file: + file.read() except PermissionError: return False else: @@ -416,7 +487,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): atime, mtime = 123456780, 234567890 have_noatime = has_noatime('input/file1') os.utime('input/file1', (atime, mtime)) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') @@ -480,7 +551,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): if sparse_support: # we could create a sparse input file, so creating a backup of it and # extracting it again (as sparse) should also work: - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') with changedir(self.output_path): self.cmd('extract', '--sparse', self.repository_location + '::test') @@ -499,7 +570,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): filename = os.path.join(self.input_path, filename) with open(filename, 'wb'): pass - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) 
self.cmd('create', self.repository_location + '::test', 'input') for filename in filenames: with changedir('output'): @@ -519,7 +590,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): if self.FORK_DEFAULT: self.cmd('create', self.repository_location + '::test.2', 'input', exit_code=EXIT_ERROR) else: - self.assert_raises(Cache.EncryptionMethodMismatch, lambda: self.cmd('create', self.repository_location + '::test.2', 'input')) + with pytest.raises(Cache.EncryptionMethodMismatch): + self.cmd('create', self.repository_location + '::test.2', 'input') def test_repository_swap_detection2(self): self.create_test_files() @@ -532,10 +604,119 @@ class ArchiverTestCase(ArchiverTestCaseBase): if self.FORK_DEFAULT: self.cmd('create', self.repository_location + '_encrypted::test.2', 'input', exit_code=EXIT_ERROR) else: - self.assert_raises(Cache.RepositoryAccessAborted, lambda: self.cmd('create', self.repository_location + '_encrypted::test.2', 'input')) + with pytest.raises(Cache.RepositoryAccessAborted): + self.cmd('create', self.repository_location + '_encrypted::test.2', 'input') + + def test_repository_swap_detection_no_cache(self): + self.create_test_files() + os.environ['BORG_PASSPHRASE'] = 'passphrase' + self.cmd('init', '--encryption=repokey', self.repository_location) + repository_id = self._extract_repository_id(self.repository_path) + self.cmd('create', self.repository_location + '::test', 'input') + shutil.rmtree(self.repository_path) + self.cmd('init', '--encryption=none', self.repository_location) + self._set_repository_id(self.repository_path, repository_id) + self.assert_equal(repository_id, self._extract_repository_id(self.repository_path)) + self.cmd('delete', '--cache-only', self.repository_location) + if self.FORK_DEFAULT: + self.cmd('create', self.repository_location + '::test.2', 'input', exit_code=EXIT_ERROR) + else: + with pytest.raises(Cache.EncryptionMethodMismatch): + self.cmd('create', self.repository_location + '::test.2', 'input') + + def test_repository_swap_detection2_no_cache(self): + self.create_test_files() + self.cmd('init', '--encryption=none', self.repository_location + '_unencrypted') + os.environ['BORG_PASSPHRASE'] = 'passphrase' + self.cmd('init', '--encryption=repokey', self.repository_location + '_encrypted') + self.cmd('create', self.repository_location + '_encrypted::test', 'input') + self.cmd('delete', '--cache-only', self.repository_location + '_unencrypted') + self.cmd('delete', '--cache-only', self.repository_location + '_encrypted') + shutil.rmtree(self.repository_path + '_encrypted') + os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted') + if self.FORK_DEFAULT: + self.cmd('create', self.repository_location + '_encrypted::test.2', 'input', exit_code=EXIT_ERROR) + else: + with pytest.raises(Cache.RepositoryAccessAborted): + self.cmd('create', self.repository_location + '_encrypted::test.2', 'input') + + def test_repository_swap_detection_repokey_blank_passphrase(self): + # Check that a repokey repo with a blank passphrase is considered like a plaintext repo. 
+ self.create_test_files() + # User initializes her repository with her passphrase + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + # Attacker replaces it with her own repository, which is encrypted but has no passphrase set + shutil.rmtree(self.repository_path) + with environment_variable(BORG_PASSPHRASE=''): + self.cmd('init', '--encryption=repokey', self.repository_location) + # Delete cache & security database, AKA switch to user perspective + self.cmd('delete', '--cache-only', self.repository_location) + repository_id = bin_to_hex(self._extract_repository_id(self.repository_path)) + shutil.rmtree(get_security_dir(repository_id)) + with environment_variable(BORG_PASSPHRASE=None): + # This is the part where the user would be tricked, e.g. she assumes that BORG_PASSPHRASE + # is set, while it isn't. Previously this raised no warning, + # since the repository is, technically, encrypted. + if self.FORK_DEFAULT: + self.cmd('create', self.repository_location + '::test.2', 'input', exit_code=EXIT_ERROR) + else: + with pytest.raises(Cache.CacheInitAbortedError): + self.cmd('create', self.repository_location + '::test.2', 'input') + + def test_repository_move(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + repository_id = bin_to_hex(self._extract_repository_id(self.repository_path)) + os.rename(self.repository_path, self.repository_path + '_new') + with environment_variable(BORG_RELOCATED_REPO_ACCESS_IS_OK='yes'): + self.cmd('info', self.repository_location + '_new') + security_dir = get_security_dir(repository_id) + with open(os.path.join(security_dir, 'location')) as fd: + location = fd.read() + assert location == Location(self.repository_location + '_new').canonical_path() + # Needs no confirmation anymore + self.cmd('info', self.repository_location + '_new') + shutil.rmtree(self.cache_path) + self.cmd('info', self.repository_location + '_new') + shutil.rmtree(security_dir) + self.cmd('info', self.repository_location + '_new') + for file in ('location', 'key-type', 'manifest-timestamp'): + assert os.path.exists(os.path.join(security_dir, file)) + + def test_security_dir_compat(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + repository_id = bin_to_hex(self._extract_repository_id(self.repository_path)) + security_dir = get_security_dir(repository_id) + with open(os.path.join(security_dir, 'location'), 'w') as fd: + fd.write('something outdated') + # This is fine, because the cache still has the correct information. security_dir and cache can disagree + # if older versions are used to confirm a renamed repository. + self.cmd('info', self.repository_location) + + def test_unknown_unencrypted(self): + self.cmd('init', '--encryption=none', self.repository_location) + repository_id = bin_to_hex(self._extract_repository_id(self.repository_path)) + security_dir = get_security_dir(repository_id) + # Ok: repository is known + self.cmd('info', self.repository_location) + + # Ok: repository is still known (through security_dir) + shutil.rmtree(self.cache_path) + self.cmd('info', self.repository_location) + + # Needs confirmation: cache and security dir both gone (e.g.
another host or rm -rf ~) + shutil.rmtree(self.cache_path) + shutil.rmtree(security_dir) + if self.FORK_DEFAULT: + self.cmd('info', self.repository_location, exit_code=EXIT_ERROR) + else: + with pytest.raises(Cache.CacheInitAbortedError): + self.cmd('info', self.repository_location) + with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='yes'): + self.cmd('info', self.repository_location) def test_strip_components(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('dir/file') self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): @@ -564,9 +745,12 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.link(os.path.join(self.input_path, 'dir1/source2'), os.path.join(self.input_path, 'dir1/aaaa')) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') + requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported') + + @requires_hardlinks def test_strip_components_links(self): self._extract_hardlinks_setup() with changedir('output'): @@ -579,6 +763,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', self.repository_location + '::test') assert os.stat('input/dir1/hardlink').st_nlink == 4 + @requires_hardlinks def test_extract_hardlinks(self): self._extract_hardlinks_setup() with changedir('output'): @@ -592,7 +777,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): assert os.stat('input/dir1/hardlink').st_nlink == 4 def test_extract_include_exclude(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('file2', size=1024 * 80) self.create_regular_file('file3', size=1024 * 80) @@ -609,7 +794,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3']) def test_extract_include_exclude_regex(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('file2', size=1024 * 80) self.create_regular_file('file3', size=1024 * 80) @@ -643,7 +828,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): @pytest.mark.skipif(sys.platform == 'win32', reason='Can not test on Windows.') def test_extract_include_exclude_regex_from_file(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('file2', size=1024 * 80) self.create_regular_file('file3', size=1024 * 80) @@ -683,7 +868,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(sorted(os.listdir('output/input')), ['file3']) def test_extract_with_pattern(self): - self.cmd("init", self.repository_location) + self.cmd("init", '--encryption=repokey', self.repository_location) self.create_regular_file("file1", size=1024 * 80) self.create_regular_file("file2", size=1024 * 80) self.create_regular_file("file3", size=1024 * 80) @@ -716,7 +901,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"]) def test_extract_list_output(self): - self.cmd('init', self.repository_location) + 
self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file', size=1024 * 80) self.cmd('create', self.repository_location + '::test', 'input') @@ -740,8 +925,17 @@ class ArchiverTestCase(ArchiverTestCaseBase): output = self.cmd('extract', '--list', '--info', self.repository_location + '::test') self.assert_in("input/file", output) + def test_extract_progress(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('file', size=1024 * 80) + self.cmd('create', self.repository_location + '::test', 'input') + + with changedir('output'): + output = self.cmd('extract', self.repository_location + '::test', '--progress') + assert 'Extracting:' in output + def _create_test_caches(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('cache1/%s' % CACHE_TAG_NAME, contents=CACHE_TAG_CONTENTS + b' extra stuff') @@ -750,6 +944,146 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.mkdir('input/cache3') os.link('input/cache1/%s' % CACHE_TAG_NAME, 'input/cache3/%s' % CACHE_TAG_NAME) + def test_create_stdin(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + input_data = b'\x00foo\n\nbar\n \n' + self.cmd('create', self.repository_location + '::test', '-', input=input_data) + item = json.loads(self.cmd('list', '--json-lines', self.repository_location + '::test')) + assert item['uid'] == 0 + assert item['gid'] == 0 + assert item['size'] == len(input_data) + assert item['path'] == 'stdin' + extracted_data = self.cmd('extract', '--stdout', self.repository_location + '::test', binary_output=True) + assert extracted_data == input_data + + def test_create_without_root(self): + """test create without a root""" + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', exit_code=2) + + def test_create_pattern_root(self): + """test create with only a root pattern""" + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + output = self.cmd('create', '-v', '--list', '--pattern=R input', self.repository_location + '::test') + self.assert_in("A input/file1", output) + self.assert_in("A input/file2", output) + + def test_create_pattern(self): + """test file patterns during create""" + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('file_important', size=1024 * 80) + output = self.cmd('create', '-v', '--list', + '--pattern=+input/file_important', '--pattern=-input/file*', + self.repository_location + '::test', 'input') + self.assert_in("A input/file_important", output) + self.assert_in('x input/file1', output) + self.assert_in('x input/file2', output) + + def test_create_pattern_file(self): + """test file patterns during create""" + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('otherfile', size=1024 * 80) + self.create_regular_file('file_important', size=1024 * 80) + output = self.cmd('create', '-v', '--list', + '--pattern=-input/otherfile', '--patterns-from=' + self.patterns_file_path, + 
self.repository_location + '::test', 'input') + self.assert_in("A input/file_important", output) + self.assert_in('x input/file1', output) + self.assert_in('x input/file2', output) + self.assert_in('x input/otherfile', output) + + def test_create_pattern_exclude_folder_but_recurse(self): + """test when patterns exclude a parent folder, but include a child""" + self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2') + with open(self.patterns_file_path2, 'wb') as fd: + fd.write(b'+ input/x/b\n- input/x*\n') + + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('x/a/foo_a', size=1024 * 80) + self.create_regular_file('x/b/foo_b', size=1024 * 80) + self.create_regular_file('y/foo_y', size=1024 * 80) + output = self.cmd('create', '-v', '--list', + '--patterns-from=' + self.patterns_file_path2, + self.repository_location + '::test', 'input') + self.assert_in('x input/x/a/foo_a', output) + self.assert_in("A input/x/b/foo_b", output) + self.assert_in('A input/y/foo_y', output) + + def test_create_pattern_exclude_folder_no_recurse(self): + """test when patterns exclude a parent folder without recursing, but include a child""" + self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2') + with open(self.patterns_file_path2, 'wb') as fd: + fd.write(b'+ input/x/b\n! input/x*\n') + + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('x/a/foo_a', size=1024 * 80) + self.create_regular_file('x/b/foo_b', size=1024 * 80) + self.create_regular_file('y/foo_y', size=1024 * 80) + output = self.cmd('create', '-v', '--list', + '--patterns-from=' + self.patterns_file_path2, + self.repository_location + '::test', 'input') + self.assert_not_in('input/x/a/foo_a', output) + self.assert_not_in('input/x/a', output) + self.assert_in('A input/y/foo_y', output) + + def test_create_pattern_intermediate_folders_first(self): + """test that intermediate folders appear first when patterns exclude a parent folder but include a child""" + self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2') + with open(self.patterns_file_path2, 'wb') as fd: + fd.write(b'+ input/x/a\n+ input/x/b\n- input/x*\n') + + self.cmd('init', '--encryption=repokey', self.repository_location) + + self.create_regular_file('x/a/foo_a', size=1024 * 80) + self.create_regular_file('x/b/foo_b', size=1024 * 80) + with changedir('input'): + self.cmd('create', '--patterns-from=' + self.patterns_file_path2, + self.repository_location + '::test', '.') + + # list the archive and verify that the "intermediate" folders appear before + # their contents + out = self.cmd('list', '--format', '{type} {path}{NL}', self.repository_location + '::test') + out_list = out.splitlines() + + self.assert_in('d x/a', out_list) + self.assert_in('d x/b', out_list) + + assert out_list.index('d x/a') < out_list.index('- x/a/foo_a') + assert out_list.index('d x/b') < out_list.index('- x/b/foo_b') + + def test_create_no_cache_sync(self): + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('delete', '--cache-only', self.repository_location) + create_json = json.loads(self.cmd('create', '--no-cache-sync', self.repository_location + '::test', 'input', + '--json', '--error')) # ignore experimental warning + info_json = json.loads(self.cmd('info', self.repository_location + '::test', '--json')) + create_stats = create_json['cache']['stats'] + info_stats = info_json['cache']['stats'] + assert create_stats == info_stats + self.cmd('delete',
'--cache-only', self.repository_location) + self.cmd('create', '--no-cache-sync', self.repository_location + '::test2', 'input') + self.cmd('info', self.repository_location) + self.cmd('check', self.repository_location) + + def test_extract_pattern_opt(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('file_important', size=1024 * 80) + self.cmd('create', self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', + '--pattern=+input/file_important', '--pattern=-input/file*', + self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file_important']) + def _assert_test_caches(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') @@ -768,16 +1102,16 @@ class ArchiverTestCase(ArchiverTestCaseBase): self._assert_test_caches() def _create_test_tagged(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('tagged1/.NOBACKUP') self.create_regular_file('tagged2/00-NOBACKUP') - self.create_regular_file('tagged3/.NOBACKUP/file2') + self.create_regular_file('tagged3/.NOBACKUP/file2', size=1024) def _assert_test_tagged(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') - self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3']) + self.assert_equal(sorted(os.listdir('output/input')), ['file1']) def test_exclude_tagged(self): self._create_test_tagged() @@ -792,17 +1126,17 @@ class ArchiverTestCase(ArchiverTestCaseBase): self._assert_test_tagged() def _create_test_keep_tagged(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file0', size=1024) self.create_regular_file('tagged1/.NOBACKUP1') self.create_regular_file('tagged1/file1', size=1024) - self.create_regular_file('tagged2/.NOBACKUP2') + self.create_regular_file('tagged2/.NOBACKUP2/subfile1', size=1024) self.create_regular_file('tagged2/file2', size=1024) self.create_regular_file('tagged3/%s' % CACHE_TAG_NAME, contents=CACHE_TAG_CONTENTS + b' extra stuff') self.create_regular_file('tagged3/file3', size=1024) self.create_regular_file('taggedall/.NOBACKUP1') - self.create_regular_file('taggedall/.NOBACKUP2') + self.create_regular_file('taggedall/.NOBACKUP2/subfile1', size=1024) self.create_regular_file('taggedall/%s' % CACHE_TAG_NAME, contents=CACHE_TAG_CONTENTS + b' extra stuff') self.create_regular_file('taggedall/file4', size=1024) @@ -817,17 +1151,22 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(sorted(os.listdir('output/input/taggedall')), ['.NOBACKUP1', '.NOBACKUP2', CACHE_TAG_NAME, ]) + def test_exclude_keep_tagged_deprecation(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + output_warn = self.cmd('create', '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', src_dir) + self.assert_in('--keep-tag-files" has been deprecated.', output_warn) + def test_exclude_keep_tagged(self): self._create_test_keep_tagged() self.cmd('create', '--exclude-if-present', '.NOBACKUP1', '--exclude-if-present', '.NOBACKUP2', - '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input') + '--exclude-caches', 
'--keep-exclude-tags', self.repository_location + '::test', 'input') self._assert_test_keep_tagged() def test_recreate_exclude_keep_tagged(self): self._create_test_keep_tagged() self.cmd('create', self.repository_location + '::test', 'input') self.cmd('recreate', '--exclude-if-present', '.NOBACKUP1', '--exclude-if-present', '.NOBACKUP2', - '--exclude-caches', '--keep-tag-files', self.repository_location + '::test') + '--exclude-caches', '--keep-exclude-tags', self.repository_location + '::test') self._assert_test_keep_tagged() @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='Linux capabilities test, requires fakeroot >= 1.20.2') @@ -844,15 +1183,47 @@ class ArchiverTestCase(ArchiverTestCaseBase): capabilities = b'\x01\x00\x00\x02\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' self.create_regular_file('file') xattr.setxattr('input/file', 'security.capability', capabilities) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): with patch.object(os, 'fchown', patched_fchown): self.cmd('extract', self.repository_location + '::test') assert xattr.getxattr('input/file', 'security.capability') == capabilities + @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='xattr not supported on this system or on this version of' + 'fakeroot') + def test_extract_xattrs_errors(self): + def patched_setxattr_E2BIG(*args, **kwargs): + raise OSError(errno.E2BIG, 'E2BIG') + + def patched_setxattr_ENOTSUP(*args, **kwargs): + raise OSError(errno.ENOTSUP, 'ENOTSUP') + + def patched_setxattr_EACCES(*args, **kwargs): + raise OSError(errno.EACCES, 'EACCES') + + self.create_regular_file('file') + xattr.setxattr('input/file', 'attribute', 'value') + self.cmd('init', self.repository_location, '-e' 'none') + self.cmd('create', self.repository_location + '::test', 'input') + with changedir('output'): + input_abspath = os.path.abspath('input/file') + with patch.object(xattr, 'setxattr', patched_setxattr_E2BIG): + out = self.cmd('extract', self.repository_location + '::test', exit_code=EXIT_WARNING) + assert out == (input_abspath + ': Value or key of extended attribute attribute is too big for this ' + 'filesystem\n') + os.remove(input_abspath) + with patch.object(xattr, 'setxattr', patched_setxattr_ENOTSUP): + out = self.cmd('extract', self.repository_location + '::test', exit_code=EXIT_WARNING) + assert out == (input_abspath + ': Extended attributes are not supported on this filesystem\n') + os.remove(input_abspath) + with patch.object(xattr, 'setxattr', patched_setxattr_EACCES): + out = self.cmd('extract', self.repository_location + '::test', exit_code=EXIT_WARNING) + assert out == (input_abspath + ': Permission denied when setting extended attribute attribute\n') + assert os.path.isfile(input_abspath) + def test_path_normalization(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('dir1/dir2/file', size=1024 * 80) with changedir('input/dir1/dir2'): self.cmd('create', self.repository_location + '::test', '../../../input/dir1/../dir1/dir2/..') @@ -861,7 +1232,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in(' input/dir1/dir2/file', output) def test_exclude_normalization(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=1024 * 80) 
self.create_regular_file('file2', size=1024 * 80) with changedir('input'): @@ -881,13 +1252,13 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_repeated_files(self): self.create_regular_file('file1', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input', 'input') def test_overwrite(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') # Overwriting regular files and directories should be supported os.mkdir('output/input') @@ -906,7 +1277,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_rename(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') self.cmd('create', self.repository_location + '::test.2', 'input') self.cmd('extract', '--dry-run', self.repository_location + '::test') @@ -918,14 +1289,55 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', '--dry-run', self.repository_location + '::test.4') # Make sure both archives have been renamed with Repository(self.repository_path) as repository: - manifest, key = Manifest.load(repository) + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) self.assert_equal(len(manifest.archives), 2) self.assert_in('test.3', manifest.archives) self.assert_in('test.4', manifest.archives) + def test_info(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + info_repo = self.cmd('info', self.repository_location) + assert 'All archives:' in info_repo + info_archive = self.cmd('info', self.repository_location + '::test') + assert 'Archive name: test\n' in info_archive + info_archive = self.cmd('info', '--first', '1', self.repository_location) + assert 'Archive name: test\n' in info_archive + + def test_info_json(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + info_repo = json.loads(self.cmd('info', '--json', self.repository_location)) + repository = info_repo['repository'] + assert len(repository['id']) == 64 + assert 'last_modified' in repository + assert datetime.strptime(repository['last_modified'], ISO_FORMAT) # must not raise + assert info_repo['encryption']['mode'] == 'repokey' + assert 'keyfile' not in info_repo['encryption'] + cache = info_repo['cache'] + stats = cache['stats'] + assert all(isinstance(o, int) for o in stats.values()) + assert all(key in stats for key in ('total_chunks', 'total_csize', 'total_size', 'total_unique_chunks', 'unique_csize', 'unique_size')) + + info_archive = json.loads(self.cmd('info', '--json', self.repository_location + '::test')) + assert info_repo['repository'] == info_archive['repository'] + assert info_repo['cache'] == info_archive['cache'] + archives = info_archive['archives'] + assert len(archives) == 1 + archive = archives[0] + assert archive['name'] == 'test' + assert 
isinstance(archive['command_line'], list) + assert isinstance(archive['duration'], float) + assert len(archive['id']) == 64 + assert 'stats' in archive + assert datetime.strptime(archive['start'], ISO_FORMAT) + assert datetime.strptime(archive['end'], ISO_FORMAT) + def test_comment(self): self.create_regular_file('file1', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test1', 'input') self.cmd('create', '--comment', 'this is the comment', self.repository_location + '::test2', 'input') self.cmd('create', '--comment', '"deleted" comment', self.repository_location + '::test3', 'input') @@ -945,11 +1357,16 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_delete(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('create', self.repository_location + '::test.3', 'input') + self.cmd('create', self.repository_location + '::another_test.1', 'input') + self.cmd('create', self.repository_location + '::another_test.2', 'input') self.cmd('extract', '--dry-run', self.repository_location + '::test') self.cmd('extract', '--dry-run', self.repository_location + '::test.2') + self.cmd('delete', '--prefix', 'another_', self.repository_location) + self.cmd('delete', '--last', '1', self.repository_location) self.cmd('delete', self.repository_location + '::test') self.cmd('extract', '--dry-run', self.repository_location + '::test.2') output = self.cmd('delete', '--stats', self.repository_location + '::test.2') @@ -958,10 +1375,21 @@ class ArchiverTestCase(ArchiverTestCaseBase): with Repository(self.repository_path) as repository: self.assert_equal(len(repository), 1) + def test_delete_multiple(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test1', 'input') + self.cmd('create', self.repository_location + '::test2', 'input') + self.cmd('create', self.repository_location + '::test3', 'input') + self.cmd('delete', self.repository_location + '::test1', 'test2') + self.cmd('extract', '--dry-run', self.repository_location + '::test3') + self.cmd('delete', self.repository_location, 'test3') + assert not self.cmd('list', self.repository_location) + def test_delete_repo(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') self.cmd('create', self.repository_location + '::test.2', 'input') os.environ['BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'] = 'no' @@ -972,8 +1400,40 @@ class ArchiverTestCase(ArchiverTestCaseBase): # Make sure the repo is gone self.assertFalse(os.path.exists(self.repository_path)) + def test_delete_force(self): + self.cmd('init', '--encryption=none', self.repository_location) + self.create_src_archive('test') + with Repository(self.repository_path, exclusive=True) as repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + archive = Archive(repository, key, manifest, 
'test') + for item in archive.iter_items(): + if 'chunks' in item: + first_chunk_id = item.chunks[0].id + repository.delete(first_chunk_id) + repository.commit() + break + output = self.cmd('delete', '--force', self.repository_location + '::test') + self.assert_in('deleted archive was corrupted', output) + self.cmd('check', '--repair', self.repository_location) + output = self.cmd('list', self.repository_location) + self.assert_not_in('test', output) + + def test_delete_double_force(self): + self.cmd('init', '--encryption=none', self.repository_location) + self.create_src_archive('test') + with Repository(self.repository_path, exclusive=True) as repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + archive = Archive(repository, key, manifest, 'test') + id = archive.metadata.items[0] + repository.put(id, b'corrupted items metadata stream chunk') + repository.commit() + self.cmd('delete', '--force', '--force', self.repository_location + '::test') + self.cmd('check', '--repair', self.repository_location) + output = self.cmd('list', self.repository_location) + self.assert_not_in('test', output) + def test_corrupted_repository(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_src_archive('test') self.cmd('extract', '--dry-run', self.repository_location + '::test') output = self.cmd('check', '--show-version', self.repository_location) @@ -990,7 +1450,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): # we currently need to be able to create a lock directory inside the repo: @pytest.mark.xfail(reason="we need to be able to create the lock directory inside the repo") def test_readonly_repository(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_src_archive('test') os.system('chmod -R ugo-w ' + self.repository_path) try: @@ -999,29 +1459,139 @@ class ArchiverTestCase(ArchiverTestCaseBase): # Restore permissions so shutil.rmtree is able to delete it os.system('chmod -R u+w ' + self.repository_path) - @pytest.mark.skipif(sys.platform == 'win32', reason='Meaningless and fails on windows.') + @pytest.mark.skipif('BORG_TESTS_IGNORE_MODES' in os.environ, reason='modes unreliable') def test_umask(self): self.create_regular_file('file1', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') mode = os.stat(self.repository_path).st_mode self.assertEqual(stat.S_IMODE(mode), 0o700) def test_create_dry_run(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', '--dry-run', self.repository_location + '::test', 'input') # Make sure no archive has been created with Repository(self.repository_path) as repository: - manifest, key = Manifest.load(repository) + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) self.assert_equal(len(manifest.archives), 0) - def test_progress(self): + def add_unknown_feature(self, operation): + with Repository(self.repository_path, exclusive=True) as repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + manifest.config[b'feature_flags'] = {operation.value.encode(): {b'mandatory': [b'unknown-feature']}} + manifest.write() + repository.commit() + + def cmd_raises_unknown_feature(self, args): + if self.FORK_DEFAULT: + 
self.cmd(*args, exit_code=EXIT_ERROR) + else: + with pytest.raises(MandatoryFeatureUnsupported) as excinfo: + self.cmd(*args) + assert excinfo.value.args == (['unknown-feature'],) + + def test_unknown_feature_on_create(self): + print(self.cmd('init', '--encryption=repokey', self.repository_location)) + self.add_unknown_feature(Manifest.Operation.WRITE) + self.cmd_raises_unknown_feature(['create', self.repository_location + '::test', 'input']) + + def test_unknown_feature_on_cache_sync(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('delete', '--cache-only', self.repository_location) + self.add_unknown_feature(Manifest.Operation.READ) + self.cmd_raises_unknown_feature(['create', self.repository_location + '::test', 'input']) + + def test_unknown_feature_on_change_passphrase(self): + print(self.cmd('init', '--encryption=repokey', self.repository_location)) + self.add_unknown_feature(Manifest.Operation.CHECK) + self.cmd_raises_unknown_feature(['change-passphrase', self.repository_location]) + + def test_unknown_feature_on_read(self): + print(self.cmd('init', '--encryption=repokey', self.repository_location)) + self.cmd('create', self.repository_location + '::test', 'input') + self.add_unknown_feature(Manifest.Operation.READ) + with changedir('output'): + self.cmd_raises_unknown_feature(['extract', self.repository_location + '::test']) + + self.cmd_raises_unknown_feature(['list', self.repository_location]) + self.cmd_raises_unknown_feature(['info', self.repository_location + '::test']) + + def test_unknown_feature_on_rename(self): + print(self.cmd('init', '--encryption=repokey', self.repository_location)) + self.cmd('create', self.repository_location + '::test', 'input') + self.add_unknown_feature(Manifest.Operation.CHECK) + self.cmd_raises_unknown_feature(['rename', self.repository_location + '::test', 'other']) + + def test_unknown_feature_on_delete(self): + print(self.cmd('init', '--encryption=repokey', self.repository_location)) + self.cmd('create', self.repository_location + '::test', 'input') + self.add_unknown_feature(Manifest.Operation.DELETE) + # delete of an archive raises + self.cmd_raises_unknown_feature(['delete', self.repository_location + '::test']) + self.cmd_raises_unknown_feature(['prune', '--keep-daily=3', self.repository_location]) + # delete of the whole repository ignores features + self.cmd('delete', self.repository_location) + + @unittest.skipUnless(has_llfuse, 'llfuse not installed') + def test_unknown_feature_on_mount(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + self.add_unknown_feature(Manifest.Operation.READ) + mountpoint = os.path.join(self.tmpdir, 'mountpoint') + os.mkdir(mountpoint) + # XXX this might hang if it doesn't raise an error + self.cmd_raises_unknown_feature(['mount', self.repository_location + '::test', mountpoint]) + + @pytest.mark.allow_cache_wipe + def test_unknown_mandatory_feature_in_cache(self): + if self.prefix: + path_prefix = 'ssh://__testsuite__' + else: + path_prefix = '' + + print(self.cmd('init', '--encryption=repokey', self.repository_location)) + + with Repository(self.repository_path, exclusive=True) as repository: + if path_prefix: + repository._location = Location(self.repository_location) + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + with Cache(repository, key, manifest) as cache: + cache.begin_txn() + cache.cache_config.mandatory_features = set(['unknown-feature']) + 
cache.commit() + + if self.FORK_DEFAULT: + self.cmd('create', self.repository_location + '::test', 'input') + else: + called = False + wipe_cache_safe = LocalCache.wipe_cache + + def wipe_wrapper(*args): + nonlocal called + called = True + wipe_cache_safe(*args) + + with patch.object(LocalCache, 'wipe_cache', wipe_wrapper): + self.cmd('create', self.repository_location + '::test', 'input') + + assert called + + with Repository(self.repository_path, exclusive=True) as repository: + if path_prefix: + repository._location = Location(self.repository_location) + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + with Cache(repository, key, manifest) as cache: + assert cache.cache_config.mandatory_features == set([]) + + def test_progress_on(self): self.create_regular_file('file1', size=1024 * 80) - self.cmd('init', self.repository_location) - # progress forced on + self.cmd('init', '--encryption=repokey', self.repository_location) output = self.cmd('create', '--progress', self.repository_location + '::test4', 'input') self.assert_in("\r", output) - # progress forced off + + def test_progress_off(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) output = self.cmd('create', self.repository_location + '::test5', 'input') self.assert_not_in("\r", output) @@ -1029,11 +1599,10 @@ class ArchiverTestCase(ArchiverTestCaseBase): """test that various file status show expected results clearly incomplete: only tests for the weird "unchanged" status for now""" - now = time.time() self.create_regular_file('file1', size=1024 * 80) - os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago + time.sleep(1) # file2 must have newer timestamps than file1 self.create_regular_file('file2', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) output = self.cmd('create', '--list', self.repository_location + '::test', 'input') if sys.platform == 'win32': output = output.replace('\\', '/') @@ -1048,39 +1617,90 @@ class ArchiverTestCase(ArchiverTestCaseBase): # https://borgbackup.readthedocs.org/en/latest/faq.html#i-am-seeing-a-added-status-for-a-unchanged-file self.assert_in("A input/file2", output) + def test_file_status_cs_cache_mode(self): + """test that a changed file with faked "previous" mtime still gets backed up in ctime,size cache_mode""" + self.create_regular_file('file1', contents=b'123') + time.sleep(1) # file2 must have newer timestamps than file1 + self.create_regular_file('file2', size=10) + self.cmd('init', '--encryption=repokey', self.repository_location) + output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test1', 'input') + # modify file1, but cheat with the mtime (and atime) and also keep same size: + st = os.stat('input/file1') + self.create_regular_file('file1', contents=b'321') + os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns)) + # this mode uses ctime for change detection, so it should find file1 as modified + output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input') + self.assert_in("A input/file1", output) + + def test_file_status_ms_cache_mode(self): + """test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode""" + self.create_regular_file('file1', size=10) + time.sleep(1) # file2 must have newer timestamps than file1 + self.create_regular_file('file2', size=10) 
+ self.cmd('init', '--encryption=repokey', self.repository_location) + output = self.cmd('create', '--list', '--files-cache=mtime,size', self.repository_location + '::test1', 'input') + # change mode of file1, no content change: + st = os.stat('input/file1') + os.chmod('input/file1', st.st_mode ^ stat.S_IRWXO) # this triggers a ctime change, but mtime is unchanged + # this mode uses mtime for change detection, so it should find file1 as unmodified + output = self.cmd('create', '--list', '--files-cache=mtime,size', self.repository_location + '::test2', 'input') + self.assert_in("U input/file1", output) + + def test_file_status_rc_cache_mode(self): + """test that files get rechunked unconditionally in rechunk,ctime cache mode""" + self.create_regular_file('file1', size=10) + time.sleep(1) # file2 must have newer timestamps than file1 + self.create_regular_file('file2', size=10) + self.cmd('init', '--encryption=repokey', self.repository_location) + output = self.cmd('create', '--list', '--files-cache=rechunk,ctime', self.repository_location + '::test1', 'input') + # no changes here, but this mode rechunks unconditionally + output = self.cmd('create', '--list', '--files-cache=rechunk,ctime', self.repository_location + '::test2', 'input') + self.assert_in("A input/file1", output) + def test_file_status_excluded(self): """test that excluded paths are listed""" - now = time.time() self.create_regular_file('file1', size=1024 * 80) - os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago + time.sleep(1) # file2 must have newer timestamps than file1 self.create_regular_file('file2', size=1024 * 80) if has_lchflags: self.create_regular_file('file3', size=1024 * 80) platform.set_flags(os.path.join(self.input_path, 'file3'), stat.UF_NODUMP) - self.cmd('init', self.repository_location) - output = self.cmd('create', '--list', self.repository_location + '::test', 'input') - if sys.platform == 'win32': - output = output.replace('\\', '/') + self.cmd('init', '--encryption=repokey', self.repository_location) + output = self.cmd('create', '--list', '--exclude-nodump', self.repository_location + '::test', 'input') self.assert_in("A input/file1", output) self.assert_in("A input/file2", output) if has_lchflags: self.assert_in("x input/file3", output) # should find second file as excluded - output = self.cmd('create', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2') - if sys.platform == 'win32': - output = output.replace('\\', '/') + output = self.cmd('create', '--list', '--exclude-nodump', self.repository_location + '::test1', 'input', '--exclude', '*/file2') self.assert_in("U input/file1", output) self.assert_in("x input/file2", output) if has_lchflags: self.assert_in("x input/file3", output) - def test_create_topical(self): - now = time.time() + def test_create_json(self): self.create_regular_file('file1', size=1024 * 80) - os.utime('input/file1', (now-5, now-5)) + self.cmd('init', '--encryption=repokey', self.repository_location) + create_info = json.loads(self.cmd('create', '--json', self.repository_location + '::test', 'input')) + # The usual keys + assert 'encryption' in create_info + assert 'repository' in create_info + assert 'cache' in create_info + assert 'last_modified' in create_info['repository'] + + archive = create_info['archive'] + assert archive['name'] == 'test' + assert isinstance(archive['command_line'], list) + assert isinstance(archive['duration'], float) + assert len(archive['id']) == 64 + assert 'stats' in archive + + def test_create_topical(self): + 
self.create_regular_file('file1', size=1024 * 80) + time.sleep(1) # file2 must have newer timestamps than file1 self.create_regular_file('file2', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) # no listing by default output = self.cmd('create', self.repository_location + '::test', 'input') self.assert_not_in('file1', output) @@ -1099,15 +1719,23 @@ class ArchiverTestCase(ArchiverTestCaseBase): output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input') self.assert_in('file1', output) + def test_create_read_special_broken_symlink(self): + os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link')) + self.cmd('init', '--encryption=repokey', self.repository_location) + archive = self.repository_location + '::test' + self.cmd('create', '--read-special', archive, 'input') + output = self.cmd('list', archive) + assert 'input/link -> somewhere doesnt exist' in output + # def test_cmdline_compatibility(self): # self.create_regular_file('file1', size=1024 * 80) - # self.cmd('init', self.repository_location) + # self.cmd('init', '--encryption=repokey', self.repository_location) # self.cmd('create', self.repository_location + '::test', 'input') # output = self.cmd('foo', self.repository_location, '--old') # self.assert_in('"--old" has been deprecated. Use "--new" instead', output) def test_prune_repository(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test1', src_dir) self.cmd('create', self.repository_location + '::test2', src_dir) # these are not really checkpoints, but they look like some: @@ -1146,7 +1774,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('test5', output) def test_prune_repository_save_space(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test1', src_dir) self.cmd('create', self.repository_location + '::test2', src_dir) output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2') @@ -1162,7 +1790,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('test2', output) def test_prune_repository_prefix(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir) self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir) self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir) @@ -1182,8 +1810,29 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('bar-2015-08-12-10:00', output) self.assert_in('bar-2015-08-12-20:00', output) + def test_prune_repository_glob(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::2015-08-12-10:00-foo', src_dir) + self.cmd('create', self.repository_location + '::2015-08-12-20:00-foo', src_dir) + self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir) + self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir) + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo') + self.assert_in('Keeping archive: 2015-08-12-20:00-foo',
output) + self.assert_in('Would prune: 2015-08-12-10:00-foo', output) + output = self.cmd('list', self.repository_location) + self.assert_in('2015-08-12-10:00-foo', output) + self.assert_in('2015-08-12-20:00-foo', output) + self.assert_in('2015-08-12-10:00-bar', output) + self.assert_in('2015-08-12-20:00-bar', output) + self.cmd('prune', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo') + output = self.cmd('list', self.repository_location) + self.assert_not_in('2015-08-12-10:00-foo', output) + self.assert_in('2015-08-12-20:00-foo', output) + self.assert_in('2015-08-12-10:00-bar', output) + self.assert_in('2015-08-12-20:00-bar', output) + def test_list_prefix(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test-1', src_dir) self.cmd('create', self.repository_location + '::something-else-than-test-1', src_dir) self.cmd('create', self.repository_location + '::test-2', src_dir) @@ -1193,25 +1842,25 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_not_in('something-else', output) def test_list_format(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) test_archive = self.repository_location + '::test' self.cmd('create', test_archive, src_dir) - self.cmd('list', '--list-format', '-', test_archive, exit_code=1) - self.archiver.exit_code = 0 # reset exit code for following tests + output_warn = self.cmd('list', '--list-format', '-', test_archive) + self.assert_in('--list-format" has been deprecated.', output_warn) output_1 = self.cmd('list', test_archive) output_2 = '' if sys.platform == 'win32': - output_2 = self.cmd('list', '--format', '{user:15} {size:8} {isomtime} {path}{extra}{NL}', test_archive) + output_2 = self.cmd('list', '--format', '{user:15} {size:8d} {mtime} {path}{extra}{NEWLINE}', test_archive) else: - output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}', test_archive) + output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {mtime} {path}{extra}{NEWLINE}', test_archive) output_3 = self.cmd('list', '--format', '{mtime:%s} {path}{NL}', test_archive) self.assertEqual(output_1, output_2) self.assertNotEqual(output_1, output_3) def test_list_repository_format(self): - self.cmd('init', self.repository_location) - self.cmd('create', self.repository_location + '::test-1', src_dir) - self.cmd('create', self.repository_location + '::test-2', src_dir) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', '--comment', 'comment 1', self.repository_location + '::test-1', src_dir) + self.cmd('create', '--comment', 'comment 2', self.repository_location + '::test-2', src_dir) output_1 = self.cmd('list', self.repository_location) output_2 = self.cmd('list', '--format', '{archive:<36} {time} [{id}]{NL}', self.repository_location) self.assertEqual(output_1, output_2) @@ -1219,11 +1868,14 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assertEqual(output_1, 'test-1\ntest-2\n') output_1 = self.cmd('list', '--format', '{barchive}/', self.repository_location) self.assertEqual(output_1, 'test-1/test-2/') + output_3 = self.cmd('list', '--format', '{name} {comment}{NL}', self.repository_location) + self.assert_in('test-1 comment 1\n', output_3) + self.assert_in('test-2 comment 2\n', output_3) def test_list_hash(self): self.create_regular_file('empty_file', size=0) 
self.create_regular_file('amb', contents=b'a' * 1000000) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) test_archive = self.repository_location + '::test' self.cmd('create', test_archive, 'input') output = self.cmd('list', '--format', '{sha256} {path}{NL}', test_archive) @@ -1236,7 +1888,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): with open(os.path.join(self.input_path, 'two_chunks'), 'wb') as fd: fd.write(b'abba' * 2000000) fd.write(b'baab' * 2000000) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) test_archive = self.repository_location + '::test' self.cmd('create', test_archive, 'input') output = self.cmd('list', '--format', '{num_chunks} {unique_chunks} {path}{NL}', test_archive) @@ -1245,12 +1897,86 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_list_size(self): self.create_regular_file('compressible_file', size=10000) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) test_archive = self.repository_location + '::test' self.cmd('create', '-C', 'lz4', test_archive, 'input') - output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive) - size, csize, path = output.split("\n")[1].split(" ") + output = self.cmd('list', '--format', '{size} {csize} {dsize} {dcsize} {path}{NL}', test_archive) + size, csize, dsize, dcsize, path = output.split("\n")[1].split(" ") assert int(csize) < int(size) + assert int(dcsize) < int(dsize) + assert int(dsize) <= int(size) + assert int(dcsize) <= int(csize) + + def test_list_json(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + list_repo = json.loads(self.cmd('list', '--json', self.repository_location)) + repository = list_repo['repository'] + assert len(repository['id']) == 64 + assert datetime.strptime(repository['last_modified'], ISO_FORMAT) # must not raise + assert list_repo['encryption']['mode'] == 'repokey' + assert 'keyfile' not in list_repo['encryption'] + archive0 = list_repo['archives'][0] + assert datetime.strptime(archive0['time'], ISO_FORMAT) # must not raise + + list_archive = self.cmd('list', '--json-lines', self.repository_location + '::test') + items = [json.loads(s) for s in list_archive.splitlines()] + assert len(items) == 2 + file1 = items[1] + assert file1['path'] == 'input/file1' + assert file1['size'] == 81920 + assert datetime.strptime(file1['mtime'], ISO_FORMAT) # must not raise + + list_archive = self.cmd('list', '--json-lines', '--format={sha256}', self.repository_location + '::test') + items = [json.loads(s) for s in list_archive.splitlines()] + assert len(items) == 2 + file1 = items[1] + assert file1['path'] == 'input/file1' + assert file1['sha256'] == 'b2915eb69f260d8d3c25249195f2c8f4f716ea82ec760ae929732c0262442b2b' + + def test_list_json_args(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('list', '--json-lines', self.repository_location, exit_code=2) + self.cmd('list', '--json', self.repository_location + '::archive', exit_code=2) + + def test_log_json(self): + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + log = self.cmd('create', '--log-json', self.repository_location + '::test', 'input', '--list', '--debug') + messages = {} # type -> message, one of each kind 
+ for line in log.splitlines(): + msg = json.loads(line) + messages[msg['type']] = msg + + file_status = messages['file_status'] + assert 'status' in file_status + assert file_status['path'].startswith('input') + + log_message = messages['log_message'] + assert isinstance(log_message['time'], float) + assert log_message['levelname'] == 'DEBUG' # there should only be DEBUG messages + assert isinstance(log_message['message'], str) + + def test_debug_profile(self): + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input', '--debug-profile=create.prof') + self.cmd('debug', 'convert-profile', 'create.prof', 'create.pyprof') + stats = pstats.Stats('create.pyprof') + stats.strip_dirs() + stats.sort_stats('cumtime') + + self.cmd('create', self.repository_location + '::test2', 'input', '--debug-profile=create.pyprof') + stats = pstats.Stats('create.pyprof') # Only do this on trusted data! + stats.strip_dirs() + stats.sort_stats('cumtime') + + def test_common_options(self): + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + log = self.cmd('--debug', 'create', self.repository_location + '::test', 'input') + assert 'security: read previous location' in log def _get_sizes(self, compression, compressible, size=10000): if compressible: @@ -1310,17 +2036,21 @@ class ArchiverTestCase(ArchiverTestCaseBase): size, csize = self._get_sizes('lzma', compressible=False) assert csize >= size + def test_change_passphrase(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + os.environ['BORG_NEW_PASSPHRASE'] = 'newpassphrase' + # here we have both BORG_PASSPHRASE and BORG_NEW_PASSPHRASE set: + self.cmd('change-passphrase', self.repository_location) + os.environ['BORG_PASSPHRASE'] = 'newpassphrase' + self.cmd('list', self.repository_location) + def test_break_lock(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('break-lock', self.repository_location) def test_usage(self): - if self.FORK_DEFAULT: - self.cmd(exit_code=0) - self.cmd('-h', exit_code=0) - else: - self.assert_raises(SystemExit, lambda: self.cmd()) - self.assert_raises(SystemExit, lambda: self.cmd('-h')) + self.cmd() + self.cmd('-h') def test_help(self): assert 'Borg' in self.cmd('help') @@ -1331,21 +2061,38 @@ class ArchiverTestCase(ArchiverTestCaseBase): @unittest.skipUnless(has_llfuse and sys.platform != 'win32', 'llfuse not installed') def test_fuse(self): - self.cmd('init', self.repository_location) + def has_noatime(some_file): + atime_before = os.stat(some_file).st_atime_ns + try: + os.close(os.open(some_file, flags_noatime)) + except PermissionError: + return False + else: + atime_after = os.stat(some_file).st_atime_ns + noatime_used = flags_noatime != flags_normal + return noatime_used and atime_before == atime_after + + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_test_files() - self.cmd('create', self.repository_location + '::archive', 'input') - self.cmd('create', self.repository_location + '::archive2', 'input') + have_noatime = has_noatime('input/file1') + self.cmd('create', '--exclude-nodump', self.repository_location + '::archive', 'input') + self.cmd('create', '--exclude-nodump', self.repository_location + '::archive2', 'input') if has_lchflags: # remove the file we did not backup, so input and output become equal os.remove(os.path.join('input', 
'flagfile')) mountpoint = os.path.join(self.tmpdir, 'mountpoint') # mount the whole repository, archive contents shall show up in archivename subdirs of mountpoint: with self.fuse_mount(self.repository_location, mountpoint): - self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input')) - self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input')) + # bsdflags are not supported by the FUSE mount + # we also ignore xattrs here, they are tested separately + self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'), + ignore_bsdflags=True, ignore_xattrs=True) + self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'), + ignore_bsdflags=True, ignore_xattrs=True) # mount only 1 archive, its contents shall show up directly in mountpoint: with self.fuse_mount(self.repository_location + '::archive', mountpoint): - self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input')) + self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'), + ignore_bsdflags=True, ignore_xattrs=True) # regular file in_fn = 'input/file1' out_fn = os.path.join(mountpoint, 'input', 'file1') @@ -1356,7 +2103,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): assert sti1.st_uid == sto1.st_uid assert sti1.st_gid == sto1.st_gid assert sti1.st_size == sto1.st_size - assert sti1.st_atime == sto1.st_atime + if have_noatime: + assert sti1.st_atime == sto1.st_atime assert sti1.st_ctime == sto1.st_ctime assert sti1.st_mtime == sto1.st_mtime # note: there is another hardlink to this, see below @@ -1364,41 +2112,79 @@ class ArchiverTestCase(ArchiverTestCaseBase): # read with open(in_fn, 'rb') as in_f, open(out_fn, 'rb') as out_f: assert in_f.read() == out_f.read() - # list/read xattrs - if xattr.is_enabled(self.input_path): - assert xattr.listxattr(out_fn) == ['user.foo', ] - assert xattr.getxattr(out_fn, 'user.foo') == b'bar' - else: - assert xattr.listxattr(out_fn) == [] - try: - xattr.getxattr(out_fn, 'user.foo') - except OSError as e: - assert e.errno == llfuse.ENOATTR - else: - assert False, "expected OSError(ENOATTR), but no error was raised" # hardlink (to 'input/file1') - in_fn = 'input/hardlink' - out_fn = os.path.join(mountpoint, 'input', 'hardlink') - sti2 = os.stat(in_fn) - sto2 = os.stat(out_fn) - assert sti2.st_nlink == sto2.st_nlink == 2 - assert sto1.st_ino == sto2.st_ino + if are_hardlinks_supported(): + in_fn = 'input/hardlink' + out_fn = os.path.join(mountpoint, 'input', 'hardlink') + sti2 = os.stat(in_fn) + sto2 = os.stat(out_fn) + assert sti2.st_nlink == sto2.st_nlink == 2 + assert sto1.st_ino == sto2.st_ino # symlink - in_fn = 'input/link1' - out_fn = os.path.join(mountpoint, 'input', 'link1') - sti = os.stat(in_fn, follow_symlinks=False) - sto = os.stat(out_fn, follow_symlinks=False) - assert stat.S_ISLNK(sti.st_mode) - assert stat.S_ISLNK(sto.st_mode) - assert os.readlink(in_fn) == os.readlink(out_fn) + if are_symlinks_supported(): + in_fn = 'input/link1' + out_fn = os.path.join(mountpoint, 'input', 'link1') + sti = os.stat(in_fn, follow_symlinks=False) + sto = os.stat(out_fn, follow_symlinks=False) + assert sti.st_size == len('somewhere') + assert sto.st_size == len('somewhere') + assert stat.S_ISLNK(sti.st_mode) + assert stat.S_ISLNK(sto.st_mode) + assert os.readlink(in_fn) == os.readlink(out_fn) # FIFO - out_fn = os.path.join(mountpoint, 'input', 'fifo1') - sto = os.stat(out_fn) - assert stat.S_ISFIFO(sto.st_mode) + if are_fifos_supported(): + out_fn = 
os.path.join(mountpoint, 'input', 'fifo1') + sto = os.stat(out_fn) + assert stat.S_ISFIFO(sto.st_mode) + # list/read xattrs + try: + in_fn = 'input/fusexattr' + out_fn = os.path.join(mountpoint, 'input', 'fusexattr') + if not xattr.XATTR_FAKEROOT and xattr.is_enabled(self.input_path): + assert sorted(no_selinux(xattr.listxattr(out_fn))) == ['user.empty', 'user.foo', ] + assert xattr.getxattr(out_fn, 'user.foo') == b'bar' + # Special case: getxattr returns None (not b'') when reading an empty xattr. + assert xattr.getxattr(out_fn, 'user.empty') is None + else: + assert xattr.listxattr(out_fn) == [] + try: + xattr.getxattr(out_fn, 'user.foo') + except OSError as e: + assert e.errno == llfuse.ENOATTR + else: + assert False, "expected OSError(ENOATTR), but no error was raised" + except OSError as err: + if sys.platform.startswith(('freebsd', )) and err.errno == errno.ENOTSUP: + # some systems have no xattr support on FUSE + pass + else: + raise + + @unittest.skipUnless(has_llfuse, 'llfuse not installed') + def test_fuse_versions_view(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('test', contents=b'first') + if are_hardlinks_supported(): + self.create_regular_file('hardlink1', contents=b'') + os.link('input/hardlink1', 'input/hardlink2') + self.cmd('create', self.repository_location + '::archive1', 'input') + self.create_regular_file('test', contents=b'second') + self.cmd('create', self.repository_location + '::archive2', 'input') + mountpoint = os.path.join(self.tmpdir, 'mountpoint') + # mount the whole repository, archive contents shall show up in versioned view: + with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions'): + path = os.path.join(mountpoint, 'input', 'test') # filename shows up as directory ... + files = os.listdir(path) + assert all(f.startswith('test.') for f in files) # ... 
with files test.xxxxx in there + assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files} + if are_hardlinks_supported(): + st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')) + st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')) + assert st1.st_ino == st2.st_ino @unittest.skipUnless(has_llfuse and sys.platform != 'win32', 'llfuse not installed') def test_fuse_allow_damaged_files(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_src_archive('archive') # Get rid of a chunk and repair it archive, repository = self.open_archive('archive') @@ -1418,21 +2204,43 @@ class ArchiverTestCase(ArchiverTestCaseBase): with pytest.raises(OSError) as excinfo: open(os.path.join(mountpoint, path)) assert excinfo.value.errno == errno.EIO - with self.fuse_mount(self.repository_location + '::archive', mountpoint, 'allow_damaged_files'): + with self.fuse_mount(self.repository_location + '::archive', mountpoint, '-o', 'allow_damaged_files'): open(os.path.join(mountpoint, path)).close() + @unittest.skipUnless(has_llfuse, 'llfuse not installed') + def test_fuse_mount_options(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_src_archive('arch11') + self.create_src_archive('arch12') + self.create_src_archive('arch21') + self.create_src_archive('arch22') + + mountpoint = os.path.join(self.tmpdir, 'mountpoint') + with self.fuse_mount(self.repository_location, mountpoint, '--first=2', '--sort=name'): + assert sorted(os.listdir(os.path.join(mountpoint))) == ['arch11', 'arch12'] + with self.fuse_mount(self.repository_location, mountpoint, '--last=2', '--sort=name'): + assert sorted(os.listdir(os.path.join(mountpoint))) == ['arch21', 'arch22'] + with self.fuse_mount(self.repository_location, mountpoint, '--prefix=arch1'): + assert sorted(os.listdir(os.path.join(mountpoint))) == ['arch11', 'arch12'] + with self.fuse_mount(self.repository_location, mountpoint, '--prefix=arch2'): + assert sorted(os.listdir(os.path.join(mountpoint))) == ['arch21', 'arch22'] + with self.fuse_mount(self.repository_location, mountpoint, '--prefix=arch'): + assert sorted(os.listdir(os.path.join(mountpoint))) == ['arch11', 'arch12', 'arch21', 'arch22'] + with self.fuse_mount(self.repository_location, mountpoint, '--prefix=nope'): + assert sorted(os.listdir(os.path.join(mountpoint))) == [] + def verify_aes_counter_uniqueness(self, method): seen = set() # Chunks already seen used = set() # counter values already used def verify_uniqueness(): with Repository(self.repository_path) as repository: - for key, _ in repository.open_index(repository.get_transaction_id()).iteritems(): - data = repository.get(key) + for id, _ in repository.open_index(repository.get_transaction_id()).iteritems(): + data = repository.get(id) hash = sha256(data).digest() if hash not in seen: seen.add(hash) - num_blocks = num_aes_blocks(len(data) - 41) + num_blocks = num_cipher_blocks(len(data) - 41) nonce = bytes_to_long(data[33:41]) for counter in range(nonce, nonce + num_blocks): self.assert_not_in(counter, used) @@ -1448,7 +2256,6 @@ class ArchiverTestCase(ArchiverTestCaseBase): verify_uniqueness() self.cmd('delete', self.repository_location + '::test.2') verify_uniqueness() - self.assert_equal(used, set(range(len(used)))) def test_aes_counter_uniqueness_keyfile(self): self.verify_aes_counter_uniqueness('keyfile') @@ -1458,31 +2265,41 @@ class 
ArchiverTestCase(ArchiverTestCaseBase): def test_debug_dump_archive_items(self): self.create_test_files() - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): - output = self.cmd('debug-dump-archive-items', self.repository_location + '::test') + output = self.cmd('debug', 'dump-archive-items', self.repository_location + '::test') + output_dir = sorted(os.listdir('output')) + assert len(output_dir) > 0 and output_dir[0].startswith('000000_') + assert 'Done.' in output + + def test_debug_dump_repo_objs(self): + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + with changedir('output'): + output = self.cmd('debug', 'dump-repo-objs', self.repository_location) output_dir = sorted(os.listdir('output')) assert len(output_dir) > 0 and output_dir[0].startswith('000000_') assert 'Done.' in output def test_debug_put_get_delete_obj(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) data = b'some data' hexkey = sha256(data).hexdigest() self.create_regular_file('file', contents=data) - output = self.cmd('debug-put-obj', self.repository_location, 'input/file') + output = self.cmd('debug', 'put-obj', self.repository_location, 'input/file') assert hexkey in output - output = self.cmd('debug-get-obj', self.repository_location, hexkey, 'output/file') + output = self.cmd('debug', 'get-obj', self.repository_location, hexkey, 'output/file') assert hexkey in output with open('output/file', 'rb') as f: data_read = f.read() assert data == data_read - output = self.cmd('debug-delete-obj', self.repository_location, hexkey) + output = self.cmd('debug', 'delete-obj', self.repository_location, hexkey) assert "deleted" in output - output = self.cmd('debug-delete-obj', self.repository_location, hexkey) + output = self.cmd('debug', 'delete-obj', self.repository_location, hexkey) assert "not found" in output - output = self.cmd('debug-delete-obj', self.repository_location, 'invalid') + output = self.cmd('debug', 'delete-obj', self.repository_location, 'invalid') assert "is invalid" in output def test_init_interrupt(self): @@ -1490,26 +2307,99 @@ class ArchiverTestCase(ArchiverTestCaseBase): raise EOFError with patch.object(KeyfileKeyBase, 'create', raise_eof): - self.cmd('init', self.repository_location, exit_code=1) + self.cmd('init', '--encryption=repokey', self.repository_location, exit_code=1) assert not os.path.exists(self.repository_location) - def test_recreate_basic(self): + def test_init_requires_encryption_option(self): + self.cmd('init', self.repository_location, exit_code=2) + + def test_init_nested_repositories(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + if self.FORK_DEFAULT: + self.cmd('init', '--encryption=repokey', self.repository_location + '/nested', exit_code=2) + else: + with pytest.raises(Repository.AlreadyExists): + self.cmd('init', '--encryption=repokey', self.repository_location + '/nested') + + def check_cache(self): + # First run a regular borg check + self.cmd('check', self.repository_location) + # Then check that the cache on disk matches exactly what's in the repo. 
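+ # ("Matches" here means: for every chunk id, the (refcount, size, csize) triple in the pre-existing
+ # on-disk chunks cache must equal the triple in a cache rebuilt from scratch, and neither side may
+ # contain an id the other lacks - that is what the loops below verify.)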
+ with self.open_repository() as repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + with Cache(repository, key, manifest, sync=False) as cache: + original_chunks = cache.chunks + Cache.destroy(repository) + with Cache(repository, key, manifest) as cache: + correct_chunks = cache.chunks + assert original_chunks is not correct_chunks + seen = set() + for id, (refcount, size, csize) in correct_chunks.iteritems(): + o_refcount, o_size, o_csize = original_chunks[id] + assert refcount == o_refcount + assert size == o_size + assert csize == o_csize + seen.add(id) + for id, (refcount, size, csize) in original_chunks.iteritems(): + assert id in seen + + def test_check_cache(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + with self.open_repository() as repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + with Cache(repository, key, manifest, sync=False) as cache: + cache.begin_txn() + cache.chunks.incref(list(cache.chunks.iteritems())[0][0]) + cache.commit() + with pytest.raises(AssertionError): + self.check_cache() + + def test_recreate_target_rc(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + output = self.cmd('recreate', self.repository_location, '--target=asdf', exit_code=2) + assert 'Need to specify single archive' in output + + def test_recreate_target(self): self.create_test_files() - self.create_regular_file('dir2/file3', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.check_cache() archive = self.repository_location + '::test0' self.cmd('create', archive, 'input') - self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3') + self.check_cache() + original_archive = self.cmd('list', self.repository_location) + self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3', '--target=new-archive') + self.check_cache() + archives = self.cmd('list', self.repository_location) + assert original_archive in archives + assert 'new-archive' in archives + + archive = self.repository_location + '::new-archive' listing = self.cmd('list', '--short', archive) assert 'file1' not in listing assert 'dir2/file2' in listing assert 'dir2/file3' not in listing + def test_recreate_basic(self): + self.create_test_files() + self.create_regular_file('dir2/file3', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + archive = self.repository_location + '::test0' + self.cmd('create', archive, 'input') + self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3') + self.check_cache() + listing = self.cmd('list', '--short', archive) + assert 'file1' not in listing + assert 'dir2/file2' in listing + assert 'dir2/file3' not in listing + + @pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported') def test_recreate_subtree_hardlinks(self): # This is essentially the same problem set as in test_extract_hardlinks self._extract_hardlinks_setup() self.cmd('create', self.repository_location + '::test2', 'input') self.cmd('recreate', self.repository_location + '::test', 'input/dir1') + self.check_cache() with changedir('output'): self.cmd('extract', self.repository_location + '::test') assert os.stat('input/dir1/hardlink').st_nlink == 2 @@ -1524,28 +2414,31 @@ class ArchiverTestCase(ArchiverTestCaseBase): with open(os.path.join(self.input_path, 'large_file'), 
'wb') as fd: fd.write(b'a' * 280) fd.write(b'b' * 280) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input') - self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache') + self.cmd('create', self.repository_location + '::test2', 'input', '--files-cache=disabled') list = self.cmd('list', self.repository_location + '::test1', 'input/large_file', '--format', '{num_chunks} {unique_chunks}') num_chunks, unique_chunks = map(int, list.split(' ')) # test1 and test2 do not deduplicate assert num_chunks == unique_chunks self.cmd('recreate', self.repository_location, '--chunker-params', 'default') + self.check_cache() # test1 and test2 do deduplicate after recreate + assert int(self.cmd('list', self.repository_location + '::test1', 'input/large_file', '--format={size}')) assert not int(self.cmd('list', self.repository_location + '::test1', 'input/large_file', '--format', '{unique_chunks}')) def test_recreate_recompress(self): self.create_regular_file('compressible', size=10000) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input', '-C', 'none') file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible', '--format', '{size} {csize} {sha256}') size, csize, sha256_before = file_list.split(' ') assert int(csize) >= int(size) # >= due to metadata overhead - self.cmd('recreate', self.repository_location, '-C', 'lz4') + self.cmd('recreate', self.repository_location, '-C', 'lz4', '--recompress') + self.check_cache() file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible', '--format', '{size} {csize} {sha256}') size, csize, sha256_after = file_list.split(' ') @@ -1554,130 +2447,32 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_recreate_dry_run(self): self.create_regular_file('compressible', size=10000) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') archives_before = self.cmd('list', self.repository_location + '::test') self.cmd('recreate', self.repository_location, '-n', '-e', 'input/compressible') + self.check_cache() archives_after = self.cmd('list', self.repository_location + '::test') assert archives_after == archives_before - def _recreate_interrupt_patch(self, interrupt_after_n_1_files): - def interrupt(self, *args): - if interrupt_after_n_1_files: - self.interrupt = True - pi_save(self, *args) - else: - raise ArchiveRecreater.Interrupted - - def process_item_patch(*args): - return pi_call.pop(0)(*args) - - pi_save = ArchiveRecreater.process_item - pi_call = [pi_save] * interrupt_after_n_1_files + [interrupt] - return process_item_patch - - def _test_recreate_interrupt(self, change_args, interrupt_early): - self.create_test_files() - self.create_regular_file('dir2/abcdef', size=1024 * 80) - self.cmd('init', self.repository_location) - self.cmd('create', self.repository_location + '::test', 'input') - process_files = 1 - if interrupt_early: - process_files = 0 - with patch.object(ArchiveRecreater, 'process_item', self._recreate_interrupt_patch(process_files)): - self.cmd('recreate', self.repository_location, 'input/dir2') - assert 'test.recreate' in self.cmd('list', self.repository_location) - if 
change_args: - with patch.object(sys, 'argv', sys.argv + ['non-forking tests don\'t use sys.argv']): - output = self.cmd('recreate', '-sv', '--list', '-pC', 'lz4', self.repository_location, 'input/dir2') - else: - output = self.cmd('recreate', '-sv', '--list', self.repository_location, 'input/dir2') - assert 'Found test.recreate, will resume' in output - assert change_args == ('Command line changed' in output) - if not interrupt_early: - assert 'Fast-forwarded to input/dir2/abcdef' in output - assert 'A input/dir2/abcdef' not in output - assert 'A input/dir2/file2' in output - archives = self.cmd('list', self.repository_location) - assert 'test.recreate' not in archives - assert 'test' in archives - files = self.cmd('list', self.repository_location + '::test') - assert 'dir2/file2' in files - assert 'dir2/abcdef' in files - assert 'file1' not in files - - # The _test_create_interrupt requires a deterministic (alphabetic) order of the files to easily check if - # resumption works correctly. Patch scandir_inorder to work in alphabetic order. - - def test_recreate_interrupt(self): - with patch.object(helpers, 'scandir_inorder', helpers.scandir_generic): - self._test_recreate_interrupt(False, True) - - def test_recreate_interrupt2(self): - with patch.object(helpers, 'scandir_inorder', helpers.scandir_generic): - self._test_recreate_interrupt(True, False) - - def _test_recreate_chunker_interrupt_patch(self): - real_add_chunk = Cache.add_chunk - - def add_chunk(*args, **kwargs): - frame = inspect.stack()[2] - try: - caller_self = frame[0].f_locals['self'] - if isinstance(caller_self, ArchiveRecreater): - caller_self.interrupt = True - finally: - del frame - return real_add_chunk(*args, **kwargs) - return add_chunk - - def test_recreate_rechunkify_interrupt(self): - self.create_regular_file('file1', size=1024 * 80) - self.cmd('init', self.repository_location) - self.cmd('create', self.repository_location + '::test', 'input') - archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') - with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()): - self.cmd('recreate', '-pv', '--chunker-params', '10,13,11,4095', self.repository_location) - assert 'test.recreate' in self.cmd('list', self.repository_location) - output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,13,11,4095', self.repository_location) - assert 'Found test.recreate, will resume' in output - assert 'Copied 1 chunks from a partially processed item' in output - archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') - assert archive_after == archive_before - - def test_recreate_changed_source(self): - self.create_test_files() - self.cmd('init', self.repository_location) - self.cmd('create', self.repository_location + '::test', 'input') - with patch.object(ArchiveRecreater, 'process_item', self._recreate_interrupt_patch(1)): - self.cmd('recreate', self.repository_location, 'input/dir2') - assert 'test.recreate' in self.cmd('list', self.repository_location) - self.cmd('delete', self.repository_location + '::test') - self.cmd('create', self.repository_location + '::test', 'input') - output = self.cmd('recreate', self.repository_location, 'input/dir2') - assert 'Source archive changed, will discard test.recreate and start over' in output - - def test_recreate_refuses_temporary(self): - self.cmd('init', self.repository_location) - self.cmd('recreate', self.repository_location + '::cba.recreate', exit_code=2) - def 
test_recreate_skips_nothing_to_do(self): self.create_regular_file('file1', size=1024 * 80) - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') info_before = self.cmd('info', self.repository_location + '::test') self.cmd('recreate', self.repository_location, '--chunker-params', 'default') + self.check_cache() info_after = self.cmd('info', self.repository_location + '::test') assert info_before == info_after # includes archive ID def test_with_lock(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) lock_path = os.path.join(self.repository_path, 'lock.exclusive') cmd = 'python3', '-c', 'import os, sys; sys.exit(42 if os.path.exists("%s") else 23)' % lock_path self.cmd('with-lock', self.repository_location, *cmd, fork=True, exit_code=42) def test_recreate_list_output(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('file1', size=0) self.create_regular_file('file2', size=0) self.create_regular_file('file3', size=0) @@ -1687,21 +2482,340 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test', 'input') output = self.cmd('recreate', '--list', '--info', self.repository_location + '::test', '-e', 'input/file2') + self.check_cache() self.assert_in("input/file1", output) self.assert_in("x input/file2", output) output = self.cmd('recreate', '--list', self.repository_location + '::test', '-e', 'input/file3') + self.check_cache() self.assert_in("input/file1", output) self.assert_in("x input/file3", output) output = self.cmd('recreate', self.repository_location + '::test', '-e', 'input/file4') + self.check_cache() self.assert_not_in("input/file1", output) self.assert_not_in("x input/file4", output) output = self.cmd('recreate', '--info', self.repository_location + '::test', '-e', 'input/file5') + self.check_cache() self.assert_not_in("input/file1", output) self.assert_not_in("x input/file5", output) + def test_bad_filters(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('delete', '--first', '1', '--last', '1', self.repository_location, fork=True, exit_code=2) + + def test_key_export_keyfile(self): + export_file = self.output_path + '/exported' + self.cmd('init', self.repository_location, '--encryption', 'keyfile') + repo_id = self._extract_repository_id(self.repository_path) + self.cmd('key', 'export', self.repository_location, export_file) + + with open(export_file, 'r') as fd: + export_contents = fd.read() + + assert export_contents.startswith('BORG_KEY ' + bin_to_hex(repo_id) + '\n') + + key_file = self.keys_path + '/' + os.listdir(self.keys_path)[0] + + with open(key_file, 'r') as fd: + key_contents = fd.read() + + assert key_contents == export_contents + + os.unlink(key_file) + + self.cmd('key', 'import', self.repository_location, export_file) + + with open(key_file, 'r') as fd: + key_contents2 = fd.read() + + assert key_contents2 == key_contents + + def test_key_export_repokey(self): + export_file = self.output_path + '/exported' + self.cmd('init', self.repository_location, '--encryption', 'repokey') + repo_id = self._extract_repository_id(self.repository_path) + self.cmd('key', 'export', self.repository_location, export_file) + + with open(export_file, 'r') 
as fd:
+            export_contents = fd.read()
+
+        assert export_contents.startswith('BORG_KEY ' + bin_to_hex(repo_id) + '\n')
+
+        with Repository(self.repository_path) as repository:
+            repo_key = RepoKey(repository)
+            repo_key.load(None, Passphrase.env_passphrase())
+
+        backup_key = KeyfileKey(key.TestKey.MockRepository())
+        backup_key.load(export_file, Passphrase.env_passphrase())
+
+        assert repo_key.enc_key == backup_key.enc_key
+
+        with Repository(self.repository_path) as repository:
+            repository.save_key(b'')
+
+        self.cmd('key', 'import', self.repository_location, export_file)
+
+        with Repository(self.repository_path) as repository:
+            repo_key2 = RepoKey(repository)
+            repo_key2.load(None, Passphrase.env_passphrase())
+
+        assert repo_key2.enc_key == repo_key.enc_key
+
+    def test_key_export_qr(self):
+        export_file = self.output_path + '/exported.html'
+        self.cmd('init', self.repository_location, '--encryption', 'repokey')
+        repo_id = self._extract_repository_id(self.repository_path)
+        self.cmd('key', 'export', '--qr-html', self.repository_location, export_file)
+
+        with open(export_file, 'r', encoding='utf-8') as fd:
+            export_contents = fd.read()
+
+        assert bin_to_hex(repo_id) in export_contents
+        assert export_contents.startswith('<!doctype html>')
+        assert export_contents.endswith('</html>\n')
+
+    def test_key_import_errors(self):
+        export_file = self.output_path + '/exported'
+        self.cmd('init', self.repository_location, '--encryption', 'keyfile')
+
+        self.cmd('key', 'import', self.repository_location, export_file, exit_code=EXIT_ERROR)
+
+        with open(export_file, 'w') as fd:
+            fd.write('something not a key\n')
+
+        if self.FORK_DEFAULT:
+            self.cmd('key', 'import', self.repository_location, export_file, exit_code=2)
+        else:
+            with pytest.raises(NotABorgKeyFile):
+                self.cmd('key', 'import', self.repository_location, export_file)
+
+        with open(export_file, 'w') as fd:
+            fd.write('BORG_KEY a0a0a0\n')
+
+        if self.FORK_DEFAULT:
+            self.cmd('key', 'import', self.repository_location, export_file, exit_code=2)
+        else:
+            with pytest.raises(RepoIdMismatch):
+                self.cmd('key', 'import', self.repository_location, export_file)
+
+    def test_key_export_paperkey(self):
+        repo_id = 'e294423506da4e1ea76e8dcdf1a3919624ae3ae496fddf905610c351d3f09239'
+
+        export_file = self.output_path + '/exported'
+        self.cmd('init', self.repository_location, '--encryption', 'keyfile')
+        self._set_repository_id(self.repository_path, unhexlify(repo_id))
+
+        key_file = self.keys_path + '/' + os.listdir(self.keys_path)[0]
+
+        with open(key_file, 'w') as fd:
+            fd.write(KeyfileKey.FILE_ID + ' ' + repo_id + '\n')
+            fd.write(b2a_base64(b'abcdefghijklmnopqrstu').decode())
+
+        self.cmd('key', 'export', '--paper', self.repository_location, export_file)
+
+        with open(export_file, 'r') as fd:
+            export_contents = fd.read()
+
+        assert export_contents == """To restore key use borg key import --paper /path/to/repo
+
+BORG PAPER KEY v1
+id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
+ 1: 616263 646566 676869 6a6b6c 6d6e6f 707172 - 6d
+ 2: 737475 - 88
+"""
+
+    def test_key_import_paperkey(self):
+        repo_id = 'e294423506da4e1ea76e8dcdf1a3919624ae3ae496fddf905610c351d3f09239'
+        self.cmd('init', self.repository_location, '--encryption', 'keyfile')
+        self._set_repository_id(self.repository_path, unhexlify(repo_id))
+
+        key_file = self.keys_path + '/' + os.listdir(self.keys_path)[0]
+        with open(key_file, 'w') as fd:
+            fd.write(KeyfileKey.FILE_ID + ' ' + repo_id + '\n')
+            fd.write(b2a_base64(b'abcdefghijklmnopqrstu').decode())
+
+        typed_input = (
+            b'2 / e29442 3506da 4e1ea7 / 
25f62a 5a3d41 02\n' # Forgot to type "-" + b'2 / e29442 3506da 4e1ea7 25f62a 5a3d41 - 02\n' # Forgot to type second "/" + b'2 / e29442 3506da 4e1ea7 / 25f62a 5a3d42 - 02\n' # Typo (..42 not ..41) + b'2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02\n' # Correct! Congratulations + b'616263 646566 676869 6a6b6c 6d6e6f 707172 - 6d\n' + b'\n\n' # Abort [yN] => N + b'737475 88\n' # missing "-" + b'73747i - 88\n' # typo + b'73747 - 88\n' # missing nibble + b'73 74 75 - 89\n' # line checksum mismatch + b'00a1 - 88\n' # line hash collision - overall hash mismatch, have to start over + + b'2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02\n' + b'616263 646566 676869 6a6b6c 6d6e6f 707172 - 6d\n' + b'73 74 75 - 88\n' + ) + + # In case that this has to change, here is a quick way to find a colliding line hash: + # + # from hashlib import sha256 + # hash_fn = lambda x: sha256(b'\x00\x02' + x).hexdigest()[:2] + # for i in range(1000): + # if hash_fn(i.to_bytes(2, byteorder='big')) == '88': # 88 = line hash + # print(i.to_bytes(2, 'big')) + # break + + self.cmd('key', 'import', '--paper', self.repository_location, input=typed_input) + + # Test abort paths + typed_input = b'\ny\n' + self.cmd('key', 'import', '--paper', self.repository_location, input=typed_input) + typed_input = b'2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02\n\ny\n' + self.cmd('key', 'import', '--paper', self.repository_location, input=typed_input) + + def test_debug_dump_manifest(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + dump_file = self.output_path + '/dump' + output = self.cmd('debug', 'dump-manifest', self.repository_location, dump_file) + assert output == "" + with open(dump_file, "r") as f: + result = json.load(f) + assert 'archives' in result + assert 'config' in result + assert 'item_keys' in result + assert 'timestamp' in result + assert 'version' in result + + def test_debug_dump_archive(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + dump_file = self.output_path + '/dump' + output = self.cmd('debug', 'dump-archive', self.repository_location + "::test", dump_file) + assert output == "" + with open(dump_file, "r") as f: + result = json.load(f) + assert '_name' in result + assert '_manifest_entry' in result + assert '_meta' in result + assert '_items' in result + + def test_debug_refcount_obj(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + output = self.cmd('debug', 'refcount-obj', self.repository_location, '0' * 64).strip() + assert output == 'object 0000000000000000000000000000000000000000000000000000000000000000 not found [info from chunks cache].' + + create_json = json.loads(self.cmd('create', '--json', self.repository_location + '::test', 'input')) + archive_id = create_json['archive']['id'] + output = self.cmd('debug', 'refcount-obj', self.repository_location, archive_id).strip() + assert output == 'object ' + archive_id + ' has 1 referrers [info from chunks cache].' + + # Invalid IDs do not abort or return an error + output = self.cmd('debug', 'refcount-obj', self.repository_location, '124', 'xyza').strip() + assert output == 'object id 124 is invalid.\nobject id xyza is invalid.' 
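+ # Illustrative sketch, not part of the test suite: a self-contained version of the line-hash probe
+ # that the comment inside test_key_import_paperkey above only outlines. The b'\x00\x02' prefix and
+ # the '88' target are taken from that comment; the function name is made up here.
+ #
+ # from hashlib import sha256
+ #
+ # def find_colliding_line(target='88', prefix=b'\x00\x02', limit=1000):
+ #     """Return the first 2-byte payload whose truncated SHA-256 matches the target line hash."""
+ #     for i in range(limit):
+ #         payload = i.to_bytes(2, byteorder='big')
+ #         if sha256(prefix + payload).hexdigest()[:2] == target:
+ #             return payload
+ #
+ # find_colliding_line()  # e.g. the '00a1 - 88' collision line in typed_input above was found this way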
+
+    def test_debug_info(self):
+        output = self.cmd('debug', 'info')
+        assert 'CRC implementation' in output
+        assert 'Python' in output
+
+    def test_benchmark_crud(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        with environment_variable(_BORG_BENCHMARK_CRUD_TEST='YES'):
+            self.cmd('benchmark', 'crud', self.repository_location, self.input_path)
+
+    requires_gnutar = pytest.mark.skipif(not have_gnutar(), reason='GNU tar must be installed for this test.')
+    requires_gzip = pytest.mark.skipif(not shutil.which('gzip'), reason='gzip must be installed for this test.')
+
+    @requires_gnutar
+    def test_export_tar(self):
+        self.create_test_files()
+        os.unlink('input/flagfile')
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress')
+        with changedir('output'):
+            # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
+            subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
+        self.assert_dirs_equal('input', 'output/input', ignore_bsdflags=True, ignore_xattrs=True, ignore_ns=True)
+
+    @requires_gnutar
+    @requires_gzip
+    def test_export_tar_gz(self):
+        if not shutil.which('gzip'):
+            pytest.skip('gzip is not installed')
+        self.create_test_files()
+        os.unlink('input/flagfile')
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list')
+        assert 'input/file1\n' in list
+        assert 'input/dir2\n' in list
+        with changedir('output'):
+            subprocess.check_call(['tar', 'xpf', '../simple.tar.gz', '--warning=no-timestamp'])
+        self.assert_dirs_equal('input', 'output/input', ignore_bsdflags=True, ignore_xattrs=True, ignore_ns=True)
+
+    @requires_gnutar
+    def test_export_tar_strip_components(self):
+        self.create_test_files()
+        os.unlink('input/flagfile')
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list')
+        # --list's paths are those before processing with --strip-components
+        assert 'input/file1\n' in list
+        assert 'input/dir2\n' in list
+        with changedir('output'):
+            subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
+        self.assert_dirs_equal('input', 'output/', ignore_bsdflags=True, ignore_xattrs=True, ignore_ns=True)
+
+    @requires_hardlinks
+    @requires_gnutar
+    def test_export_tar_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2')
+        with changedir('output'):
+            subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
+        assert os.stat('hardlink').st_nlink == 2
+        assert os.stat('subdir/hardlink').st_nlink == 2
+        assert os.stat('aaaa').st_nlink == 2
+        assert os.stat('source2').st_nlink == 2
+
+    @requires_hardlinks
+    @requires_gnutar
+    def test_extract_hardlinks_tar(self):
+        self._extract_hardlinks_setup()
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1')
+        with changedir('output'):
+            subprocess.check_call(['tar', 'xpf', 
'../output.tar', '--warning=no-timestamp']) + assert os.stat('input/dir1/hardlink').st_nlink == 2 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2 + assert os.stat('input/dir1/aaaa').st_nlink == 2 + assert os.stat('input/dir1/source2').st_nlink == 2 + + def test_detect_attic_repo(self): + path = attic_repo(self.repository_path) + cmds = [ + ['create', path + '::test', self.tmpdir], + ['extract', path + '::test'], + ['check', path], + ['rename', path + '::test', 'newname'], + ['list', path], + ['delete', path], + ['prune', path], + ['info', path + '::test'], + ['key', 'export', path, 'exported'], + ['key', 'import', path, 'import'], + ['change-passphrase', path], + ['break-lock', path], + ] + for args in cmds: + output = self.cmd(*args, fork=True, exit_code=2) + assert 'Attic repository detected.' in output + @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available') class ArchiverTestCaseBinary(ArchiverTestCase): @@ -1713,15 +2827,11 @@ class ArchiverTestCaseBinary(ArchiverTestCase): pass @unittest.skip('patches objects') - def test_recreate_rechunkify_interrupt(self): + def test_extract_capabilities(self): pass @unittest.skip('patches objects') - def test_recreate_interrupt(self): - pass - - @unittest.skip('patches objects') - def test_recreate_changed_source(self): + def test_extract_xattrs_errors(self): pass @unittest.skip('test_basic_functionality seems incompatible with fakeroot and/or the binary.') @@ -1732,13 +2842,19 @@ class ArchiverTestCaseBinary(ArchiverTestCase): def test_overwrite(self): pass + def test_fuse(self): + if fakeroot_detected(): + self.skipTest('test_fuse with the binary is not compatible with fakeroot') + else: + super().test_fuse() + class ArchiverCheckTestCase(ArchiverTestCaseBase): def setUp(self): super().setUp() with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_src_archive('archive1') self.create_src_archive('archive2') @@ -1758,6 +2874,12 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.assert_in('Starting archive consistency check', output) output = self.cmd('check', '-v', '--archives-only', '--prefix=archive2', self.repository_location, exit_code=0) self.assert_not_in('archive1', output) + output = self.cmd('check', '-v', '--archives-only', '--first=1', self.repository_location, exit_code=0) + self.assert_in('archive1', output) + self.assert_not_in('archive2', output) + output = self.cmd('check', '-v', '--archives-only', '--last=1', self.repository_location, exit_code=0) + self.assert_not_in('archive1', output) + self.assert_in('archive2', output) def test_missing_file_chunk(self): archive, repository = self.open_archive('archive1') @@ -1769,12 +2891,14 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): repository.delete(killed_chunk.id) break else: - self.assert_true(False) # should not happen + self.fail('should not happen') repository.commit() self.cmd('check', self.repository_location, exit_code=1) output = self.cmd('check', '--repair', self.repository_location, exit_code=0) self.assert_in('New missing file chunk detected', output) self.cmd('check', self.repository_location, exit_code=0) + output = self.cmd('list', '--format={health}#{path}{LF}', self.repository_location + '::archive1', exit_code=0) + self.assert_in('broken#', output) # check that the file in the old archives has now a different chunk list without the killed chunk for archive_name in ('archive1', 'archive2'): archive, 
repository = self.open_archive(archive_name) @@ -1785,7 +2909,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.assert_not_in(killed_chunk, item.chunks) break else: - self.assert_true(False) # should not happen + self.fail('should not happen') # do a fresh backup (that will include the killed chunk) with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10): self.create_src_archive('archive3') @@ -1802,12 +2926,15 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.assert_equal(valid_chunks, item.chunks) break else: - self.assert_true(False) # should not happen + self.fail('should not happen') + # list is also all-healthy again + output = self.cmd('list', '--format={health}#{path}{LF}', self.repository_location + '::archive1', exit_code=0) + self.assert_not_in('broken#', output) def test_missing_archive_item_chunk(self): archive, repository = self.open_archive('archive1') with repository: - repository.delete(archive.metadata[b'items'][-5]) + repository.delete(archive.metadata.items[-5]) repository.commit() self.cmd('check', self.repository_location, exit_code=1) self.cmd('check', '--repair', self.repository_location, exit_code=0) @@ -1833,9 +2960,65 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.assert_in('archive2', output) self.cmd('check', self.repository_location, exit_code=0) + def test_corrupted_manifest(self): + archive, repository = self.open_archive('archive1') + with repository: + manifest = repository.get(Manifest.MANIFEST_ID) + corrupted_manifest = manifest + b'corrupted!' + repository.put(Manifest.MANIFEST_ID, corrupted_manifest) + repository.commit() + self.cmd('check', self.repository_location, exit_code=1) + output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0) + self.assert_in('archive1', output) + self.assert_in('archive2', output) + self.cmd('check', self.repository_location, exit_code=0) + + def test_manifest_rebuild_corrupted_chunk(self): + archive, repository = self.open_archive('archive1') + with repository: + manifest = repository.get(Manifest.MANIFEST_ID) + corrupted_manifest = manifest + b'corrupted!' + repository.put(Manifest.MANIFEST_ID, corrupted_manifest) + + chunk = repository.get(archive.id) + corrupted_chunk = chunk + b'corrupted!' + repository.put(archive.id, corrupted_chunk) + repository.commit() + self.cmd('check', self.repository_location, exit_code=1) + output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0) + self.assert_in('archive2', output) + self.cmd('check', self.repository_location, exit_code=0) + + def test_manifest_rebuild_duplicate_archive(self): + archive, repository = self.open_archive('archive1') + key = archive.key + with repository: + manifest = repository.get(Manifest.MANIFEST_ID) + corrupted_manifest = manifest + b'corrupted!' 
+ repository.put(Manifest.MANIFEST_ID, corrupted_manifest) + + archive = msgpack.packb({ + 'cmdline': [], + 'items': [], + 'hostname': 'foo', + 'username': 'bar', + 'name': 'archive1', + 'time': '2016-12-15T18:49:51.849711', + 'version': 1, + }) + archive_id = key.id_hash(archive) + repository.put(archive_id, key.encrypt(archive)) + repository.commit() + self.cmd('check', self.repository_location, exit_code=1) + self.cmd('check', '--repair', self.repository_location, exit_code=0) + output = self.cmd('list', self.repository_location) + self.assert_in('archive1', output) + self.assert_in('archive1.1', output) + self.assert_in('archive2', output) + def test_extra_chunks(self): self.cmd('check', self.repository_location, exit_code=0) - with Repository(self.repository_location) as repository: + with Repository(self.repository_location, exclusive=True) as repository: repository.put(b'01234567890123456789012345678901', b'xxxx') repository.commit() self.cmd('check', self.repository_location, exit_code=1) @@ -1860,6 +3043,10 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.cmd('check', self.repository_location, exit_code=0) output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1) assert bin_to_hex(chunk.id) + ', integrity error' in output + # repair (heal is tested in another test) + output = self.cmd('check', '--repair', '--verify-data', self.repository_location, exit_code=0) + assert bin_to_hex(chunk.id) + ', integrity error' in output + assert 'testsuite/archiver.py: New missing file chunk detected' in output def test_verify_data(self): self._test_verify_data('--encryption', 'repokey') @@ -1867,39 +3054,263 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): def test_verify_data_unencrypted(self): self._test_verify_data('--encryption', 'none') + def test_empty_repository(self): + with Repository(self.repository_location, exclusive=True) as repository: + for id_ in repository.list(): + repository.delete(id_) + repository.commit() + self.cmd('check', self.repository_location, exit_code=1) + + def test_attic013_acl_bug(self): + # Attic up to release 0.13 contained a bug where every item unintentionally received + # a b'acl'=None key-value pair. + # This bug can still live on in Borg repositories (through borg upgrade). + class Attic013Item: + def as_dict(self): + return { + # These are required + b'path': '1234', + b'mtime': 0, + b'mode': 0, + b'user': b'0', + b'group': b'0', + b'uid': 0, + b'gid': 0, + # acl is the offending key. 
+ b'acl': None, + } + + archive, repository = self.open_archive('archive1') + with repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + with Cache(repository, key, manifest) as cache: + archive = Archive(repository, key, manifest, '0.13', cache=cache, create=True) + archive.items_buffer.add(Attic013Item()) + archive.save() + self.cmd('check', self.repository_location, exit_code=0) + self.cmd('list', self.repository_location + '::0.13', exit_code=0) + + +class ManifestAuthenticationTest(ArchiverTestCaseBase): + def spoof_manifest(self, repository): + with repository: + _, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + repository.put(Manifest.MANIFEST_ID, key.encrypt(msgpack.packb({ + 'version': 1, + 'archives': {}, + 'config': {}, + 'timestamp': (datetime.utcnow() + timedelta(days=1)).strftime(ISO_FORMAT), + }))) + repository.commit() + + def test_fresh_init_tam_required(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + repository = Repository(self.repository_path, exclusive=True) + with repository: + manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + repository.put(Manifest.MANIFEST_ID, key.encrypt(msgpack.packb({ + 'version': 1, + 'archives': {}, + 'timestamp': (datetime.utcnow() + timedelta(days=1)).strftime(ISO_FORMAT), + }))) + repository.commit() + + with pytest.raises(TAMRequiredError): + self.cmd('list', self.repository_location) + + def test_not_required(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_src_archive('archive1234') + repository = Repository(self.repository_path, exclusive=True) + with repository: + shutil.rmtree(get_security_dir(bin_to_hex(repository.id))) + _, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + key.tam_required = False + key.change_passphrase(key._passphrase) + + manifest = msgpack.unpackb(key.decrypt(None, repository.get(Manifest.MANIFEST_ID))) + del manifest[b'tam'] + repository.put(Manifest.MANIFEST_ID, key.encrypt(msgpack.packb(manifest))) + repository.commit() + output = self.cmd('list', '--debug', self.repository_location) + assert 'archive1234' in output + assert 'TAM not found and not required' in output + # Run upgrade + self.cmd('upgrade', '--tam', self.repository_location) + # Manifest must be authenticated now + output = self.cmd('list', '--debug', self.repository_location) + assert 'archive1234' in output + assert 'TAM-verified manifest' in output + # Try to spoof / modify pre-1.0.9 + self.spoof_manifest(repository) + # Fails + with pytest.raises(TAMRequiredError): + self.cmd('list', self.repository_location) + # Force upgrade + self.cmd('upgrade', '--tam', '--force', self.repository_location) + self.cmd('list', self.repository_location) + + def test_disable(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_src_archive('archive1234') + self.cmd('upgrade', '--disable-tam', self.repository_location) + repository = Repository(self.repository_path, exclusive=True) + self.spoof_manifest(repository) + assert not self.cmd('list', self.repository_location) + + def test_disable2(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_src_archive('archive1234') + repository = Repository(self.repository_path, exclusive=True) + self.spoof_manifest(repository) + self.cmd('upgrade', '--disable-tam', self.repository_location) + assert not self.cmd('list', self.repository_location) + class RemoteArchiverTestCase(ArchiverTestCase): 
prefix = '__testsuite__:' - def test_remote_repo_restrict_to_path(self): - self.cmd('init', self.repository_location) - path_prefix = os.path.dirname(self.repository_path) - with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']): - self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_1')) - with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]): - self.cmd('init', self.repository_location + '_2') - with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]): - self.cmd('init', self.repository_location + '_3') + def open_repository(self): + return RemoteRepository(Location(self.repository_location)) - # skip fuse tests here, they deadlock since this change in exec_cmd: - # -output = subprocess.check_output(borg + args, stderr=None) - # +output = subprocess.check_output(borg + args, stderr=subprocess.STDOUT) - # this was introduced because some tests expect stderr contents to show up - # in "output" also. Also, the non-forking exec_cmd catches both, too. - @unittest.skip('deadlock issues') - def test_fuse(self): - pass + def test_remote_repo_restrict_to_path(self): + # restricted to repo directory itself: + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', self.repository_path]): + self.cmd('init', '--encryption=repokey', self.repository_location) + # restricted to repo directory itself, fail for other directories with same prefix: + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', self.repository_path]): + with pytest.raises(PathNotAllowed): + self.cmd('init', '--encryption=repokey', self.repository_location + '_0') + + # restricted to a completely different path: + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']): + with pytest.raises(PathNotAllowed): + self.cmd('init', '--encryption=repokey', self.repository_location + '_1') + path_prefix = os.path.dirname(self.repository_path) + # restrict to repo directory's parent directory: + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]): + self.cmd('init', '--encryption=repokey', self.repository_location + '_2') + # restrict to repo directory's parent directory and another directory: + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]): + self.cmd('init', '--encryption=repokey', self.repository_location + '_3') + + def test_remote_repo_restrict_to_repository(self): + # restricted to repo directory itself: + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-repository', self.repository_path]): + self.cmd('init', '--encryption=repokey', self.repository_location) + parent_path = os.path.join(self.repository_path, '..') + with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-repository', parent_path]): + with pytest.raises(PathNotAllowed): + self.cmd('init', '--encryption=repokey', self.repository_location) @unittest.skip('only works locally') def test_debug_put_get_delete_obj(self): pass + def test_strip_components_doesnt_leak(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file('dir/file', contents=b"test file contents 1") + self.create_regular_file('dir/file2', contents=b"test file contents 2") + self.create_regular_file('skipped-file1', contents=b"test file contents 3") + 
self.create_regular_file('skipped-file2', contents=b"test file contents 4") + self.create_regular_file('skipped-file3', contents=b"test file contents 5") + self.cmd('create', self.repository_location + '::test', 'input') + marker = 'cached responses left in RemoteRepository' + with changedir('output'): + res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '3') + self.assert_true(marker not in res) + with self.assert_creates_file('file'): + res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '2') + self.assert_true(marker not in res) + with self.assert_creates_file('dir/file'): + res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '1') + self.assert_true(marker not in res) + with self.assert_creates_file('input/dir/file'): + res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '0') + self.assert_true(marker not in res) + + +class ArchiverCorruptionTestCase(ArchiverTestCaseBase): + def setUp(self): + super().setUp() + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cache_path = json.loads(self.cmd('info', self.repository_location, '--json'))['cache']['path'] + + def corrupt(self, file): + with open(file, 'r+b') as fd: + fd.seek(-1, io.SEEK_END) + fd.write(b'1') + + def test_cache_chunks(self): + self.corrupt(os.path.join(self.cache_path, 'chunks')) + + if self.FORK_DEFAULT: + out = self.cmd('info', self.repository_location, exit_code=2) + assert 'failed integrity check' in out + else: + with pytest.raises(FileIntegrityError): + self.cmd('info', self.repository_location) + + def test_cache_files(self): + self.cmd('create', self.repository_location + '::test', 'input') + self.corrupt(os.path.join(self.cache_path, 'files')) + + if self.FORK_DEFAULT: + out = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2) + assert 'failed integrity check' in out + else: + with pytest.raises(FileIntegrityError): + self.cmd('create', self.repository_location + '::test1', 'input') + + def test_chunks_archive(self): + self.cmd('create', self.repository_location + '::test1', 'input') + # Find ID of test1 so we can corrupt it later :) + target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip() + self.cmd('create', self.repository_location + '::test2', 'input') + + # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d + self.cmd('delete', '--cache-only', self.repository_location) + self.cmd('info', self.repository_location, '--json') + + chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d') + assert len(os.listdir(chunks_archive)) == 4 # two archives, one chunks cache and one .integrity file each + + self.corrupt(os.path.join(chunks_archive, target_id + '.compact')) + + # Trigger cache sync by changing the manifest ID in the cache config + config_path = os.path.join(self.cache_path, 'config') + config = ConfigParser(interpolation=None) + config.read(config_path) + config.set('cache', 'manifest', bin_to_hex(bytes(32))) + with open(config_path, 'w') as fd: + config.write(fd) + + # Cache sync notices corrupted archive chunks, but automatically recovers. 
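+ # ("Recovers" means the corrupted per-archive index is discarded and rebuilt by fetching the
+ # archive from the repository again - the "Fetching and building archive index" message asserted
+ # below - so the create still completes, with a warning exit code.)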
+ out = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1) + assert 'Reading cached archive chunk index for test1' in out + assert 'Cached archive chunk index of test1 is corrupted' in out + assert 'Fetching and building archive index for test1' in out + + def test_old_version_interfered(self): + # Modify the main manifest ID without touching the manifest ID in the integrity section. + # This happens if a version without integrity checking modifies the cache. + config_path = os.path.join(self.cache_path, 'config') + config = ConfigParser(interpolation=None) + config.read(config_path) + config.set('cache', 'manifest', bin_to_hex(bytes(32))) + with open(config_path, 'w') as fd: + config.write(fd) + + out = self.cmd('info', self.repository_location) + assert 'Cache integrity data not available: old Borg version modified the cache.' in out + class DiffArchiverTestCase(ArchiverTestCaseBase): def test_basic_functionality(self): # Initialize test folder self.create_test_files() - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) # Setup files for the first snapshot self.create_regular_file('file_unchanged', size=128) @@ -1908,17 +3319,19 @@ class DiffArchiverTestCase(ArchiverTestCaseBase): self.create_regular_file('file_replaced', size=1024) os.mkdir('input/dir_replaced_with_file') os.chmod('input/dir_replaced_with_file', stat.S_IFDIR | 0o755) - os.mkdir('input/dir_replaced_with_link') os.mkdir('input/dir_removed') - os.symlink('input/dir_replaced_with_file', 'input/link_changed') - os.symlink('input/file_unchanged', 'input/link_removed') - os.symlink('input/file_removed2', 'input/link_target_removed') - os.symlink('input/empty', 'input/link_target_contents_changed') - os.symlink('input/empty', 'input/link_replaced_by_file') - os.link('input/empty', 'input/hardlink_contents_changed') - os.link('input/file_removed', 'input/hardlink_removed') - os.link('input/file_removed2', 'input/hardlink_target_removed') - os.link('input/file_replaced', 'input/hardlink_target_replaced') + if are_symlinks_supported(): + os.mkdir('input/dir_replaced_with_link') + os.symlink('input/dir_replaced_with_file', 'input/link_changed') + os.symlink('input/file_unchanged', 'input/link_removed') + os.symlink('input/file_removed2', 'input/link_target_removed') + os.symlink('input/empty', 'input/link_target_contents_changed') + os.symlink('input/empty', 'input/link_replaced_by_file') + if are_hardlinks_supported(): + os.link('input/empty', 'input/hardlink_contents_changed') + os.link('input/file_removed', 'input/hardlink_removed') + os.link('input/file_removed2', 'input/hardlink_target_removed') + os.link('input/file_replaced', 'input/hardlink_target_replaced') # Create the first snapshot self.cmd('create', self.repository_location + '::test0', 'input') @@ -1934,16 +3347,18 @@ class DiffArchiverTestCase(ArchiverTestCaseBase): os.chmod('input/dir_replaced_with_file', stat.S_IFREG | 0o755) os.mkdir('input/dir_added') os.rmdir('input/dir_removed') - os.rmdir('input/dir_replaced_with_link') - os.symlink('input/dir_added', 'input/dir_replaced_with_link') - os.unlink('input/link_changed') - os.symlink('input/dir_added', 'input/link_changed') - os.symlink('input/dir_added', 'input/link_added') - os.unlink('input/link_removed') - os.unlink('input/link_replaced_by_file') - self.create_regular_file('link_replaced_by_file', size=16384) - os.unlink('input/hardlink_removed') - os.link('input/file_added', 'input/hardlink_added') + if 
are_symlinks_supported(): + os.rmdir('input/dir_replaced_with_link') + os.symlink('input/dir_added', 'input/dir_replaced_with_link') + os.unlink('input/link_changed') + os.symlink('input/dir_added', 'input/link_changed') + os.symlink('input/dir_added', 'input/link_added') + os.unlink('input/link_replaced_by_file') + self.create_regular_file('link_replaced_by_file', size=16384) + os.unlink('input/link_removed') + if are_hardlinks_supported(): + os.unlink('input/hardlink_removed') + os.link('input/file_added', 'input/hardlink_added') with open('input/empty', 'ab') as fd: fd.write(b'appended_data') @@ -1952,64 +3367,74 @@ class DiffArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test1a', 'input') self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input') - def do_asserts(output, archive): + def do_asserts(output, can_compare_ids): # File contents changed (deleted and replaced with a new file) - assert 'B input/file_replaced' in output + change = 'B' if can_compare_ids else '{:<19}'.format('modified') + assert '{} input/file_replaced'.format(change) in output # File unchanged assert 'input/file_unchanged' not in output # Directory replaced with a regular file - assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output + if 'BORG_TESTS_IGNORE_MODES' not in os.environ: + assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output # Basic directory cases assert 'added directory input/dir_added' in output assert 'removed directory input/dir_removed' in output - # Basic symlink cases - assert 'changed link input/link_changed' in output - assert 'added link input/link_added' in output - assert 'removed link input/link_removed' in output + if are_symlinks_supported(): + # Basic symlink cases + assert 'changed link input/link_changed' in output + assert 'added link input/link_added' in output + assert 'removed link input/link_removed' in output - # Symlink replacing or being replaced - assert '] input/dir_replaced_with_link' in output - assert '] input/link_replaced_by_file' in output + # Symlink replacing or being replaced + assert '] input/dir_replaced_with_link' in output + assert '] input/link_replaced_by_file' in output - # Symlink target removed. Should not affect the symlink at all. - assert 'input/link_target_removed' not in output + # Symlink target removed. Should not affect the symlink at all. + assert 'input/link_target_removed' not in output # The inode has two links and the file contents changed. Borg # should notice the changes in both links. However, the symlink # pointing to the file is not changed. - assert '0 B input/empty' in output - assert '0 B input/hardlink_contents_changed' in output - assert 'input/link_target_contents_changed' not in output + change = '0 B' if can_compare_ids else '{:<19}'.format('modified') + assert '{} input/empty'.format(change) in output + if are_hardlinks_supported(): + assert '{} input/hardlink_contents_changed'.format(change) in output + if are_symlinks_supported(): + assert 'input/link_target_contents_changed' not in output # Added a new file and a hard link to it. Both links to the same # inode should appear as separate files. assert 'added 2.05 kB input/file_added' in output - assert 'added 2.05 kB input/hardlink_added' in output + if are_hardlinks_supported(): + assert 'added 2.05 kB input/hardlink_added' in output # The inode has two links and both of them are deleted. They should # appear as two deleted files. 
assert 'removed 256 B input/file_removed' in output - assert 'removed 256 B input/hardlink_removed' in output + if are_hardlinks_supported(): + assert 'removed 256 B input/hardlink_removed' in output # Another link (marked previously as the source in borg) to the # same inode was removed. This should not change this link at all. - assert 'input/hardlink_target_removed' not in output + if are_hardlinks_supported(): + assert 'input/hardlink_target_removed' not in output # Another link (marked previously as the source in borg) to the # same inode was replaced with a new regular file. This should not # change this link at all. - assert 'input/hardlink_target_replaced' not in output + if are_hardlinks_supported(): + assert 'input/hardlink_target_replaced' not in output - do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), '1a') + do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), True) # We expect exit_code=1 due to the chunker params warning - do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b') + do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), False) def test_sort_option(self): - self.cmd('init', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('a_file_removed', size=8) self.create_regular_file('f_file_removed', size=16) @@ -2057,16 +3482,23 @@ def test_get_args(): assert args.restrict_to_paths == ['/p1', '/p2'] # trying to cheat - try to execute different subcommand args = archiver.get_args(['borg', 'serve', '--restrict-to-path=/p1', '--restrict-to-path=/p2', ], - 'borg init /') + 'borg init --encryption=repokey /') + assert args.func == archiver.do_serve + + # Check that environment variables in the forced command don't cause issues. If the command + # were not forced, environment variables would be interpreted by the shell, but this does not + # happen for forced commands - we get the verbatim command line and need to deal with env vars. 
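The comment above notes that forced commands reach borg verbatim, with any leading environment assignments unexpanded. As a rough illustration only (a hypothetical helper, not borg's actual argument handling), peeling leading VAR=value assignments off such a verbatim command line could look like this:

    import re

    def split_forced_command(line):
        # hypothetical sketch: separate leading VAR=value assignments from the
        # command itself, since no shell ever interprets a forced command line
        tokens = line.split()
        env = {}
        while tokens and re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', tokens[0]):
            name, value = tokens.pop(0).split('=', 1)
            env[name] = value
        return env, tokens

    # split_forced_command('BORG_HOSTNAME_IS_UNIQUE=yes borg serve --info')
    # -> ({'BORG_HOSTNAME_IS_UNIQUE': 'yes'}, ['borg', 'serve', '--info'])

The test that follows exercises exactly this situation through archiver.get_args.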
+ args = archiver.get_args(['borg', 'serve', ], + 'BORG_HOSTNAME_IS_UNIQUE=yes borg serve --info') assert args.func == archiver.do_serve -def test_compare_chunk_contents(): +def test_chunk_content_equal(): def ccc(a, b): - chunks_a = [Chunk(data) for data in a] - chunks_b = [Chunk(data) for data in b] - compare1 = Archiver.compare_chunk_contents(iter(chunks_a), iter(chunks_b)) - compare2 = Archiver.compare_chunk_contents(iter(chunks_b), iter(chunks_a)) + chunks_a = [data for data in a] + chunks_b = [data for data in b] + compare1 = ItemDiff._chunk_content_equal(iter(chunks_a), iter(chunks_b)) + compare2 = ItemDiff._chunk_content_equal(iter(chunks_b), iter(chunks_a)) assert compare1 == compare2 return compare1 assert ccc([ @@ -2092,3 +3524,178 @@ def test_compare_chunk_contents(): ], [ b'1234', b'565' ]) + + +class TestBuildFilter: + @staticmethod + def peek_and_store_hardlink_masters(item, matched): + pass + + def test_basic(self): + matcher = PatternMatcher() + matcher.add([parse_pattern('included')], IECommand.Include) + filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0) + assert filter(Item(path='included')) + assert filter(Item(path='included/file')) + assert not filter(Item(path='something else')) + + def test_empty(self): + matcher = PatternMatcher(fallback=True) + filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0) + assert filter(Item(path='anything')) + + def test_strip_components(self): + matcher = PatternMatcher(fallback=True) + filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, strip_components=1) + assert not filter(Item(path='shallow')) + assert not filter(Item(path='shallow/')) # can this even happen? paths are normalized... + assert filter(Item(path='deep enough/file')) + assert filter(Item(path='something/dir/file')) + + +class TestCommonOptions: + @staticmethod + def define_common_options(add_common_option): + add_common_option('-h', '--help', action='help', help='show this help message and exit') + add_common_option('--critical', dest='log_level', help='foo', + action='store_const', const='critical', default='warning') + add_common_option('--error', dest='log_level', help='foo', + action='store_const', const='error', default='warning') + add_common_option('--append', dest='append', help='foo', + action='append', metavar='TOPIC', default=[]) + add_common_option('-p', '--progress', dest='progress', action='store_true', help='foo') + add_common_option('--lock-wait', dest='lock_wait', type=int, metavar='N', default=1, + help='(default: %(default)d).') + + @pytest.fixture + def basic_parser(self): + parser = argparse.ArgumentParser(prog='test', description='test parser', add_help=False) + parser.common_options = Archiver.CommonOptions(self.define_common_options, + suffix_precedence=('_level0', '_level1')) + return parser + + @pytest.fixture + def subparsers(self, basic_parser): + return basic_parser.add_subparsers(title='required arguments', metavar='') + + @pytest.fixture + def parser(self, basic_parser): + basic_parser.common_options.add_common_group(basic_parser, '_level0', provide_defaults=True) + return basic_parser + + @pytest.fixture + def common_parser(self, parser): + common_parser = argparse.ArgumentParser(add_help=False, prog='test') + parser.common_options.add_common_group(common_parser, '_level1') + return common_parser + + @pytest.fixture + def parse_vars_from_line(self, parser, subparsers, common_parser): + subparser = subparsers.add_parser('subcommand', 
parents=[common_parser], add_help=False, + description='foo', epilog='bar', help='baz', + formatter_class=argparse.RawDescriptionHelpFormatter) + subparser.set_defaults(func=1234) + subparser.add_argument('--append-only', dest='append_only', action='store_true') + + def parse_vars_from_line(*line): + print(line) + args = parser.parse_args(line) + parser.common_options.resolve(args) + return vars(args) + + return parse_vars_from_line + + def test_simple(self, parse_vars_from_line): + assert parse_vars_from_line('--error') == { + 'append': [], + 'lock_wait': 1, + 'log_level': 'error', + 'progress': False + } + + assert parse_vars_from_line('--error', 'subcommand', '--critical') == { + 'append': [], + 'lock_wait': 1, + 'log_level': 'critical', + 'progress': False, + 'append_only': False, + 'func': 1234, + } + + with pytest.raises(SystemExit): + parse_vars_from_line('--append-only', 'subcommand') + + assert parse_vars_from_line('--append=foo', '--append', 'bar', 'subcommand', '--append', 'baz') == { + 'append': ['foo', 'bar', 'baz'], + 'lock_wait': 1, + 'log_level': 'warning', + 'progress': False, + 'append_only': False, + 'func': 1234, + } + + @pytest.mark.parametrize('position', ('before', 'after', 'both')) + @pytest.mark.parametrize('flag,args_key,args_value', ( + ('-p', 'progress', True), + ('--lock-wait=3', 'lock_wait', 3), + )) + def test_flag_position_independence(self, parse_vars_from_line, position, flag, args_key, args_value): + line = [] + if position in ('before', 'both'): + line.append(flag) + line.append('subcommand') + if position in ('after', 'both'): + line.append(flag) + + result = { + 'append': [], + 'lock_wait': 1, + 'log_level': 'warning', + 'progress': False, + 'append_only': False, + 'func': 1234, + } + result[args_key] = args_value + + assert parse_vars_from_line(*line) == result + + +def test_parse_storage_quota(): + assert parse_storage_quota('50M') == 50 * 1000**2 + with pytest.raises(argparse.ArgumentTypeError): + parse_storage_quota('5M') + + +def get_all_parsers(): + """ + Return dict mapping command to parser. 
+ """ + parser = Archiver(prog='borg').build_parser() + parsers = {} + + def discover_level(prefix, parser, Archiver): + choices = {} + for action in parser._actions: + if action.choices is not None and 'SubParsersAction' in str(action.__class__): + for cmd, parser in action.choices.items(): + choices[prefix + cmd] = parser + if prefix and not choices: + return + + for command, parser in sorted(choices.items()): + discover_level(command + " ", parser, Archiver) + parsers[command] = parser + + discover_level("", parser, Archiver) + return parsers + + +@pytest.mark.parametrize('command, parser', list(get_all_parsers().items())) +def test_help_formatting(command, parser): + if isinstance(parser.epilog, RstToTextLazy): + assert parser.epilog.rst + + +@pytest.mark.parametrize('topic, helptext', list(Archiver.helptext.items())) +def test_help_formatting_helptexts(topic, helptext): + assert str(rst_to_terminal(helptext)) diff --git a/src/borg/testsuite/attic.tar.gz b/src/borg/testsuite/attic.tar.gz new file mode 100644 index 00000000..5c0f8dfa Binary files /dev/null and b/src/borg/testsuite/attic.tar.gz differ diff --git a/src/borg/testsuite/benchmark.py b/src/borg/testsuite/benchmark.py index 9751bc1a..dcc71b01 100644 --- a/src/borg/testsuite/benchmark.py +++ b/src/borg/testsuite/benchmark.py @@ -14,13 +14,13 @@ from .archiver import changedir, cmd @pytest.yield_fixture -def repo_url(request, tmpdir): - os.environ['BORG_PASSPHRASE'] = '123456' - os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = 'YES' - os.environ['BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'] = 'YES' - os.environ['BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'] = 'yes' - os.environ['BORG_KEYS_DIR'] = str(tmpdir.join('keys')) - os.environ['BORG_CACHE_DIR'] = str(tmpdir.join('cache')) +def repo_url(request, tmpdir, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', '123456') + monkeypatch.setenv('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING', 'YES') + monkeypatch.setenv('BORG_DELETE_I_KNOW_WHAT_I_AM_DOING', 'YES') + monkeypatch.setenv('BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK', 'yes') + monkeypatch.setenv('BORG_KEYS_DIR', str(tmpdir.join('keys'))) + monkeypatch.setenv('BORG_CACHE_DIR', str(tmpdir.join('cache'))) yield str(tmpdir.join('repository')) tmpdir.remove(rec=1) @@ -40,9 +40,11 @@ def testdata(request, tmpdir_factory): # do not use a binary zero (\0) to avoid sparse detection def data(size): return b'0' * size - if data_type == 'random': + elif data_type == 'random': def data(size): return os.urandom(size) + else: + raise ValueError("data_type must be 'random' or 'zeros'.") for i in range(count): with open(str(p.join(str(i))), "wb") as f: f.write(data(size)) diff --git a/src/borg/testsuite/cache.py b/src/borg/testsuite/cache.py new file mode 100644 index 00000000..6cce0cb7 --- /dev/null +++ b/src/borg/testsuite/cache.py @@ -0,0 +1,279 @@ +import io +import os.path + +from msgpack import packb + +import pytest + +from .hashindex import H +from .key import TestKey +from ..archive import Statistics +from ..cache import AdHocCache +from ..compress import CompressionSpec +from ..crypto.key import RepoKey +from ..hashindex import ChunkIndex, CacheSynchronizer +from ..helpers import Manifest +from ..repository import Repository + + +class TestCacheSynchronizer: + @pytest.fixture + def index(self): + return ChunkIndex() + + @pytest.fixture + def sync(self, index): + return CacheSynchronizer(index) + + def test_no_chunks(self, index, sync): + data = packb({ + 'foo': 'bar', + 'baz': 1234, + 'bar': 5678, + 'user': 'chunks', + 'chunks': [] + }) + 
sync.feed(data) + assert not len(index) + + def test_simple(self, index, sync): + data = packb({ + 'foo': 'bar', + 'baz': 1234, + 'bar': 5678, + 'user': 'chunks', + 'chunks': [ + (H(1), 1, 2), + (H(2), 2, 3), + ] + }) + sync.feed(data) + assert len(index) == 2 + assert index[H(1)] == (1, 1, 2) + assert index[H(2)] == (1, 2, 3) + + def test_multiple(self, index, sync): + data = packb({ + 'foo': 'bar', + 'baz': 1234, + 'bar': 5678, + 'user': 'chunks', + 'chunks': [ + (H(1), 1, 2), + (H(2), 2, 3), + ] + }) + data += packb({ + 'xattrs': { + 'security.foo': 'bar', + 'chunks': '123456', + }, + 'stuff': [ + (1, 2, 3), + ] + }) + data += packb({ + 'xattrs': { + 'security.foo': 'bar', + 'chunks': '123456', + }, + 'chunks': [ + (H(1), 1, 2), + (H(2), 2, 3), + ], + 'stuff': [ + (1, 2, 3), + ] + }) + data += packb({ + 'chunks': [ + (H(3), 1, 2), + ], + }) + data += packb({ + 'chunks': [ + (H(1), 1, 2), + ], + }) + + part1 = data[:70] + part2 = data[70:120] + part3 = data[120:] + sync.feed(part1) + sync.feed(part2) + sync.feed(part3) + assert len(index) == 3 + assert index[H(1)] == (3, 1, 2) + assert index[H(2)] == (2, 2, 3) + assert index[H(3)] == (1, 1, 2) + + @pytest.mark.parametrize('elem,error', ( + ({1: 2}, 'Unexpected object: map'), + (bytes(213), [ + 'Unexpected bytes in chunks structure', # structure 2/3 + 'Incorrect key length']), # structure 3/3 + (1, 'Unexpected object: integer'), + (1.0, 'Unexpected object: double'), + (True, 'Unexpected object: true'), + (False, 'Unexpected object: false'), + (None, 'Unexpected object: nil'), + )) + @pytest.mark.parametrize('structure', ( + lambda elem: {'chunks': elem}, + lambda elem: {'chunks': [elem]}, + lambda elem: {'chunks': [(elem, 1, 2)]}, + )) + def test_corrupted(self, sync, structure, elem, error): + packed = packb(structure(elem)) + with pytest.raises(ValueError) as excinfo: + sync.feed(packed) + if isinstance(error, str): + error = [error] + possible_errors = ['cache_sync_feed failed: ' + error for error in error] + assert str(excinfo.value) in possible_errors + + @pytest.mark.parametrize('data,error', ( + # Incorrect tuple length + ({'chunks': [(bytes(32), 2, 3, 4)]}, 'Invalid chunk list entry length'), + ({'chunks': [(bytes(32), 2)]}, 'Invalid chunk list entry length'), + # Incorrect types + ({'chunks': [(1, 2, 3)]}, 'Unexpected object: integer'), + ({'chunks': [(1, bytes(32), 2)]}, 'Unexpected object: integer'), + ({'chunks': [(bytes(32), 1.0, 2)]}, 'Unexpected object: double'), + )) + def test_corrupted_ancillary(self, index, sync, data, error): + packed = packb(data) + with pytest.raises(ValueError) as excinfo: + sync.feed(packed) + assert str(excinfo.value) == 'cache_sync_feed failed: ' + error + + def make_index_with_refcount(self, refcount): + index_data = io.BytesIO() + index_data.write(b'BORG_IDX') + # num_entries + index_data.write((1).to_bytes(4, 'little')) + # num_buckets + index_data.write((1).to_bytes(4, 'little')) + # key_size + index_data.write((32).to_bytes(1, 'little')) + # value_size + index_data.write((3 * 4).to_bytes(1, 'little')) + + index_data.write(H(0)) + index_data.write(refcount.to_bytes(4, 'little')) + index_data.write((1234).to_bytes(4, 'little')) + index_data.write((5678).to_bytes(4, 'little')) + + index_data.seek(0) + index = ChunkIndex.read(index_data) + return index + + def test_corrupted_refcount(self): + index = self.make_index_with_refcount(ChunkIndex.MAX_VALUE + 1) + sync = CacheSynchronizer(index) + data = packb({ + 'chunks': [ + (H(0), 1, 2), + ] + }) + with pytest.raises(ValueError) as excinfo: + 
sync.feed(data) + assert str(excinfo.value) == 'cache_sync_feed failed: invalid reference count' + + def test_refcount_max_value(self): + index = self.make_index_with_refcount(ChunkIndex.MAX_VALUE) + sync = CacheSynchronizer(index) + data = packb({ + 'chunks': [ + (H(0), 1, 2), + ] + }) + sync.feed(data) + assert index[H(0)] == (ChunkIndex.MAX_VALUE, 1234, 5678) + + def test_refcount_one_below_max_value(self): + index = self.make_index_with_refcount(ChunkIndex.MAX_VALUE - 1) + sync = CacheSynchronizer(index) + data = packb({ + 'chunks': [ + (H(0), 1, 2), + ] + }) + sync.feed(data) + # Incremented to maximum + assert index[H(0)] == (ChunkIndex.MAX_VALUE, 1234, 5678) + sync.feed(data) + assert index[H(0)] == (ChunkIndex.MAX_VALUE, 1234, 5678) + + +class TestAdHocCache: + @pytest.yield_fixture + def repository(self, tmpdir): + self.repository_location = os.path.join(str(tmpdir), 'repository') + with Repository(self.repository_location, exclusive=True, create=True) as repository: + repository.put(H(1), b'1234') + repository.put(Manifest.MANIFEST_ID, b'5678') + yield repository + + @pytest.fixture + def key(self, repository, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + key = RepoKey.create(repository, TestKey.MockArgs()) + key.compressor = CompressionSpec('none').compressor + return key + + @pytest.fixture + def manifest(self, repository, key): + Manifest(key, repository).write() + return Manifest.load(repository, key=key, operations=Manifest.NO_OPERATION_CHECK)[0] + + @pytest.fixture + def cache(self, repository, key, manifest): + return AdHocCache(repository, key, manifest) + + def test_does_not_contain_manifest(self, cache): + assert not cache.seen_chunk(Manifest.MANIFEST_ID) + + def test_does_not_delete_existing_chunks(self, repository, cache): + assert cache.seen_chunk(H(1)) == ChunkIndex.MAX_VALUE + cache.chunk_decref(H(1), Statistics()) + assert repository.get(H(1)) == b'1234' + + def test_does_not_overwrite(self, cache): + with pytest.raises(AssertionError): + cache.add_chunk(H(1), b'5678', Statistics(), overwrite=True) + + def test_seen_chunk_add_chunk_size(self, cache): + assert cache.add_chunk(H(1), b'5678', Statistics()) == (H(1), 4, 0) + + def test_deletes_chunks_during_lifetime(self, cache, repository): + """E.g. 
checkpoint archives""" + cache.add_chunk(H(5), b'1010', Statistics()) + assert cache.seen_chunk(H(5)) == 1 + cache.chunk_decref(H(5), Statistics()) + assert not cache.seen_chunk(H(5)) + with pytest.raises(Repository.ObjectNotFound): + repository.get(H(5)) + + def test_files_cache(self, cache): + assert cache.file_known_and_unchanged(bytes(32), None) is None + assert not cache.do_files + assert cache.files is None + + def test_txn(self, cache): + assert not cache._txn_active + cache.seen_chunk(H(5)) + assert cache._txn_active + assert cache.chunks + cache.rollback() + assert not cache._txn_active + assert not hasattr(cache, 'chunks') + + def test_incref_after_add_chunk(self, cache): + assert cache.add_chunk(H(3), b'5678', Statistics()) == (H(3), 4, 47) + assert cache.chunk_incref(H(3), Statistics()) == (H(3), 4, 47) + + def test_existing_incref_after_add_chunk(self, cache): + """This case occurs with part files, see Archive.chunk_file.""" + assert cache.add_chunk(H(1), b'5678', Statistics()) == (H(1), 4, 0) + assert cache.chunk_incref(H(1), Statistics()) == (H(1), 4, 0) diff --git a/src/borg/testsuite/checksums.py b/src/borg/testsuite/checksums.py new file mode 100644 index 00000000..5b0d9fb9 --- /dev/null +++ b/src/borg/testsuite/checksums.py @@ -0,0 +1,39 @@ +import os +import zlib +from binascii import unhexlify + +import pytest + +from ..algorithms import checksums +from ..helpers import bin_to_hex + +crc32_implementations = [checksums.crc32_slice_by_8] +if checksums.have_clmul: + crc32_implementations.append(checksums.crc32_clmul) + + +@pytest.mark.parametrize('implementation', crc32_implementations) +def test_crc32(implementation): + # This includes many critical values, like zero length, 3/4/5, 6/7/8 and so on which are near and on + # alignment boundaries. This is of course just a sanity check ie. "did it compile all right?". 
+ data = os.urandom(256) + initial_crc = 0x12345678 + for i in range(0, 256): + d = data[:i] + assert zlib.crc32(d, initial_crc) == implementation(d, initial_crc) + + +def test_xxh64(): + assert bin_to_hex(checksums.xxh64(b'test', 123)) == '2b81b9401bef86cf' + assert bin_to_hex(checksums.xxh64(b'test')) == '4fdcca5ddb678139' + assert bin_to_hex(checksums.xxh64(unhexlify( + '6f663f01c118abdea553373d5eae44e7dac3b6829b46b9bbeff202b6c592c22d724' + 'fb3d25a347cca6c5b8f20d567e4bb04b9cfa85d17f691590f9a9d32e8ccc9102e9d' + 'cf8a7e6716280cd642ce48d03fdf114c9f57c20d9472bb0f81c147645e6fa3d331'))) == '35d5d2f545d9511a' + + +def test_streaming_xxh64(): + hasher = checksums.StreamingXXH64(123) + hasher.update(b'te') + hasher.update(b'st') + assert bin_to_hex(hasher.digest()) == hasher.hexdigest() == '2b81b9401bef86cf' diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py index 1a435358..f881ad2c 100644 --- a/src/borg/testsuite/compress.py +++ b/src/borg/testsuite/compress.py @@ -1,3 +1,4 @@ +import os import zlib try: import lzma @@ -6,18 +7,18 @@ except ImportError: import pytest -from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4 +from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto buffer = bytes(2**16) data = b'fooooooooobaaaaaaaar' * 10 -params = dict(name='zlib', level=6, buffer=buffer) +params = dict(name='zlib', level=6) def test_get_compressor(): c = get_compressor(name='none') assert isinstance(c, CNONE) - c = get_compressor(name='lz4', buffer=buffer) + c = get_compressor(name='lz4') assert isinstance(c, LZ4) c = get_compressor(name='zlib') assert isinstance(c, ZLIB) @@ -35,13 +36,21 @@ def test_cnull(): def test_lz4(): - c = get_compressor(name='lz4', buffer=buffer) + c = get_compressor(name='lz4') cdata = c.compress(data) assert len(cdata) < len(data) assert data == c.decompress(cdata) assert data == Compressor(**params).decompress(cdata) # autodetect +def test_lz4_buffer_allocation(): + # test with a rather huge data object to see if buffer allocation / resizing works + data = os.urandom(50 * 2**20) # 50MiB incompressible data + c = get_compressor(name='lz4') + cdata = c.compress(data) + assert data == c.decompress(cdata) + + def test_zlib(): c = get_compressor(name='zlib') cdata = c.compress(data) @@ -83,18 +92,69 @@ def test_zlib_compat(): def test_compressor(): params_list = [ - dict(name='none', buffer=buffer), - dict(name='lz4', buffer=buffer), - dict(name='zlib', level=0, buffer=buffer), - dict(name='zlib', level=6, buffer=buffer), - dict(name='zlib', level=9, buffer=buffer), + dict(name='none'), + dict(name='lz4'), + dict(name='zlib', level=0), + dict(name='zlib', level=6), + dict(name='zlib', level=9), ] if lzma: params_list += [ - dict(name='lzma', level=0, buffer=buffer), - dict(name='lzma', level=6, buffer=buffer), + dict(name='lzma', level=0), + dict(name='lzma', level=6), # we do not test lzma on level 9 because of the huge memory needs ] for params in params_list: c = Compressor(**params) assert data == c.decompress(c.compress(data)) + + +def test_auto(): + compressor_auto_zlib = CompressionSpec('auto,zlib,9').compressor + compressor_lz4 = CompressionSpec('lz4').compressor + compressor_zlib = CompressionSpec('zlib,9').compressor + data = bytes(500) + compressed_auto_zlib = compressor_auto_zlib.compress(data) + compressed_lz4 = compressor_lz4.compress(data) + compressed_zlib = compressor_zlib.compress(data) + ratio = len(compressed_zlib) / len(compressed_lz4) + assert 
Compressor.detect(compressed_auto_zlib) == ZLIB if ratio < 0.99 else LZ4 + + data = b'\x00\xb8\xa3\xa2-O\xe1i\xb6\x12\x03\xc21\xf3\x8a\xf78\\\x01\xa5b\x07\x95\xbeE\xf8\xa3\x9ahm\xb1~' + compressed = compressor_auto_zlib.compress(data) + assert Compressor.detect(compressed) == CNONE + + +def test_compression_specs(): + with pytest.raises(ValueError): + CompressionSpec('') + + assert isinstance(CompressionSpec('none').compressor, CNONE) + assert isinstance(CompressionSpec('lz4').compressor, LZ4) + + zlib = CompressionSpec('zlib').compressor + assert isinstance(zlib, ZLIB) + assert zlib.level == 6 + zlib = CompressionSpec('zlib,0').compressor + assert isinstance(zlib, ZLIB) + assert zlib.level == 0 + zlib = CompressionSpec('zlib,9').compressor + assert isinstance(zlib, ZLIB) + assert zlib.level == 9 + with pytest.raises(ValueError): + CompressionSpec('zlib,9,invalid') + + lzma = CompressionSpec('lzma').compressor + assert isinstance(lzma, LZMA) + assert lzma.level == 6 + lzma = CompressionSpec('lzma,0').compressor + assert isinstance(lzma, LZMA) + assert lzma.level == 0 + lzma = CompressionSpec('lzma,9').compressor + assert isinstance(lzma, LZMA) + assert lzma.level == 9 + + with pytest.raises(ValueError): + CompressionSpec('lzma,9,invalid') + with pytest.raises(ValueError): + CompressionSpec('invalid') diff --git a/src/borg/testsuite/crypto.py b/src/borg/testsuite/crypto.py index b79a5d83..407a4e9a 100644 --- a/src/borg/testsuite/crypto.py +++ b/src/borg/testsuite/crypto.py @@ -1,7 +1,9 @@ from binascii import hexlify, unhexlify -from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256 -from ..crypto import increment_iv, bytes16_to_int, int_to_bytes16 +from ..crypto.low_level import AES256_CTR_HMAC_SHA256, AES256_OCB, CHACHA20_POLY1305, UNENCRYPTED, \ + IntegrityError, blake2b_256, hmac_sha256, openssl10 +from ..crypto.low_level import bytes_to_long, bytes_to_int, long_to_bytes +from ..crypto.low_level import hkdf_hmac_sha512 from . 
import BaseTestCase @@ -18,42 +20,162 @@ class CryptoTestCase(BaseTestCase): self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1) self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1') - def test_bytes16_to_int(self): - self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'), 1) - self.assert_equal(int_to_bytes16(1), b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1') - self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0'), 2 ** 64) - self.assert_equal(int_to_bytes16(2 ** 64), b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0') + def test_UNENCRYPTED(self): + iv = b'' # any IV is ok, it just must be set and not None + data = b'data' + header = b'header' + cs = UNENCRYPTED(None, None, iv, header_len=6) + envelope = cs.encrypt(data, header=header) + self.assert_equal(envelope, header + data) + got_data = cs.decrypt(envelope) + self.assert_equal(got_data, data) - def test_increment_iv(self): - iv0 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0' - iv1 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1' - iv2 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2' - self.assert_equal(increment_iv(iv0, 0), iv0) - self.assert_equal(increment_iv(iv0, 1), iv1) - self.assert_equal(increment_iv(iv0, 2), iv2) - iva = b'\0\0\0\0\0\0\0\0\xff\xff\xff\xff\xff\xff\xff\xff' - ivb = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x00' - ivc = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x01' - self.assert_equal(increment_iv(iva, 0), iva) - self.assert_equal(increment_iv(iva, 1), ivb) - self.assert_equal(increment_iv(iva, 2), ivc) - self.assert_equal(increment_iv(iv0, 2**64), ivb) - - def test_aes(self): - key = b'X' * 32 + def test_AES256_CTR_HMAC_SHA256(self): + # this tests the layout as in attic / borg < 1.2 (1 type byte, no aad) + mac_key = b'Y' * 32 + enc_key = b'X' * 32 + iv = 0 data = b'foo' * 10 - # encrypt - aes = AES(is_encrypt=True, key=key) - self.assert_equal(bytes_to_long(aes.iv, 8), 0) - cdata = aes.encrypt(data) + header = b'\x42' + # encrypt-then-mac + cs = AES256_CTR_HMAC_SHA256(mac_key, enc_key, iv, header_len=1, aad_offset=1) + hdr_mac_iv_cdata = cs.encrypt(data, header=header) + hdr = hdr_mac_iv_cdata[0:1] + mac = hdr_mac_iv_cdata[1:33] + iv = hdr_mac_iv_cdata[33:41] + cdata = hdr_mac_iv_cdata[41:] + self.assert_equal(hexlify(hdr), b'42') + self.assert_equal(hexlify(mac), b'af90b488b0cc4a8f768fe2d6814fa65aec66b148135e54f7d4d29a27f22f57a8') + self.assert_equal(hexlify(iv), b'0000000000000000') self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466') - self.assert_equal(bytes_to_long(aes.iv, 8), 2) - # decrypt - aes = AES(is_encrypt=False, key=key) - self.assert_equal(bytes_to_long(aes.iv, 8), 0) - pdata = aes.decrypt(cdata) + self.assert_equal(cs.next_iv(), 2) + # auth-then-decrypt + cs = AES256_CTR_HMAC_SHA256(mac_key, enc_key, header_len=len(header), aad_offset=1) + pdata = cs.decrypt(hdr_mac_iv_cdata) self.assert_equal(data, pdata) - self.assert_equal(bytes_to_long(aes.iv, 8), 2) + self.assert_equal(cs.next_iv(), 2) + # auth-failure due to corruption (corrupted data) + cs = AES256_CTR_HMAC_SHA256(mac_key, enc_key, header_len=len(header), aad_offset=1) + hdr_mac_iv_cdata_corrupted = hdr_mac_iv_cdata[:41] + b'\0' + hdr_mac_iv_cdata[42:] + self.assert_raises(IntegrityError, + lambda: cs.decrypt(hdr_mac_iv_cdata_corrupted)) + + def test_AES256_CTR_HMAC_SHA256_aad(self): + mac_key = b'Y' * 32 + enc_key = b'X' * 32 + iv = 0 + data = b'foo' * 10 + header = b'\x12\x34\x56' + # encrypt-then-mac + cs = AES256_CTR_HMAC_SHA256(mac_key, enc_key, iv, header_len=3, aad_offset=1) + 
hdr_mac_iv_cdata = cs.encrypt(data, header=header) + hdr = hdr_mac_iv_cdata[0:3] + mac = hdr_mac_iv_cdata[3:35] + iv = hdr_mac_iv_cdata[35:43] + cdata = hdr_mac_iv_cdata[43:] + self.assert_equal(hexlify(hdr), b'123456') + self.assert_equal(hexlify(mac), b'7659a915d9927072ef130258052351a17ef882692893c3850dd798c03d2dd138') + self.assert_equal(hexlify(iv), b'0000000000000000') + self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466') + self.assert_equal(cs.next_iv(), 2) + # auth-then-decrypt + cs = AES256_CTR_HMAC_SHA256(mac_key, enc_key, header_len=len(header), aad_offset=1) + pdata = cs.decrypt(hdr_mac_iv_cdata) + self.assert_equal(data, pdata) + self.assert_equal(cs.next_iv(), 2) + # auth-failure due to corruption (corrupted aad) + cs = AES256_CTR_HMAC_SHA256(mac_key, enc_key, header_len=len(header), aad_offset=1) + hdr_mac_iv_cdata_corrupted = hdr_mac_iv_cdata[:1] + b'\0' + hdr_mac_iv_cdata[2:] + self.assert_raises(IntegrityError, + lambda: cs.decrypt(hdr_mac_iv_cdata_corrupted)) + + def test_AE(self): + # used in legacy-like layout (1 type byte, no aad) + mac_key = None + enc_key = b'X' * 32 + iv = 0 + data = b'foo' * 10 + header = b'\x23' + tests = [ + # (ciphersuite class, exp_mac, exp_cdata) + ] + if not openssl10: + tests += [ + (AES256_OCB, + b'b6909c23c9aaebd9abbe1ff42097652d', + b'877ce46d2f62dee54699cebc3ba41d9ab613f7c486778c1b3636664b1493', ), + (CHACHA20_POLY1305, + b'fd08594796e0706cde1e8b461e3e0555', + b'a093e4b0387526f085d3c40cca84a35230a5c0dd766453b77ba38bcff775', ) + ] + for cs_cls, exp_mac, exp_cdata in tests: + # print(repr(cs_cls)) + # encrypt/mac + cs = cs_cls(mac_key, enc_key, iv, header_len=1, aad_offset=1) + hdr_mac_iv_cdata = cs.encrypt(data, header=header) + hdr = hdr_mac_iv_cdata[0:1] + mac = hdr_mac_iv_cdata[1:17] + iv = hdr_mac_iv_cdata[17:29] + cdata = hdr_mac_iv_cdata[29:] + self.assert_equal(hexlify(hdr), b'23') + self.assert_equal(hexlify(mac), exp_mac) + self.assert_equal(hexlify(iv), b'000000000000000000000000') + self.assert_equal(hexlify(cdata), exp_cdata) + self.assert_equal(cs.next_iv(), 1) + # auth/decrypt + cs = cs_cls(mac_key, enc_key, header_len=len(header), aad_offset=1) + pdata = cs.decrypt(hdr_mac_iv_cdata) + self.assert_equal(data, pdata) + self.assert_equal(cs.next_iv(), 1) + # auth-failure due to corruption (corrupted data) + cs = cs_cls(mac_key, enc_key, header_len=len(header), aad_offset=1) + hdr_mac_iv_cdata_corrupted = hdr_mac_iv_cdata[:29] + b'\0' + hdr_mac_iv_cdata[30:] + self.assert_raises(IntegrityError, + lambda: cs.decrypt(hdr_mac_iv_cdata_corrupted)) + + def test_AEAD(self): + # test with aad + mac_key = None + enc_key = b'X' * 32 + iv = 0 + data = b'foo' * 10 + header = b'\x12\x34\x56' + tests = [ + # (ciphersuite class, exp_mac, exp_cdata) + ] + if not openssl10: + tests += [ + (AES256_OCB, + b'f2748c412af1c7ead81863a18c2c1893', + b'877ce46d2f62dee54699cebc3ba41d9ab613f7c486778c1b3636664b1493', ), + (CHACHA20_POLY1305, + b'b7e7c9a79f2404e14f9aad156bf091dd', + b'a093e4b0387526f085d3c40cca84a35230a5c0dd766453b77ba38bcff775', ) + ] + for cs_cls, exp_mac, exp_cdata in tests: + # print(repr(cs_cls)) + # encrypt/mac + cs = cs_cls(mac_key, enc_key, iv, header_len=3, aad_offset=1) + hdr_mac_iv_cdata = cs.encrypt(data, header=header) + hdr = hdr_mac_iv_cdata[0:3] + mac = hdr_mac_iv_cdata[3:19] + iv = hdr_mac_iv_cdata[19:31] + cdata = hdr_mac_iv_cdata[31:] + self.assert_equal(hexlify(hdr), b'123456') + self.assert_equal(hexlify(mac), exp_mac) + self.assert_equal(hexlify(iv), 
b'000000000000000000000000') + self.assert_equal(hexlify(cdata), exp_cdata) + self.assert_equal(cs.next_iv(), 1) + # auth/decrypt + cs = cs_cls(mac_key, enc_key, header_len=len(header), aad_offset=1) + pdata = cs.decrypt(hdr_mac_iv_cdata) + self.assert_equal(data, pdata) + self.assert_equal(cs.next_iv(), 1) + # auth-failure due to corruption (corrupted aad) + cs = cs_cls(mac_key, enc_key, header_len=len(header), aad_offset=1) + hdr_mac_iv_cdata_corrupted = hdr_mac_iv_cdata[:1] + b'\0' + hdr_mac_iv_cdata[2:] + self.assert_raises(IntegrityError, + lambda: cs.decrypt(hdr_mac_iv_cdata_corrupted)) def test_hmac_sha256(self): # RFC 4231 test vectors @@ -80,3 +202,71 @@ class CryptoTestCase(BaseTestCase): hmac = unhexlify('82558a389a443c0ea4cc819899f2083a' '85f0faa3e578f8077a2e3ff46729665b') assert hmac_sha256(key, data) == hmac + + def test_blake2b_256(self): + # In BLAKE2 the output length actually is part of the hashes personality - it is *not* simple truncation like in + # the SHA-2 family. Therefore we need to generate test vectors ourselves (as is true for most applications that + # are not precisely vanilla BLAKE2b-512 or BLAKE2s-256). + # + # Obtained via "b2sum" utility from the official BLAKE2 repository. It calculates the exact hash of a file's + # contents, no extras (like length) included. + assert blake2b_256(b'', b'abc') == unhexlify('bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319') + assert blake2b_256(b'a', b'bc') == unhexlify('bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319') + assert blake2b_256(b'ab', b'c') == unhexlify('bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319') + assert blake2b_256(b'abc', b'') == unhexlify('bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319') + + key = unhexlify('e944973af2256d4d670c12dd75304c319f58f4e40df6fb18ef996cb47e063676') + data = memoryview(b'1234567890' * 100) + assert blake2b_256(key, data) == unhexlify('97ede832378531dd0f4c668685d166e797da27b47d8cd441e885b60abd5e0cb2') + + # These test vectors come from https://www.kullo.net/blog/hkdf-sha-512-test-vectors/ + # who claims to have verified these against independent Python and C++ implementations. 
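The HKDF tests below check hkdf_hmac_sha512 against the published RFC 5869 vectors referenced above. For orientation, a minimal standard-library reference of HKDF with HMAC-SHA-512 (a sketch for cross-checking, not the low_level implementation under test) is:

    import hashlib
    import hmac

    def hkdf_sha512_reference(ikm, salt, info, length):
        # RFC 5869 extract-then-expand using HMAC-SHA-512
        if salt is None:
            salt = bytes(hashlib.sha512().digest_size)  # default salt: 64 zero bytes
        prk = hmac.new(salt, ikm, hashlib.sha512).digest()  # extract step
        okm = b''
        t = b''
        for i in range((length + 63) // 64):  # expand in 64-byte blocks
            t = hmac.new(prk, t + info + bytes([i + 1]), hashlib.sha512).digest()
            okm += t
        return okm[:length]

Feeding the ikm/salt/info/length values from the vectors below into such a reference is one way to confirm they were transcribed correctly.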
+ + def test_hkdf_hmac_sha512(self): + ikm = b'\x0b' * 22 + salt = bytes.fromhex('000102030405060708090a0b0c') + info = bytes.fromhex('f0f1f2f3f4f5f6f7f8f9') + l = 42 + + okm = hkdf_hmac_sha512(ikm, salt, info, l) + assert okm == bytes.fromhex('832390086cda71fb47625bb5ceb168e4c8e26a1a16ed34d9fc7fe92c1481579338da362cb8d9f925d7cb') + + def test_hkdf_hmac_sha512_2(self): + ikm = bytes.fromhex('000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627' + '28292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f') + salt = bytes.fromhex('606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868' + '788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeaf') + info = bytes.fromhex('b0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7' + 'd8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff') + l = 82 + + okm = hkdf_hmac_sha512(ikm, salt, info, l) + assert okm == bytes.fromhex('ce6c97192805b346e6161e821ed165673b84f400a2b514b2fe23d84cd189ddf1b695b48cbd1c838844' + '1137b3ce28f16aa64ba33ba466b24df6cfcb021ecff235f6a2056ce3af1de44d572097a8505d9e7a93') + + def test_hkdf_hmac_sha512_3(self): + ikm = bytes.fromhex('0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b') + salt = None + info = b'' + l = 42 + + okm = hkdf_hmac_sha512(ikm, salt, info, l) + assert okm == bytes.fromhex('f5fa02b18298a72a8c23898a8703472c6eb179dc204c03425c970e3b164bf90fff22d04836d0e2343bac') + + def test_hkdf_hmac_sha512_4(self): + ikm = bytes.fromhex('0b0b0b0b0b0b0b0b0b0b0b') + salt = bytes.fromhex('000102030405060708090a0b0c') + info = bytes.fromhex('f0f1f2f3f4f5f6f7f8f9') + l = 42 + + okm = hkdf_hmac_sha512(ikm, salt, info, l) + assert okm == bytes.fromhex('7413e8997e020610fbf6823f2ce14bff01875db1ca55f68cfcf3954dc8aff53559bd5e3028b080f7c068') + + def test_hkdf_hmac_sha512_5(self): + ikm = bytes.fromhex('0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c') + salt = None + info = b'' + l = 42 + + okm = hkdf_hmac_sha512(ikm, salt, info, l) + assert okm == bytes.fromhex('1407d46013d98bc6decefcfee55f0f90b0c7f63d68eb1a80eaf07e953cfc0a3a5240a155d6e4daa965bb') diff --git a/src/borg/testsuite/file_integrity.py b/src/borg/testsuite/file_integrity.py new file mode 100644 index 00000000..6dee247a --- /dev/null +++ b/src/borg/testsuite/file_integrity.py @@ -0,0 +1,140 @@ + +import pytest + +from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError + + +class TestReadIntegrityFile: + def test_no_integrity(self, tmpdir): + protected_file = tmpdir.join('file') + protected_file.write('1234') + assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file)) is None + + def test_truncated_integrity(self, tmpdir): + protected_file = tmpdir.join('file') + protected_file.write('1234') + tmpdir.join('file.integrity').write('') + with pytest.raises(FileIntegrityError): + DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file)) + + def test_unknown_algorithm(self, tmpdir): + protected_file = tmpdir.join('file') + protected_file.write('1234') + tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}') + assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file)) is None + + @pytest.mark.parametrize('json', ( + '{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}', + '[]', + '1234.5', + '"A string"', + 'Invalid JSON', + )) + def test_malformed(self, tmpdir, json): + protected_file = 
tmpdir.join('file') + protected_file.write('1234') + tmpdir.join('file.integrity').write(json) + with pytest.raises(FileIntegrityError): + DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file)) + + +class TestDetachedIntegrityCheckedFile: + @pytest.fixture + def integrity_protected_file(self, tmpdir): + path = str(tmpdir.join('file')) + with DetachedIntegrityCheckedFile(path, write=True) as fd: + fd.write(b'foo and bar') + return path + + def test_simple(self, tmpdir, integrity_protected_file): + assert tmpdir.join('file').check(file=True) + assert tmpdir.join('file.integrity').check(file=True) + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + assert fd.read() == b'foo and bar' + + def test_corrupted_file(self, integrity_protected_file): + with open(integrity_protected_file, 'ab') as fd: + fd.write(b' extra data') + with pytest.raises(FileIntegrityError): + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + assert fd.read() == b'foo and bar extra data' + + def test_corrupted_file_partial_read(self, integrity_protected_file): + with open(integrity_protected_file, 'ab') as fd: + fd.write(b' extra data') + with pytest.raises(FileIntegrityError): + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + data = b'foo and bar' + assert fd.read(len(data)) == data + + @pytest.mark.parametrize('new_name', ( + 'different_file', + 'different_file.different_ext', + )) + def test_renamed_file(self, tmpdir, integrity_protected_file, new_name): + new_path = tmpdir.join(new_name) + tmpdir.join('file').move(new_path) + tmpdir.join('file.integrity').move(new_path + '.integrity') + with pytest.raises(FileIntegrityError): + with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd: + assert fd.read() == b'foo and bar' + + def test_moved_file(self, tmpdir, integrity_protected_file): + new_dir = tmpdir.mkdir('another_directory') + tmpdir.join('file').move(new_dir.join('file')) + tmpdir.join('file.integrity').move(new_dir.join('file.integrity')) + new_path = str(new_dir.join('file')) + with DetachedIntegrityCheckedFile(new_path, write=False) as fd: + assert fd.read() == b'foo and bar' + + def test_no_integrity(self, tmpdir, integrity_protected_file): + tmpdir.join('file.integrity').remove() + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + assert fd.read() == b'foo and bar' + + +class TestDetachedIntegrityCheckedFileParts: + @pytest.fixture + def integrity_protected_file(self, tmpdir): + path = str(tmpdir.join('file')) + with DetachedIntegrityCheckedFile(path, write=True) as fd: + fd.write(b'foo and bar') + fd.hash_part('foopart') + fd.write(b' other data') + return path + + def test_simple(self, integrity_protected_file): + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + data1 = b'foo and bar' + assert fd.read(len(data1)) == data1 + fd.hash_part('foopart') + assert fd.read() == b' other data' + + def test_wrong_part_name(self, integrity_protected_file): + with pytest.raises(FileIntegrityError): + # Because some hash_part failed, the final digest will fail as well - again - even if we catch + # the failing hash_part. This is intentional: (1) it makes the code simpler (2) it's a good fail-safe + # against overly broad exception handling. 
+ with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + data1 = b'foo and bar' + assert fd.read(len(data1)) == data1 + with pytest.raises(FileIntegrityError): + # This specific bit raises it directly + fd.hash_part('barpart') + # Still explodes in the end. + + @pytest.mark.parametrize('partial_read', (False, True)) + def test_part_independence(self, integrity_protected_file, partial_read): + with open(integrity_protected_file, 'ab') as fd: + fd.write(b'some extra stuff that does not belong') + with pytest.raises(FileIntegrityError): + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: + data1 = b'foo and bar' + try: + assert fd.read(len(data1)) == data1 + fd.hash_part('foopart') + except FileIntegrityError: + assert False, 'This part must not raise, since this part is still valid.' + if not partial_read: + fd.read() + # But overall it explodes with the final digest. Neat, eh? diff --git a/src/borg/testsuite/hashindex.py b/src/borg/testsuite/hashindex.py index 000dfe4c..0b4b3bc5 100644 --- a/src/borg/testsuite/hashindex.py +++ b/src/borg/testsuite/hashindex.py @@ -1,11 +1,13 @@ import base64 import hashlib +import io import os import tempfile import zlib -from ..hashindex import NSIndex, ChunkIndex +from ..hashindex import NSIndex, ChunkIndex, ChunkIndexEntry from .. import hashindex +from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError from . import BaseTestCase # Note: these tests are part of the self test, do not use or import py.test functionality here. @@ -17,6 +19,11 @@ def H(x): return bytes('%-0.32d' % x, 'ascii') +def H2(x): + # like H(x), but with pseudo-random distribution of the output value + return hashlib.sha256(H(x)).digest() + + class HashIndexTestCase(BaseTestCase): def _generic_test(self, cls, make_value, sha): @@ -24,19 +31,28 @@ class HashIndexTestCase(BaseTestCase): self.assert_equal(len(idx), 0) # Test set for x in range(100): - idx[bytes('%-32d' % x, 'ascii')] = make_value(x) + idx[H(x)] = make_value(x) self.assert_equal(len(idx), 100) for x in range(100): - self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x)) + self.assert_equal(idx[H(x)], make_value(x)) # Test update for x in range(100): - idx[bytes('%-32d' % x, 'ascii')] = make_value(x * 2) + idx[H(x)] = make_value(x * 2) self.assert_equal(len(idx), 100) for x in range(100): - self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2)) + self.assert_equal(idx[H(x)], make_value(x * 2)) # Test delete for x in range(50): - del idx[bytes('%-32d' % x, 'ascii')] + del idx[H(x)] + # Test some keys still in there + for x in range(50, 100): + assert H(x) in idx + # Test some keys not there any more + for x in range(50): + assert H(x) not in idx + # Test delete non-existing key + for x in range(50): + self.assert_raises(KeyError, idx.__delitem__, H(x)) self.assert_equal(len(idx), 50) idx_name = tempfile.NamedTemporaryFile() idx.write(idx_name.name) @@ -48,7 +64,7 @@ class HashIndexTestCase(BaseTestCase): idx = cls.read(idx_name.name) self.assert_equal(len(idx), 50) for x in range(50, 100): - self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2)) + self.assert_equal(idx[H(x)], make_value(x * 2)) idx.clear() self.assert_equal(len(idx), 0) idx.write(idx_name.name) @@ -57,11 +73,11 @@ class HashIndexTestCase(BaseTestCase): def test_nsindex(self): self._generic_test(NSIndex, lambda x: (x, x), - '80fba5b40f8cf12f1486f1ba33c9d852fb2b41a5b5961d3b9d1228cf2aa9c4c9') + 
'85f72b036c692c8266e4f51ccf0cff2147204282b5e316ae508d30a448d88fef') def test_chunkindex(self): self._generic_test(ChunkIndex, lambda x: (x, x, x), - '1d71865e72e3c3af18d3c7216b6fa7b014695eaa3ed7f14cf9cd02fba75d1c95') + 'c83fdf33755fc37879285f2ecfc5d1f63b97577494902126b6fb6f3e4d852488') def test_resize(self): n = 2000 # Must be >= MIN_BUCKETS @@ -71,11 +87,11 @@ class HashIndexTestCase(BaseTestCase): initial_size = os.path.getsize(idx_name.name) self.assert_equal(len(idx), 0) for x in range(n): - idx[bytes('%-32d' % x, 'ascii')] = x, x + idx[H(x)] = x, x idx.write(idx_name.name) self.assert_true(initial_size < os.path.getsize(idx_name.name)) for x in range(n): - del idx[bytes('%-32d' % x, 'ascii')] + del idx[H(x)] self.assert_equal(len(idx), 0) idx.write(idx_name.name) self.assert_equal(initial_size, os.path.getsize(idx_name.name)) @@ -83,9 +99,12 @@ class HashIndexTestCase(BaseTestCase): def test_iteritems(self): idx = NSIndex() for x in range(100): - idx[bytes('%-0.32d' % x, 'ascii')] = x, x - all = list(idx.iteritems()) + idx[H(x)] = x, x + iterator = idx.iteritems() + all = list(iterator) self.assert_equal(len(all), 100) + # iterator is already exhausted by list(): + self.assert_raises(StopIteration, next, iterator) second_half = list(idx.iteritems(marker=all[49][0])) self.assert_equal(len(second_half), 50) self.assert_equal(second_half, all[50:]) @@ -122,19 +141,65 @@ class HashIndexTestCase(BaseTestCase): assert unique_chunks == 3 +class HashIndexExtraTestCase(BaseTestCase): + """These tests are separate because they should not become part of the selftest. + """ + def test_chunk_indexer(self): + # see _hashindex.c hash_sizes, we want to be close to the max. load + # because interesting errors happen there. + key_count = int(65537 * ChunkIndex.MAX_LOAD_FACTOR) - 10 + index = ChunkIndex(key_count) + all_keys = [hashlib.sha256(H(k)).digest() for k in range(key_count)] + # we're gonna delete 1/3 of all_keys, so let's split them 2/3 and 1/3: + keys, to_delete_keys = all_keys[0:(2*key_count//3)], all_keys[(2*key_count//3):] + + for i, key in enumerate(keys): + index[key] = (i, i, i) + for i, key in enumerate(to_delete_keys): + index[key] = (i, i, i) + + for key in to_delete_keys: + del index[key] + for i, key in enumerate(keys): + assert index[key] == (i, i, i) + for key in to_delete_keys: + assert index.get(key) is None + + # now delete every key still in the index + for key in keys: + del index[key] + # the index should now be empty + assert list(index.iteritems()) == [] + + +class HashIndexSizeTestCase(BaseTestCase): + def test_size_on_disk(self): + idx = ChunkIndex() + assert idx.size() == 18 + 1031 * (32 + 3 * 4) + + def test_size_on_disk_accurate(self): + idx = ChunkIndex() + for i in range(1234): + idx[H(i)] = i, i**2, i**3 + with tempfile.NamedTemporaryFile() as file: + idx.write(file.name) + size = os.path.getsize(file.name) + assert idx.size() == size + + class HashIndexRefcountingTestCase(BaseTestCase): def test_chunkindex_limit(self): idx = ChunkIndex() - idx[H(1)] = hashindex.MAX_VALUE - 1, 1, 2 + idx[H(1)] = ChunkIndex.MAX_VALUE - 1, 1, 2 # 5 is arbitray, any number of incref/decrefs shouldn't move it once it's limited for i in range(5): # first incref to move it to the limit refcount, *_ = idx.incref(H(1)) - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE for i in range(5): refcount, *_ = idx.decref(H(1)) - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE def _merge(self, refcounta, refcountb): def 
merge(refcount1, refcount2): @@ -153,23 +218,23 @@ class HashIndexRefcountingTestCase(BaseTestCase): def test_chunkindex_merge_limit1(self): # Check that it does *not* limit at MAX_VALUE - 1 # (MAX_VALUE is odd) - half = hashindex.MAX_VALUE // 2 - assert self._merge(half, half) == hashindex.MAX_VALUE - 1 + half = ChunkIndex.MAX_VALUE // 2 + assert self._merge(half, half) == ChunkIndex.MAX_VALUE - 1 def test_chunkindex_merge_limit2(self): # 3000000000 + 2000000000 > MAX_VALUE - assert self._merge(3000000000, 2000000000) == hashindex.MAX_VALUE + assert self._merge(3000000000, 2000000000) == ChunkIndex.MAX_VALUE def test_chunkindex_merge_limit3(self): # Crossover point: both addition and limit semantics will yield the same result - half = hashindex.MAX_VALUE // 2 - assert self._merge(half + 1, half) == hashindex.MAX_VALUE + half = ChunkIndex.MAX_VALUE // 2 + assert self._merge(half + 1, half) == ChunkIndex.MAX_VALUE def test_chunkindex_merge_limit4(self): # Beyond crossover, result of addition would be 2**31 - half = hashindex.MAX_VALUE // 2 - assert self._merge(half + 2, half) == hashindex.MAX_VALUE - assert self._merge(half + 1, half + 1) == hashindex.MAX_VALUE + half = ChunkIndex.MAX_VALUE // 2 + assert self._merge(half + 2, half) == ChunkIndex.MAX_VALUE + assert self._merge(half + 1, half + 1) == ChunkIndex.MAX_VALUE def test_chunkindex_add(self): idx1 = ChunkIndex() @@ -180,17 +245,17 @@ class HashIndexRefcountingTestCase(BaseTestCase): def test_incref_limit(self): idx1 = ChunkIndex() - idx1[H(1)] = (hashindex.MAX_VALUE, 6, 7) + idx1[H(1)] = (ChunkIndex.MAX_VALUE, 6, 7) idx1.incref(H(1)) refcount, *_ = idx1[H(1)] - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE def test_decref_limit(self): idx1 = ChunkIndex() - idx1[H(1)] = hashindex.MAX_VALUE, 6, 7 + idx1[H(1)] = ChunkIndex.MAX_VALUE, 6, 7 idx1.decref(H(1)) refcount, *_ = idx1[H(1)] - assert refcount == hashindex.MAX_VALUE + assert refcount == ChunkIndex.MAX_VALUE def test_decref_zero(self): idx1 = ChunkIndex() @@ -210,7 +275,7 @@ class HashIndexRefcountingTestCase(BaseTestCase): def test_setitem_raises(self): idx1 = ChunkIndex() with self.assert_raises(AssertionError): - idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0 + idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0, 0 def test_keyerror(self): idx = ChunkIndex() @@ -267,14 +332,180 @@ class HashIndexDataTestCase(BaseTestCase): idx2 = ChunkIndex() idx2[H(3)] = 2**32 - 123456, 6, 7 idx1.merge(idx2) - assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7) + assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7) + + +class HashIndexIntegrityTestCase(HashIndexDataTestCase): + def write_integrity_checked_index(self, tempdir): + idx = self._deserialize_hashindex(self.HASHINDEX) + file = os.path.join(tempdir, 'idx') + with IntegrityCheckedFile(path=file, write=True) as fd: + idx.write(fd) + integrity_data = fd.integrity_data + assert 'final' in integrity_data + assert 'HashHeader' in integrity_data + return file, integrity_data + + def test_integrity_checked_file(self): + with tempfile.TemporaryDirectory() as tempdir: + file, integrity_data = self.write_integrity_checked_index(tempdir) + with open(file, 'r+b') as fd: + fd.write(b'Foo') + with self.assert_raises(FileIntegrityError): + with IntegrityCheckedFile(path=file, write=False, integrity_data=integrity_data) as fd: + ChunkIndex.read(fd) + + +class HashIndexCompactTestCase(HashIndexDataTestCase): + def index(self, num_entries, num_buckets): + index_data = io.BytesIO() + index_data.write(b'BORG_IDX') + # num_entries + 
index_data.write(num_entries.to_bytes(4, 'little')) + # num_buckets + index_data.write(num_buckets.to_bytes(4, 'little')) + # key_size + index_data.write((32).to_bytes(1, 'little')) + # value_size + index_data.write((3 * 4).to_bytes(1, 'little')) + + self.index_data = index_data + + def index_from_data(self): + self.index_data.seek(0) + index = ChunkIndex.read(self.index_data) + return index + + def index_to_data(self, index): + data = io.BytesIO() + index.write(data) + return data.getvalue() + + def index_from_data_compact_to_data(self): + index = self.index_from_data() + index.compact() + compact_index = self.index_to_data(index) + return compact_index + + def write_entry(self, key, *values): + self.index_data.write(key) + for value in values: + self.index_data.write(value.to_bytes(4, 'little')) + + def write_empty(self, key): + self.write_entry(key, 0xffffffff, 0, 0) + + def write_deleted(self, key): + self.write_entry(key, 0xfffffffe, 0, 0) + + def test_simple(self): + self.index(num_entries=3, num_buckets=6) + self.write_entry(H2(0), 1, 2, 3) + self.write_deleted(H2(1)) + self.write_empty(H2(2)) + self.write_entry(H2(3), 5, 6, 7) + self.write_entry(H2(4), 8, 9, 10) + self.write_empty(H2(5)) + + compact_index = self.index_from_data_compact_to_data() + + self.index(num_entries=3, num_buckets=3) + self.write_entry(H2(0), 1, 2, 3) + self.write_entry(H2(3), 5, 6, 7) + self.write_entry(H2(4), 8, 9, 10) + assert compact_index == self.index_data.getvalue() + + def test_first_empty(self): + self.index(num_entries=3, num_buckets=6) + self.write_deleted(H2(1)) + self.write_entry(H2(0), 1, 2, 3) + self.write_empty(H2(2)) + self.write_entry(H2(3), 5, 6, 7) + self.write_entry(H2(4), 8, 9, 10) + self.write_empty(H2(5)) + + compact_index = self.index_from_data_compact_to_data() + + self.index(num_entries=3, num_buckets=3) + self.write_entry(H2(0), 1, 2, 3) + self.write_entry(H2(3), 5, 6, 7) + self.write_entry(H2(4), 8, 9, 10) + assert compact_index == self.index_data.getvalue() + + def test_last_used(self): + self.index(num_entries=3, num_buckets=6) + self.write_deleted(H2(1)) + self.write_entry(H2(0), 1, 2, 3) + self.write_empty(H2(2)) + self.write_entry(H2(3), 5, 6, 7) + self.write_empty(H2(5)) + self.write_entry(H2(4), 8, 9, 10) + + compact_index = self.index_from_data_compact_to_data() + + self.index(num_entries=3, num_buckets=3) + self.write_entry(H2(0), 1, 2, 3) + self.write_entry(H2(3), 5, 6, 7) + self.write_entry(H2(4), 8, 9, 10) + assert compact_index == self.index_data.getvalue() + + def test_too_few_empty_slots(self): + self.index(num_entries=3, num_buckets=6) + self.write_deleted(H2(1)) + self.write_entry(H2(0), 1, 2, 3) + self.write_entry(H2(3), 5, 6, 7) + self.write_empty(H2(2)) + self.write_empty(H2(5)) + self.write_entry(H2(4), 8, 9, 10) + + compact_index = self.index_from_data_compact_to_data() + + self.index(num_entries=3, num_buckets=3) + self.write_entry(H2(0), 1, 2, 3) + self.write_entry(H2(3), 5, 6, 7) + self.write_entry(H2(4), 8, 9, 10) + assert compact_index == self.index_data.getvalue() + + def test_empty(self): + self.index(num_entries=0, num_buckets=6) + self.write_deleted(H2(1)) + self.write_empty(H2(0)) + self.write_deleted(H2(3)) + self.write_empty(H2(2)) + self.write_empty(H2(5)) + self.write_deleted(H2(4)) + + compact_index = self.index_from_data_compact_to_data() + + self.index(num_entries=0, num_buckets=0) + assert compact_index == self.index_data.getvalue() + + def test_merge(self): + master = ChunkIndex() + idx1 = ChunkIndex() + idx1[H(1)] = 1, 100, 100 + 
idx1[H(2)] = 2, 200, 200 + idx1[H(3)] = 3, 300, 300 + idx1.compact() + assert idx1.size() == 18 + 3 * (32 + 3 * 4) + + master.merge(idx1) + assert master[H(1)] == (1, 100, 100) + assert master[H(2)] == (2, 200, 200) + assert master[H(3)] == (3, 300, 300) class NSIndexTestCase(BaseTestCase): def test_nsindex_segment_limit(self): idx = NSIndex() with self.assert_raises(AssertionError): - idx[H(1)] = hashindex.MAX_VALUE + 1, 0 + idx[H(1)] = NSIndex.MAX_VALUE + 1, 0 assert H(1) not in idx - idx[H(2)] = hashindex.MAX_VALUE, 0 + idx[H(2)] = NSIndex.MAX_VALUE, 0 assert H(2) in idx + + +class AllIndexTestCase(BaseTestCase): + def test_max_load_factor(self): + assert NSIndex.MAX_LOAD_FACTOR < 1.0 + assert ChunkIndex.MAX_LOAD_FACTOR < 1.0 diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 5754d075..9bb37bcc 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -1,32 +1,36 @@ import hashlib -import logging +import io import os +import shutil import sys +from argparse import ArgumentTypeError from datetime import datetime, timezone, timedelta from time import mktime, strptime, sleep import pytest + import msgpack import msgpack.fallback +from .. import platform from ..helpers import Location -from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError +from ..helpers import Buffer +from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError, replace_placeholders from ..helpers import make_path_safe, clean_lines -from ..helpers import prune_within, prune_split -from ..helpers import get_cache_dir, get_keys_dir +from ..helpers import interval, prune_within, prune_split +from ..helpers import get_cache_dir, get_keys_dir, get_security_dir, get_config_dir from ..helpers import is_slow_msgpack from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex -from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk +from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless -from ..helpers import load_excludes -from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2 -from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern +from ..helpers import swidth_slice +from ..helpers import chunkit +from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS +from ..helpers import popen_with_error_handling +from ..helpers import dash_open -from . import BaseTestCase, environment_variable, FakeInputs - -if sys.platform == 'win32': - import posixpath +from . 
import BaseTestCase, FakeInputs class BigIntTestCase(BaseTestCase): @@ -47,53 +51,151 @@ def test_bin_to_hex(): class TestLocationWithoutEnv: - def test_ssh(self, monkeypatch): + @pytest.fixture + def keys_dir(self, tmpdir, monkeypatch): + tmpdir = str(tmpdir) + monkeypatch.setenv('BORG_KEYS_DIR', tmpdir) + if not tmpdir.endswith(os.path.sep): + tmpdir += os.path.sep + return tmpdir + + def test_ssh(self, monkeypatch, keys_dir): monkeypatch.delenv('BORG_REPO', raising=False) assert repr(Location('ssh://user@host:1234/some/path::archive')) == \ "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" + assert Location('ssh://user@host:1234/some/path::archive').to_key_filename() == keys_dir + 'host__some_path' assert repr(Location('ssh://user@host:1234/some/path')) == \ "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" + assert repr(Location('ssh://user@host/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + assert repr(Location('ssh://user@[::]:1234/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='::', port=1234, path='/some/path', archive='archive')" + assert repr(Location('ssh://user@[::]:1234/some/path')) == \ + "Location(proto='ssh', user='user', host='::', port=1234, path='/some/path', archive=None)" + assert Location('ssh://user@[::]:1234/some/path').to_key_filename() == keys_dir + '____some_path' + assert repr(Location('ssh://user@[::]/some/path')) == \ + "Location(proto='ssh', user='user', host='::', port=None, path='/some/path', archive=None)" + assert repr(Location('ssh://user@[2001:db8::]:1234/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='2001:db8::', port=1234, path='/some/path', archive='archive')" + assert repr(Location('ssh://user@[2001:db8::]:1234/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::', port=1234, path='/some/path', archive=None)" + assert Location('ssh://user@[2001:db8::]:1234/some/path').to_key_filename() == keys_dir + '2001_db8____some_path' + assert repr(Location('ssh://user@[2001:db8::]/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::', port=None, path='/some/path', archive=None)" + assert repr(Location('ssh://user@[2001:db8::c0:ffee]:1234/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='2001:db8::c0:ffee', port=1234, path='/some/path', archive='archive')" + assert repr(Location('ssh://user@[2001:db8::c0:ffee]:1234/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::c0:ffee', port=1234, path='/some/path', archive=None)" + assert repr(Location('ssh://user@[2001:db8::c0:ffee]/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::c0:ffee', port=None, path='/some/path', archive=None)" + assert repr(Location('ssh://user@[2001:db8::192.0.2.1]:1234/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='2001:db8::192.0.2.1', port=1234, path='/some/path', archive='archive')" + assert repr(Location('ssh://user@[2001:db8::192.0.2.1]:1234/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::192.0.2.1', port=1234, path='/some/path', archive=None)" + assert repr(Location('ssh://user@[2001:db8::192.0.2.1]/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::192.0.2.1', port=None, path='/some/path', archive=None)" + assert Location('ssh://user@[2001:db8::192.0.2.1]/some/path').to_key_filename() == keys_dir + 
'2001_db8__192_0_2_1__some_path' - def test_file(self, monkeypatch): + def test_file(self, monkeypatch, keys_dir): monkeypatch.delenv('BORG_REPO', raising=False) if sys.platform != 'win32': assert repr(Location('file:///some/path::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" assert repr(Location('file:///some/path')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" + assert Location('file:///some/path').to_key_filename() == keys_dir + 'some_path' else: assert repr(Location('file://C:/some/path::archive')).replace('\\\\', '/') == \ "Location(proto='file', user=None, host=None, port=None, path='C:/some/path', archive='archive')" assert repr(Location('file://C:/some/path')).replace('\\\\', '/') == \ "Location(proto='file', user=None, host=None, port=None, path='C:/some/path', archive=None)" - def test_scp(self, monkeypatch): + def test_scp(self, monkeypatch, keys_dir): monkeypatch.delenv('BORG_REPO', raising=False) assert repr(Location('user@host:/some/path::archive')) == \ "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" assert repr(Location('user@host:/some/path')) == \ "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + assert repr(Location('user@[::]:/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='::', port=None, path='/some/path', archive='archive')" + assert repr(Location('user@[::]:/some/path')) == \ + "Location(proto='ssh', user='user', host='::', port=None, path='/some/path', archive=None)" + assert repr(Location('user@[2001:db8::]:/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='2001:db8::', port=None, path='/some/path', archive='archive')" + assert repr(Location('user@[2001:db8::]:/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::', port=None, path='/some/path', archive=None)" + assert repr(Location('user@[2001:db8::c0:ffee]:/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='2001:db8::c0:ffee', port=None, path='/some/path', archive='archive')" + assert repr(Location('user@[2001:db8::c0:ffee]:/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::c0:ffee', port=None, path='/some/path', archive=None)" + assert repr(Location('user@[2001:db8::192.0.2.1]:/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='2001:db8::192.0.2.1', port=None, path='/some/path', archive='archive')" + assert repr(Location('user@[2001:db8::192.0.2.1]:/some/path')) == \ + "Location(proto='ssh', user='user', host='2001:db8::192.0.2.1', port=None, path='/some/path', archive=None)" + assert Location('user@[2001:db8::192.0.2.1]:/some/path').to_key_filename() == keys_dir + '2001_db8__192_0_2_1__some_path' - def test_folder(self, monkeypatch): + def test_smb(self, monkeypatch, keys_dir): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('file:////server/share/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='//server/share/path', archive='archive')" + assert Location('file:////server/share/path::archive').to_key_filename() == keys_dir + 'server_share_path' + + def test_folder(self, monkeypatch, keys_dir): monkeypatch.delenv('BORG_REPO', raising=False) assert repr(Location('path::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" assert repr(Location('path')) == \ 
"Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" + assert Location('path').to_key_filename() == keys_dir + 'path' - def test_abspath(self, monkeypatch): + def test_long_path(self, monkeypatch, keys_dir): + monkeypatch.delenv('BORG_REPO', raising=False) + assert Location(os.path.join(*(40 * ['path']))).to_key_filename() == keys_dir + '_'.join(20 * ['path']) + '_' + + def test_abspath(self, monkeypatch, keys_dir): monkeypatch.delenv('BORG_REPO', raising=False) assert repr(Location('/some/absolute/path::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" assert repr(Location('/some/absolute/path')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" + assert Location('/some/absolute/path').to_key_filename() == keys_dir + 'some_absolute_path' + assert repr(Location('ssh://user@host/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + assert Location('ssh://user@host/some/path').to_key_filename() == keys_dir + 'host__some_path' - def test_relpath(self, monkeypatch): + def test_relpath(self, monkeypatch, keys_dir): monkeypatch.delenv('BORG_REPO', raising=False) assert repr(Location('some/relative/path::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" assert repr(Location('some/relative/path')) == \ "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" + assert Location('some/relative/path').to_key_filename() == keys_dir + 'some_relative_path' + assert repr(Location('ssh://user@host/./some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/./some/path', archive=None)" + assert Location('ssh://user@host/./some/path').to_key_filename() == keys_dir + 'host__some_path' + assert repr(Location('ssh://user@host/~/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/~/some/path', archive=None)" + assert Location('ssh://user@host/~/some/path').to_key_filename() == keys_dir + 'host__some_path' + assert repr(Location('ssh://user@host/~user/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/~user/some/path', archive=None)" + assert Location('ssh://user@host/~user/some/path').to_key_filename() == keys_dir + 'host__user_some_path' + + def test_with_colons(self, monkeypatch, keys_dir): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('/abs/path:w:cols::arch:col')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/abs/path:w:cols', archive='arch:col')" + assert repr(Location('/abs/path:with:colons::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/abs/path:with:colons', archive='archive')" + assert repr(Location('/abs/path:with:colons')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/abs/path:with:colons', archive=None)" + assert Location('/abs/path:with:colons').to_key_filename() == keys_dir + 'abs_path_with_colons' + + def test_user_parsing(self): + # see issue #1930 + assert repr(Location('host:path::2016-12-31@23:59:59')) == \ + "Location(proto='ssh', user=None, host='host', port=None, path='path', archive='2016-12-31@23:59:59')" + assert repr(Location('ssh://host/path::2016-12-31@23:59:59')) == \ + "Location(proto='ssh', user=None, host='host', port=None, path='/path', 
archive='2016-12-31@23:59:59')" def test_underspecified(self, monkeypatch): monkeypatch.delenv('BORG_REPO', raising=False) @@ -104,11 +206,6 @@ class TestLocationWithoutEnv: with pytest.raises(ValueError): Location() - def test_no_double_colon(self, monkeypatch): - monkeypatch.delenv('BORG_REPO', raising=False) - with pytest.raises(ValueError): - Location('ssh://localhost:22/path:archive') - def test_no_slashes(self, monkeypatch): monkeypatch.delenv('BORG_REPO', raising=False) with pytest.raises(ValueError): @@ -121,26 +218,33 @@ class TestLocationWithoutEnv: 'ssh://user@host:1234/some/path::archive'] for location in locations: assert Location(location).canonical_path() == \ - Location(Location(location).canonical_path()).canonical_path() + Location(Location(location).canonical_path()).canonical_path(), "failed: %s" % location def test_format_path(self, monkeypatch): monkeypatch.delenv('BORG_REPO', raising=False) test_pid = os.getpid() assert repr(Location('/some/path::archive{pid}')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive{}')".format(test_pid) - location_time1 = Location('/some/path::archive{now}') + location_time1 = Location('/some/path::archive{now:%s}') sleep(1.1) - location_time2 = Location('/some/path::archive{now}') + location_time2 = Location('/some/path::archive{now:%s}') assert location_time1.archive != location_time2.archive + def test_bad_syntax(self): + with pytest.raises(ValueError): + # this is invalid due to the 2nd colon, correct: 'ssh://user@host/path' + Location('ssh://user@host:/path') + class TestLocationWithEnv: def test_ssh(self, monkeypatch): monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path') assert repr(Location('::archive')) == \ "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" - assert repr(Location()) == \ + assert repr(Location('::')) == \ "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" + assert repr(Location()) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" def test_file(self, monkeypatch): if sys.platform != 'win32': @@ -149,40 +253,61 @@ class TestLocationWithEnv: "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" assert repr(Location()) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" else: monkeypatch.setenv('BORG_REPO', 'file://C:/some/path') assert repr(Location('::archive')).replace('\\\\', '/') == \ "Location(proto='file', user=None, host=None, port=None, path='C:/some/path', archive='archive')" assert repr(Location()).replace('\\\\', '/') == \ "Location(proto='file', user=None, host=None, port=None, path='C:/some/path', archive=None)" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='file://C:/some/path', archive=None)" def test_scp(self, monkeypatch): monkeypatch.setenv('BORG_REPO', 'user@host:/some/path') assert repr(Location('::archive')) == \ "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" - assert repr(Location()) == \ + assert repr(Location('::')) == \ "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + assert repr(Location()) == \ + "Location(proto='ssh', user='user', 
host='host', port=None, path='/some/path', archive=None)" def test_folder(self, monkeypatch): monkeypatch.setenv('BORG_REPO', 'path') assert repr(Location('::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" - assert repr(Location()) == \ + assert repr(Location('::')) == \ "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" def test_abspath(self, monkeypatch): monkeypatch.setenv('BORG_REPO', '/some/absolute/path') assert repr(Location('::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" - assert repr(Location()) == \ + assert repr(Location('::')) == \ "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" def test_relpath(self, monkeypatch): monkeypatch.setenv('BORG_REPO', 'some/relative/path') assert repr(Location('::archive')) == \ "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" - assert repr(Location()) == \ + assert repr(Location('::')) == \ "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" + + def test_with_colons(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', '/abs/path:w:cols') + assert repr(Location('::arch:col')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/abs/path:w:cols', archive='arch:col')" + assert repr(Location('::')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/abs/path:w:cols', archive=None)" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/abs/path:w:cols', archive=None)" def test_no_slashes(self, monkeypatch): monkeypatch.setenv('BORG_REPO', '/some/absolute/path') @@ -201,323 +326,6 @@ class FormatTimedeltaTestCase(BaseTestCase): ) -def check_patterns(files, pattern, expected): - """Utility for testing patterns. - """ - assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths" - - matched = [f for f in files if pattern.match(f)] - - assert matched == (files if expected is None else expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. 
all match the given pattern - ("/", None), - ("/./", None), - ("", []), - ("/home/u", []), - ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]), - ("/etc", ["/etc/server/config", "/etc/server/hosts"]), - ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]), - ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/srv", ["/srv/messages", "/srv/dmesg"]), - ]) -@pytest.mark.skipif(sys.platform == 'win32', reason='Need some windows path tests') -def test_patterns_prefix(pattern, expected): - files = [ - "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", - "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", - ] - - check_patterns(files, PathPrefixPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("", []), - ("foo", []), - ("relative", ["relative/path1", "relative/two"]), - ("more", ["more/relative"]), - ]) -@pytest.mark.skipif(sys.platform == 'win32', reason='Need some windows path tests') -def test_patterns_prefix_relative(pattern, expected): - files = ["relative/path1", "relative/two", "more/relative"] - - check_patterns(files, PathPrefixPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. all match the given pattern - ("/*", None), - ("/./*", None), - ("*", None), - ("*/*", None), - ("*///*", None), - ("/home/u", []), - ("/home/*", - ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), - ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), - ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]), - ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), - ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), - ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/srv*", ["/srv/messages", "/srv/dmesg"]), - ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), - ]) -@pytest.mark.skipif(sys.platform == 'win32', reason='Need some windows path tests') -def test_patterns_fnmatch(pattern, expected): - files = [ - "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", - "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", - ] - - check_patterns(files, FnmatchPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. 
all match the given pattern - ("*", None), - ("**/*", None), - ("/**/*", None), - ("/./*", None), - ("*/*", None), - ("*///*", None), - ("/home/u", []), - ("/home/*", - ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), - ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), - ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]), - ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]), - ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]), - ("*/.pr????e", []), - ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), - ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), - ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), - ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]), - ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]), - ("/srv/*", ["/srv/messages", "/srv/dmesg"]), - ("/srv2/**", ["/srv2", "/srv2/blafasel"]), - ("/srv2/**/", ["/srv2/blafasel"]), - ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]), - ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]), - ]) -@pytest.mark.skipif(sys.platform == 'win32', reason='Need some windows path tests') -def test_patterns_shell(pattern, expected): - files = [ - "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", - "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg", - "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", - ] - - check_patterns(files, ShellPattern(pattern), expected) - - -@pytest.mark.parametrize("pattern, expected", [ - # "None" means all files, i.e. 
all match the given pattern - ("", None), - (".*", None), - ("^/", None), - ("^abc$", []), - ("^[^/]", []), - ("^(?!/srv|/foo|/opt)", - ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", - "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]), - ]) -@pytest.mark.skipif(sys.platform == 'win32', reason='Need some windows path tests') -def test_patterns_regex(pattern, expected): - files = [ - '/srv/data', '/foo/bar', '/home', - '/home/user/.profile', '/home/user/.bashrc', - '/home/user2/.profile', '/home/user2/public_html/index.html', - '/opt/log/messages.txt', '/opt/log/dmesg.txt', - "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", - ] - - obj = RegexPattern(pattern) - assert str(obj) == pattern - assert obj.pattern == pattern - - check_patterns(files, obj, expected) - - -def test_regex_pattern(): - # The forward slash must match the platform-specific path separator - assert RegexPattern("^/$").match("/") - assert RegexPattern("^/$").match(os.path.sep) - assert not RegexPattern(r"^\\$").match("/") - - -def use_normalized_unicode(): - return sys.platform in ("darwin",) - - -def _make_test_patterns(pattern): - return [PathPrefixPattern(pattern), - FnmatchPattern(pattern), - RegexPattern("^{}/foo$".format(pattern)), - ShellPattern(pattern), - ] - - -@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}")) -def test_composed_unicode_pattern(pattern): - assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode() - - -@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}")) -def test_decomposed_unicode_pattern(pattern): - assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode() - assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - -@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1"))) -def test_invalid_unicode_pattern(pattern): - assert not pattern.match("ba/foo") - assert pattern.match(str(b"ba\x80/foo", "latin1")) - - -@pytest.mark.parametrize("lines, expected", [ - # "None" means all files, i.e. 
none excluded - ([], None), - (["# Comment only"], None), - (["*"], []), - (["# Comment", - "*/something00.txt", - " *whitespace* ", - # Whitespace before comment - " #/ws*", - # Empty line - "", - "# EOF"], - ["/more/data", "/home", " #/wsfoobar"]), - (["re:.*"], []), - (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]), - ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]), - (["", "", "", - "# This is a test with mixed pattern styles", - # Case-insensitive pattern - "re:(?i)BAR|ME$", - "", - "*whitespace*", - "fm:*/something00*"], - ["/more/data"]), - ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]), - ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]), - (["pp:./"], None), - (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), - (["pp:aaabbb"], None), - (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), - ]) -def test_patterns_from_file(tmpdir, lines, expected): - files = [ - '/data/something00.txt', '/more/data', '/home', - ' #/wsfoobar', - '\tstart/whitespace', - '/whitespace/end\t', - ] - - def evaluate(filename): - matcher = PatternMatcher(fallback=True) - matcher.add(load_excludes(open(filename, "rt")), False) - return [path for path in files if matcher.match(path)] - - exclfile = tmpdir.join("exclude.txt") - - with exclfile.open("wt") as fh: - fh.write("\n".join(lines)) - - assert evaluate(str(exclfile)) == (files if expected is None else expected) - - -@pytest.mark.parametrize("pattern, cls", [ - ("", FnmatchPattern), - - # Default style - ("*", FnmatchPattern), - ("/data/*", FnmatchPattern), - - # fnmatch style - ("fm:", FnmatchPattern), - ("fm:*", FnmatchPattern), - ("fm:/data/*", FnmatchPattern), - ("fm:fm:/data/*", FnmatchPattern), - - # Regular expression - ("re:", RegexPattern), - ("re:.*", RegexPattern), - ("re:^/something/", RegexPattern), - ("re:re:^/something/", RegexPattern), - - # Path prefix - ("pp:", PathPrefixPattern), - ("pp:/", PathPrefixPattern), - ("pp:/data/", PathPrefixPattern), - ("pp:pp:/data/", PathPrefixPattern), - - # Shell-pattern style - ("sh:", ShellPattern), - ("sh:*", ShellPattern), - ("sh:/data/*", ShellPattern), - ("sh:sh:/data/*", ShellPattern), - ]) -def test_parse_pattern(pattern, cls): - assert isinstance(parse_pattern(pattern), cls) - - -@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"]) -def test_parse_pattern_error(pattern): - with pytest.raises(ValueError): - parse_pattern(pattern) - - -def test_pattern_matcher(): - pm = PatternMatcher() - - assert pm.fallback is None - - for i in ["", "foo", "bar"]: - assert pm.match(i) is None - - pm.add([RegexPattern("^a")], "A") - pm.add([RegexPattern("^b"), RegexPattern("^z")], "B") - pm.add([RegexPattern("^$")], "Empty") - pm.fallback = "FileNotFound" - - assert pm.match("") == "Empty" - assert pm.match("aaa") == "A" - assert pm.match("bbb") == "B" - assert pm.match("ccc") == "FileNotFound" - assert pm.match("xyz") == "FileNotFound" - assert pm.match("z") == "B" - - assert PatternMatcher(fallback="hey!").fallback == "hey!" 
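# Illustrative sketch (not part of the patch): minimal usage of the PatternMatcher
# API that the deleted test above exercises. The import path is an assumption --
# this changeset removes the pattern tests from this module, so the classes may be
# importable from a different location after the refactor.
from borg.helpers import PatternMatcher, RegexPattern

pm = PatternMatcher(fallback="no match")        # value returned when nothing matches
pm.add([RegexPattern("^/etc/")], "config")      # a list of patterns maps to one value
pm.add([RegexPattern("^/home/")], "user data")

assert pm.match("/etc/hosts") == "config"
assert pm.match("/home/user/.bashrc") == "user data"
assert pm.match("/srv/data") == "no match"      # fallback applies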
- - -def test_compression_specs(): - with pytest.raises(ValueError): - CompressionSpec('') - assert CompressionSpec('none') == dict(name='none') - assert CompressionSpec('lz4') == dict(name='lz4') - assert CompressionSpec('zlib') == dict(name='zlib', level=6) - assert CompressionSpec('zlib,0') == dict(name='zlib', level=0) - assert CompressionSpec('zlib,9') == dict(name='zlib', level=9) - with pytest.raises(ValueError): - CompressionSpec('zlib,9,invalid') - assert CompressionSpec('lzma') == dict(name='lzma', level=6) - assert CompressionSpec('lzma,0') == dict(name='lzma', level=0) - assert CompressionSpec('lzma,9') == dict(name='lzma', level=9) - with pytest.raises(ValueError): - CompressionSpec('lzma,9,invalid') - with pytest.raises(ValueError): - CompressionSpec('invalid') - - def test_chunkerparams(): assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095) assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095) @@ -576,16 +384,48 @@ class PruneSplitTestCase(BaseTestCase): dotest(test_archives, 0, [], []) -class PruneWithinTestCase(BaseTestCase): +class IntervalTestCase(BaseTestCase): + def test_interval(self): + self.assert_equal(interval('1H'), 1) + self.assert_equal(interval('1d'), 24) + self.assert_equal(interval('1w'), 168) + self.assert_equal(interval('1m'), 744) + self.assert_equal(interval('1y'), 8760) - def test(self): + def test_interval_time_unit(self): + with pytest.raises(ArgumentTypeError) as exc: + interval('H') + self.assert_equal( + exc.value.args, + ('Unexpected interval number "": expected an integer greater than 0',)) + with pytest.raises(ArgumentTypeError) as exc: + interval('-1d') + self.assert_equal( + exc.value.args, + ('Unexpected interval number "-1": expected an integer greater than 0',)) + with pytest.raises(ArgumentTypeError) as exc: + interval('food') + self.assert_equal( + exc.value.args, + ('Unexpected interval number "foo": expected an integer greater than 0',)) + + def test_interval_number(self): + with pytest.raises(ArgumentTypeError) as exc: + interval('5') + self.assert_equal( + exc.value.args, + ("Unexpected interval time unit \"5\": expected one of ['H', 'd', 'w', 'm', 'y']",)) + + +class PruneWithinTestCase(BaseTestCase): + def test_prune_within(self): def subset(lst, indices): return {lst[i] for i in indices} def dotest(test_archives, within, indices): for ta in test_archives, reversed(test_archives): - self.assert_equal(set(prune_within(ta, within)), + self.assert_equal(set(prune_within(ta, interval(within))), subset(test_archives, indices)) # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours @@ -622,38 +462,49 @@ class TestParseTimestamp(BaseTestCase): self.assert_equal(parse_timestamp('2015-04-19T20:25:00'), datetime(2015, 4, 19, 20, 25, 0, 0, timezone.utc)) -def test_get_cache_dir(): +def test_get_config_dir(monkeypatch): + """test that get_config_dir respects environment""" + monkeypatch.delenv('BORG_CONFIG_DIR', raising=False) + monkeypatch.delenv('XDG_CONFIG_HOME', raising=False) + assert get_config_dir() == os.path.join(os.path.expanduser('~'), '.config', 'borg') + monkeypatch.setenv('XDG_CONFIG_HOME', '/var/tmp/.config') + assert get_config_dir() == os.path.join('/var/tmp/.config', 'borg') + monkeypatch.setenv('BORG_CONFIG_DIR', '/var/tmp') + assert get_config_dir() == '/var/tmp' + + +def test_get_cache_dir(monkeypatch): """test that get_cache_dir respects environment""" - # reset BORG_CACHE_DIR in order to test default - old_env = None - if os.environ.get('BORG_CACHE_DIR'): - old_env = os.environ['BORG_CACHE_DIR'] - 
del(os.environ['BORG_CACHE_DIR']) + monkeypatch.delenv('BORG_CACHE_DIR', raising=False) + monkeypatch.delenv('XDG_CACHE_HOME', raising=False) assert get_cache_dir() == os.path.join(os.path.expanduser('~'), '.cache', 'borg') - os.environ['XDG_CACHE_HOME'] = '/var/tmp/.cache' + monkeypatch.setenv('XDG_CACHE_HOME', '/var/tmp/.cache') assert get_cache_dir() == os.path.join('/var/tmp/.cache', 'borg') - os.environ['BORG_CACHE_DIR'] = '/var/tmp' + monkeypatch.setenv('BORG_CACHE_DIR', '/var/tmp') assert get_cache_dir() == '/var/tmp' - # reset old env - if old_env is not None: - os.environ['BORG_CACHE_DIR'] = old_env -def test_get_keys_dir(): +def test_get_keys_dir(monkeypatch): """test that get_keys_dir respects environment""" - # reset BORG_KEYS_DIR in order to test default - old_env = None - if os.environ.get('BORG_KEYS_DIR'): - old_env = os.environ['BORG_KEYS_DIR'] - del(os.environ['BORG_KEYS_DIR']) + monkeypatch.delenv('BORG_KEYS_DIR', raising=False) + monkeypatch.delenv('XDG_CONFIG_HOME', raising=False) assert get_keys_dir() == os.path.join(os.path.expanduser('~'), '.config', 'borg', 'keys') - os.environ['XDG_CONFIG_HOME'] = '/var/tmp/.config' + monkeypatch.setenv('XDG_CONFIG_HOME', '/var/tmp/.config') assert get_keys_dir() == os.path.join('/var/tmp/.config', 'borg', 'keys') - os.environ['BORG_KEYS_DIR'] = '/var/tmp' + monkeypatch.setenv('BORG_KEYS_DIR', '/var/tmp') assert get_keys_dir() == '/var/tmp' - # reset old env - if old_env is not None: - os.environ['BORG_KEYS_DIR'] = old_env + + +def test_get_security_dir(monkeypatch): + """test that get_security_dir respects environment""" + monkeypatch.delenv('BORG_SECURITY_DIR', raising=False) + monkeypatch.delenv('XDG_CONFIG_HOME', raising=False) + assert get_security_dir() == os.path.join(os.path.expanduser('~'), '.config', 'borg', 'security') + assert get_security_dir(repository_id='1234') == os.path.join(os.path.expanduser('~'), '.config', 'borg', 'security', '1234') + monkeypatch.setenv('XDG_CONFIG_HOME', '/var/tmp/.config') + assert get_security_dir() == os.path.join('/var/tmp/.config', 'borg', 'security') + monkeypatch.setenv('BORG_SECURITY_DIR', '/var/tmp') + assert get_security_dir() == '/var/tmp' def test_file_size(): @@ -703,6 +554,26 @@ def test_file_size_sign(): assert format_file_size(size, sign=True) == fmt +@pytest.mark.parametrize('string,value', ( + ('1', 1), + ('20', 20), + ('5K', 5000), + ('1.75M', 1750000), + ('1e+9', 1e9), + ('-1T', -1e12), +)) +def test_parse_file_size(string, value): + assert parse_file_size(string) == int(value) + + +@pytest.mark.parametrize('string', ( + '', '5 Äpfel', '4E', '2229 bit', '1B', +)) +def test_parse_file_size_invalid(string): + with pytest.raises(ValueError): + parse_file_size(string) + + def test_is_slow_msgpack(): saved_packer = msgpack.Packer try: @@ -714,6 +585,61 @@ def test_is_slow_msgpack(): assert not is_slow_msgpack() +class TestBuffer: + def test_type(self): + buffer = Buffer(bytearray) + assert isinstance(buffer.get(), bytearray) + buffer = Buffer(bytes) # don't do that in practice + assert isinstance(buffer.get(), bytes) + + def test_len(self): + buffer = Buffer(bytearray, size=0) + b = buffer.get() + assert len(buffer) == len(b) == 0 + buffer = Buffer(bytearray, size=1234) + b = buffer.get() + assert len(buffer) == len(b) == 1234 + + def test_resize(self): + buffer = Buffer(bytearray, size=100) + assert len(buffer) == 100 + b1 = buffer.get() + buffer.resize(200) + assert len(buffer) == 200 + b2 = buffer.get() + assert b2 is not b1 # new, bigger buffer + buffer.resize(100) + 
assert len(buffer) >= 100 + b3 = buffer.get() + assert b3 is b2 # still same buffer (200) + buffer.resize(100, init=True) + assert len(buffer) == 100 # except on init + b4 = buffer.get() + assert b4 is not b3 # new, smaller buffer + + def test_limit(self): + buffer = Buffer(bytearray, size=100, limit=200) + buffer.resize(200) + assert len(buffer) == 200 + with pytest.raises(Buffer.MemoryLimitExceeded): + buffer.resize(201) + assert len(buffer) == 200 + + def test_get(self): + buffer = Buffer(bytearray, size=100, limit=200) + b1 = buffer.get(50) + assert len(b1) >= 50 # == 100 + b2 = buffer.get(100) + assert len(b2) >= 100 # == 100 + assert b2 is b1 # did not need resizing yet + b3 = buffer.get(200) + assert len(b3) == 200 + assert b3 is not b2 # new, resized buffer + with pytest.raises(Buffer.MemoryLimitExceeded): + buffer.get(201) # beyond limit + assert len(buffer) == 200 + + def test_yes_input(): inputs = list(TRUISH) input = FakeInputs(inputs) @@ -742,21 +668,20 @@ def test_yes_input_custom(): assert not yes(falsish=('NOPE', ), input=input) -def test_yes_env(): +def test_yes_env(monkeypatch): for value in TRUISH: - with environment_variable(OVERRIDE_THIS=value): - assert yes(env_var_override='OVERRIDE_THIS') + monkeypatch.setenv('OVERRIDE_THIS', value) + assert yes(env_var_override='OVERRIDE_THIS') for value in FALSISH: - with environment_variable(OVERRIDE_THIS=value): - assert not yes(env_var_override='OVERRIDE_THIS') + monkeypatch.setenv('OVERRIDE_THIS', value) + assert not yes(env_var_override='OVERRIDE_THIS') -def test_yes_env_default(): +def test_yes_env_default(monkeypatch): for value in DEFAULTISH: - with environment_variable(OVERRIDE_THIS=value): - assert yes(env_var_override='OVERRIDE_THIS', default=True) - with environment_variable(OVERRIDE_THIS=value): - assert not yes(env_var_override='OVERRIDE_THIS', default=False) + monkeypatch.setenv('OVERRIDE_THIS', value) + assert yes(env_var_override='OVERRIDE_THIS', default=True) + assert not yes(env_var_override='OVERRIDE_THIS', default=False) def test_yes_defaults(): @@ -805,24 +730,22 @@ def test_yes_output(capfd): assert 'false-msg' in err -def test_progress_percentage_multiline(capfd): - pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%") - pi.show(0) +def test_yes_env_output(capfd, monkeypatch): + env_var = 'OVERRIDE_SOMETHING' + monkeypatch.setenv(env_var, 'yes') + assert yes(env_var_override=env_var) out, err = capfd.readouterr() - assert err == ' 0%\n' - pi.show(420) - out, err = capfd.readouterr() - assert err == ' 42%\n' - pi.show(1000) - out, err = capfd.readouterr() - assert err == '100%\n' - pi.finish() - out, err = capfd.readouterr() - assert err == '' + assert out == '' + assert env_var in err + assert 'yes' in err -def test_progress_percentage_sameline(capfd): - pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=True, msg="%3.0f%%") +def test_progress_percentage_sameline(capfd, monkeypatch): + # run the test as if it was in a 4x1 terminal + monkeypatch.setenv('COLUMNS', '4') + monkeypatch.setenv('LINES', '1') + pi = ProgressIndicatorPercent(1000, step=5, start=0, msg="%3.0f%%") + pi.logger.setLevel('INFO') pi.show(0) out, err = capfd.readouterr() assert err == ' 0%\r' @@ -838,23 +761,26 @@ def test_progress_percentage_sameline(capfd): assert err == ' ' * 4 + '\r' -def test_progress_percentage_step(capfd): - pi = ProgressIndicatorPercent(100, step=2, start=0, same_line=False, msg="%3.0f%%") +def test_progress_percentage_step(capfd, monkeypatch): + # run the test as if it 
was in a 4x1 terminal + monkeypatch.setenv('COLUMNS', '4') + monkeypatch.setenv('LINES', '1') + pi = ProgressIndicatorPercent(100, step=2, start=0, msg="%3.0f%%") + pi.logger.setLevel('INFO') pi.show() out, err = capfd.readouterr() - assert err == ' 0%\n' + assert err == ' 0%\r' pi.show() out, err = capfd.readouterr() assert err == '' # no output at 1% as we have step == 2 pi.show() out, err = capfd.readouterr() - assert err == ' 2%\n' + assert err == ' 2%\r' def test_progress_percentage_quiet(capfd): - logging.getLogger('borg.output.progress').setLevel(logging.WARN) - - pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%") + pi = ProgressIndicatorPercent(1000, step=5, start=0, msg="%3.0f%%") + pi.logger.setLevel('WARN') pi.show(0) out, err = capfd.readouterr() assert err == '' @@ -904,7 +830,7 @@ def test_partial_format(): def test_chunk_file_wrapper(): - cfw = ChunkIteratorFileWrapper(iter([Chunk(b'abc'), Chunk(b'def')])) + cfw = ChunkIteratorFileWrapper(iter([b'abc', b'def'])) assert cfw.read(2) == b'ab' assert cfw.read(50) == b'cdef' assert cfw.exhausted @@ -914,6 +840,23 @@ def test_chunk_file_wrapper(): assert cfw.exhausted +def test_chunkit(): + it = chunkit('abcdefg', 3) + assert next(it) == ['a', 'b', 'c'] + assert next(it) == ['d', 'e', 'f'] + assert next(it) == ['g'] + with pytest.raises(StopIteration): + next(it) + with pytest.raises(StopIteration): + next(it) + + it = chunkit('ab', 3) + assert list(it) == [['a', 'b']] + + it = chunkit('', 3) + assert list(it) == [] + + def test_clean_lines(): conf = """\ #comment @@ -929,38 +872,6 @@ data2 assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ] -def test_compression_decider1(): - default = CompressionSpec('zlib') - conf = """ -# use super-fast lz4 compression on huge VM files in this path: -lz4:/srv/vm_disks - -# jpeg or zip files do not compress: -none:*.jpeg -none:*.zip -""".splitlines() - - cd = CompressionDecider1(default, []) # no conf, always use default - assert cd.decide('/srv/vm_disks/linux')['name'] == 'zlib' - assert cd.decide('test.zip')['name'] == 'zlib' - assert cd.decide('test')['name'] == 'zlib' - - cd = CompressionDecider1(default, [conf, ]) - assert cd.decide('/srv/vm_disks/linux')['name'] == 'lz4' - assert cd.decide('test.zip')['name'] == 'none' - assert cd.decide('test')['name'] == 'zlib' # no match in conf, use default - - -def test_compression_decider2(): - default = CompressionSpec('zlib') - - cd = CompressionDecider2(default) - compr_spec, chunk = cd.decide(Chunk(None)) - assert compr_spec['name'] == 'zlib' - compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma'))) - assert compr_spec['name'] == 'lzma' - - def test_format_line(): data = dict(foo='bar baz') assert format_line('', data) == '' @@ -974,3 +885,94 @@ def test_format_line_erroneous(): assert format_line('{invalid}', data) with pytest.raises(PlaceholderError): assert format_line('{}', data) + with pytest.raises(PlaceholderError): + assert format_line('{now!r}', data) + with pytest.raises(PlaceholderError): + assert format_line('{now.__class__.__module__.__builtins__}', data) + + +def test_replace_placeholders(): + now = datetime.now() + assert " " not in replace_placeholders('{now}') + assert int(replace_placeholders('{now:%Y}')) == now.year + + +def working_swidth(): + return platform.swidth('선') == 2 + + +@pytest.mark.skipif(not working_swidth(), reason='swidth() is not supported / active') +def test_swidth_slice(): + string = 
'나윤선나윤선나윤선나윤선나윤선' + assert swidth_slice(string, 1) == '' + assert swidth_slice(string, -1) == '' + assert swidth_slice(string, 4) == '나윤' + assert swidth_slice(string, -4) == '윤선' + + +@pytest.mark.skipif(not working_swidth(), reason='swidth() is not supported / active') +def test_swidth_slice_mixed_characters(): + string = '나윤a선나윤선나윤선나윤선나윤선' + assert swidth_slice(string, 5) == '나윤a' + assert swidth_slice(string, 6) == '나윤a' + + +def test_safe_timestamps(): + if SUPPORT_32BIT_PLATFORMS: + # ns fit into int64 + assert safe_ns(2 ** 64) <= 2 ** 63 - 1 + assert safe_ns(-1) == 0 + # s fit into int32 + assert safe_s(2 ** 64) <= 2 ** 31 - 1 + assert safe_s(-1) == 0 + # datetime won't fall over its y10k problem + beyond_y10k = 2 ** 100 + with pytest.raises(OverflowError): + datetime.utcfromtimestamp(beyond_y10k) + assert datetime.utcfromtimestamp(safe_s(beyond_y10k)) > datetime(2038, 1, 1) + assert datetime.utcfromtimestamp(safe_ns(beyond_y10k) / 1000000000) > datetime(2038, 1, 1) + else: + # ns fit into int64 + assert safe_ns(2 ** 64) <= 2 ** 63 - 1 + assert safe_ns(-1) == 0 + # s are so that their ns conversion fits into int64 + assert safe_s(2 ** 64) * 1000000000 <= 2 ** 63 - 1 + assert safe_s(-1) == 0 + # datetime won't fall over its y10k problem + beyond_y10k = 2 ** 100 + with pytest.raises(OverflowError): + datetime.utcfromtimestamp(beyond_y10k) + assert datetime.utcfromtimestamp(safe_s(beyond_y10k)) > datetime(2262, 1, 1) + assert datetime.utcfromtimestamp(safe_ns(beyond_y10k) / 1000000000) > datetime(2262, 1, 1) + + +class TestPopenWithErrorHandling: + @pytest.mark.skipif(not shutil.which('test'), reason='"test" binary is needed') + def test_simple(self): + proc = popen_with_error_handling('test 1') + assert proc.wait() == 0 + + @pytest.mark.skipif(shutil.which('borg-foobar-test-notexist'), reason='"borg-foobar-test-notexist" binary exists (somehow?)') + def test_not_found(self): + proc = popen_with_error_handling('borg-foobar-test-notexist 1234') + assert proc is None + + @pytest.mark.parametrize('cmd', ( + 'mismatched "quote', + 'foo --bar="baz', + '' + )) + def test_bad_syntax(self, cmd): + proc = popen_with_error_handling(cmd) + assert proc is None + + def test_shell(self): + with pytest.raises(AssertionError): + popen_with_error_handling('', shell=True) + + +def test_dash_open(): + assert dash_open('-', 'r') is sys.stdin + assert dash_open('-', 'w') is sys.stdout + assert dash_open('-', 'rb') is sys.stdin.buffer + assert dash_open('-', 'wb') is sys.stdout.buffer diff --git a/src/borg/testsuite/item.py b/src/borg/testsuite/item.py index b0b7569e..aa40cc06 100644 --- a/src/borg/testsuite/item.py +++ b/src/borg/testsuite/item.py @@ -1,5 +1,6 @@ import pytest +from ..cache import ChunkListEntry from ..item import Item from ..helpers import StableDict @@ -145,3 +146,26 @@ def test_unknown_property(): item = Item() with pytest.raises(AttributeError): item.unknown_attribute = None + + +def test_item_file_size(): + item = Item(mode=0o100666, chunks=[ + ChunkListEntry(csize=1, size=1000, id=None), + ChunkListEntry(csize=1, size=2000, id=None), + ]) + assert item.get_size() == 3000 + with pytest.raises(AssertionError): + item.get_size(compressed=True, memorize=True) + assert item.get_size(compressed=True) == 2 + item.get_size(memorize=True) + assert item.size == 3000 + + +def test_item_file_size_no_chunks(): + item = Item(mode=0o100666) + assert item.get_size() == 0 + + +def test_item_optr(): + item = Item() + assert Item.from_optr(item.to_optr()) is item diff --git a/src/borg/testsuite/key.py 
b/src/borg/testsuite/key.py index c62f5ffe..07531174 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -1,21 +1,24 @@ import getpass +import os.path import re import tempfile from binascii import hexlify, unhexlify +import msgpack import pytest -from ..crypto import bytes_to_long, num_aes_blocks -from ..helpers import Location -from ..helpers import Chunk +from ..crypto.key import Passphrase, PasswordRetriesExceeded, bin_to_hex +from ..crypto.key import PlaintextKey, PassphraseKey, AuthenticatedKey, RepoKey, KeyfileKey, \ + Blake2KeyfileKey, Blake2RepoKey, Blake2AuthenticatedKey +from ..crypto.key import ID_HMAC_SHA_256, ID_BLAKE2b_256 +from ..crypto.key import TAMRequiredError, TAMInvalid, TAMUnsupportedSuiteError, UnsupportedManifestError +from ..crypto.key import identify_key +from ..crypto.low_level import bytes_to_long +from ..crypto.low_level import IntegrityError as IntegrityErrorBase from ..helpers import IntegrityError -from ..key import PlaintextKey, PassphraseKey, KeyfileKey, Passphrase, PasswordRetriesExceeded, bin_to_hex - - -@pytest.fixture(autouse=True) -def clean_env(monkeypatch): - # Workaround for some tests (testsuite/archiver) polluting the environment - monkeypatch.delenv('BORG_PASSPHRASE', False) +from ..helpers import Location +from ..helpers import StableDict +from ..helpers import get_security_dir class TestKey: @@ -38,43 +41,107 @@ class TestKey: """)) keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314') + keyfile_blake2_key_file = """ + BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000 + hqlhbGdvcml0aG2mc2hhMjU2pGRhdGHaAZBu680Do3CmfWzeMCwe48KJi3Vps9mEDy7MKF + TastsEhiAd1RQMuxfZpklkLeddMMWk+aPtFiURRFb02JLXV5cKRC1o2ZDdiNa0nao+o6+i + gUjjsea9TAu25t3vxh8uQWs5BuKRLBRr0nUgrSd0IYMUgn+iVbLJRzCCssvxsklkwQxN3F + Y+MvBnn8kUXSeoSoQ2l0fBHzq94Y7LMOm/owMam5URnE8/UEc6ZXBrbyX4EXxDtUqJcs+D + i451thtlGdigDLpvf9nyK66mjiCpPCTCgtlzq0Pe1jcdhnsUYLg+qWzXZ7e2opEZoC6XxS + 3DIuBOxG3Odqj9IKB+6/kl94vz98awPWFSpYcLZVWu7sIP38ZkUK+ad5MHTo/LvTuZdFnd + iqKzZIDUJl3Zl1WGmP/0xVOmfIlznkCZy4d3SMuujwIcqQ5kDvwDRPpdhBBk+UWQY5vFXk + kR1NBNLSTyhAzu3fiUmFl0qZ+UWPRkGAEBy/NuoEibrWwab8BX97cATyvnmOqYkU9PT0C6 + l2l9E4bPpGhhc2jaACDnIa8KgKv84/b5sjaMgSZeIVkuKSLJy2NN8zoH8lnd36ppdGVyYX + Rpb25zzgABhqCkc2FsdNoAIEJLlLh7q74j3q53856H5GgzA1HH+aW5bA/as544+PGkp3Zl + cnNpb24B""".strip() + + keyfile_blake2_cdata = bytes.fromhex('04fdf9475cf2323c0ba7a99ddc011064f2e7d039f539f2e448' + '0e6f5fc6ff9993d604040404040404098c8cee1c6db8c28947') + # Verified against b2sum. 
Entire string passed to BLAKE2, including the padded 64 byte key contained in + # keyfile_blake2_key_file above is + # 19280471de95185ec27ecb6fc9edbb4f4db26974c315ede1cd505fab4250ce7cd0d081ea66946c + # 95f0db934d5f616921efbd869257e8ded2bd9bd93d7f07b1a30000000000000000000000000000 + # 000000000000000000000000000000000000000000000000000000000000000000000000000000 + # 00000000000000000000007061796c6f6164 + # p a y l o a d + keyfile_blake2_id = bytes.fromhex('d8bc68e961c79f99be39061589e5179b2113cd9226e07b08ddd4a1fef7ce93fb') + @pytest.fixture def keys_dir(self, request, monkeypatch, tmpdir): monkeypatch.setenv('BORG_KEYS_DIR', tmpdir) return tmpdir + @pytest.fixture(params=( + PlaintextKey, + AuthenticatedKey, + KeyfileKey, + RepoKey, + AuthenticatedKey, + Blake2KeyfileKey, + Blake2RepoKey, + Blake2AuthenticatedKey, + )) + def key(self, request, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + return request.param.create(self.MockRepository(), self.MockArgs()) + class MockRepository: class _Location: orig = '/some/place' + def canonical_path(self): + return self.orig + _location = _Location() id = bytes(32) id_str = bin_to_hex(id) + def get_free_nonce(self): + return None + + def commit_nonce_reservation(self, next_unreserved, start_nonce): + pass + + def save_key(self, data): + self.key_data = data + + def load_key(self): + return self.key_data + def test_plaintext(self): key = PlaintextKey.create(None, None) - chunk = Chunk(b'foo') - assert hexlify(key.id_hash(chunk.data)) == b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae' - assert chunk == key.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) + chunk = b'foo' + assert hexlify(key.id_hash(chunk)) == b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae' + assert chunk == key.decrypt(key.id_hash(chunk), key.encrypt(chunk)) def test_keyfile(self, monkeypatch, keys_dir): monkeypatch.setenv('BORG_PASSPHRASE', 'test') key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - assert bytes_to_long(key.enc_cipher.iv, 8) == 0 - manifest = key.encrypt(Chunk(b'XXX')) - assert key.extract_nonce(manifest) == 0 - manifest2 = key.encrypt(Chunk(b'XXX')) + assert key.cipher.next_iv() == 0 + manifest = key.encrypt(b'ABC') + assert key.cipher.extract_iv(manifest) == 0 + manifest2 = key.encrypt(b'ABC') assert manifest != manifest2 assert key.decrypt(None, manifest) == key.decrypt(None, manifest2) - assert key.extract_nonce(manifest2) == 1 - iv = key.extract_nonce(manifest) + assert key.cipher.extract_iv(manifest2) == 1 + iv = key.cipher.extract_iv(manifest) key2 = KeyfileKey.detect(self.MockRepository(), manifest) - assert bytes_to_long(key2.enc_cipher.iv, 8) == iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD) + assert key2.cipher.next_iv() >= iv + key2.cipher.block_count(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD) # Key data sanity check assert len({key2.id_key, key2.enc_key, key2.enc_hmac_key}) == 3 assert key2.chunk_seed != 0 - chunk = Chunk(b'foo') - assert chunk == key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) + chunk = b'foo' + assert chunk == key2.decrypt(key.id_hash(chunk), key.encrypt(chunk)) + + def test_keyfile_nonce_rollback_protection(self, monkeypatch, keys_dir): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + repository = self.MockRepository() + with open(os.path.join(get_security_dir(repository.id_str), 'nonce'), "w") as fd: + fd.write("0000000000002000") + key = KeyfileKey.create(repository, self.MockArgs()) + data = key.encrypt(b'ABC') + assert 
key.cipher.extract_iv(data) == 0x2000 + assert key.decrypt(None, data) == b'ABC' def test_keyfile_kfenv(self, tmpdir, monkeypatch): keyfile = tmpdir.join('keyfile') @@ -83,8 +150,8 @@ class TestKey: assert not keyfile.exists() key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) assert keyfile.exists() - chunk = Chunk(b'XXX') - chunk_id = key.id_hash(chunk.data) + chunk = b'ABC' + chunk_id = key.id_hash(chunk) chunk_cdata = key.encrypt(chunk) key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) assert chunk == key.decrypt(chunk_id, chunk_cdata) @@ -97,7 +164,7 @@ class TestKey: fd.write(self.keyfile2_key_file) monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) - assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' + assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata) == b'payload' def test_keyfile2_kfenv(self, tmpdir, monkeypatch): keyfile = tmpdir.join('keyfile') @@ -106,38 +173,45 @@ class TestKey: monkeypatch.setenv('BORG_KEY_FILE', str(keyfile)) monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) - assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' + assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata) == b'payload' + + def test_keyfile_blake2(self, monkeypatch, keys_dir): + with keys_dir.join('keyfile').open('w') as fd: + fd.write(self.keyfile_blake2_key_file) + monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') + key = Blake2KeyfileKey.detect(self.MockRepository(), self.keyfile_blake2_cdata) + assert key.decrypt(self.keyfile_blake2_id, self.keyfile_blake2_cdata) == b'payload' def test_passphrase(self, keys_dir, monkeypatch): monkeypatch.setenv('BORG_PASSPHRASE', 'test') key = PassphraseKey.create(self.MockRepository(), None) - assert bytes_to_long(key.enc_cipher.iv, 8) == 0 + assert key.cipher.next_iv() == 0 assert hexlify(key.id_key) == b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6' assert hexlify(key.enc_hmac_key) == b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901' assert hexlify(key.enc_key) == b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a' assert key.chunk_seed == -775740477 - manifest = key.encrypt(Chunk(b'XXX')) - assert key.extract_nonce(manifest) == 0 - manifest2 = key.encrypt(Chunk(b'XXX')) + manifest = key.encrypt(b'ABC') + assert key.cipher.extract_iv(manifest) == 0 + manifest2 = key.encrypt(b'ABC') assert manifest != manifest2 assert key.decrypt(None, manifest) == key.decrypt(None, manifest2) - assert key.extract_nonce(manifest2) == 1 - iv = key.extract_nonce(manifest) + assert key.cipher.extract_iv(manifest2) == 1 + iv = key.cipher.extract_iv(manifest) key2 = PassphraseKey.detect(self.MockRepository(), manifest) - assert bytes_to_long(key2.enc_cipher.iv, 8) == iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD) + assert key2.cipher.next_iv() == iv + key2.cipher.block_count(len(manifest)) assert key.id_key == key2.id_key assert key.enc_hmac_key == key2.enc_hmac_key assert key.enc_key == key2.enc_key assert key.chunk_seed == key2.chunk_seed - chunk = Chunk(b'foo') - assert hexlify(key.id_hash(chunk.data)) == b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990' - assert chunk == key2.decrypt(key2.id_hash(chunk.data), key.encrypt(chunk)) + chunk = b'foo' + assert hexlify(key.id_hash(chunk)) == b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990' + assert 
chunk == key2.decrypt(key2.id_hash(chunk), key.encrypt(chunk)) def _corrupt_byte(self, key, data, offset): data = bytearray(data) - data[offset] += 1 - with pytest.raises(IntegrityError): - key.decrypt("", data) + data[offset] ^= 1 + with pytest.raises(IntegrityErrorBase): + key.decrypt(b'', data) def test_decrypt_integrity(self, monkeypatch, keys_dir): with keys_dir.join('keyfile').open('w') as fd: @@ -155,6 +229,56 @@ class TestKey: id[12] = 0 key.decrypt(id, data) + def test_roundtrip(self, key): + repository = key.repository + plaintext = b'foo' + encrypted = key.encrypt(plaintext) + identified_key_class = identify_key(encrypted) + assert identified_key_class == key.__class__ + loaded_key = identified_key_class.detect(repository, encrypted) + decrypted = loaded_key.decrypt(None, encrypted) + assert decrypted == plaintext + + def test_decrypt_decompress(self, key): + plaintext = b'123456789' + encrypted = key.encrypt(plaintext) + assert key.decrypt(None, encrypted, decompress=False) != plaintext + assert key.decrypt(None, encrypted) == plaintext + + def test_assert_id(self, key): + plaintext = b'123456789' + id = key.id_hash(plaintext) + key.assert_id(id, plaintext) + id_changed = bytearray(id) + id_changed[0] ^= 1 + with pytest.raises(IntegrityError): + key.assert_id(id_changed, plaintext) + plaintext_changed = plaintext + b'1' + with pytest.raises(IntegrityError): + key.assert_id(id, plaintext_changed) + + def test_authenticated_encrypt(self, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + key = AuthenticatedKey.create(self.MockRepository(), self.MockArgs()) + assert AuthenticatedKey.id_hash is ID_HMAC_SHA_256.id_hash + assert len(key.id_key) == 32 + plaintext = b'123456789' + authenticated = key.encrypt(plaintext) + # 0x07 is the key TYPE, 0x0100 identifies LZ4 compression, 0x90 is part of LZ4 and means that an uncompressed + # block of length nine follows (the plaintext). + assert authenticated == b'\x07\x01\x00\x90' + plaintext + + def test_blake2_authenticated_encrypt(self, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + key = Blake2AuthenticatedKey.create(self.MockRepository(), self.MockArgs()) + assert Blake2AuthenticatedKey.id_hash is ID_BLAKE2b_256.id_hash + assert len(key.id_key) == 128 + plaintext = b'123456789' + authenticated = key.encrypt(plaintext) + # 0x06 is the key TYPE, 0x0100 identifies LZ4 compression, 0x90 is part of LZ4 and means that an uncompressed + # block of length nine follows (the plaintext). 
+ assert authenticated == b'\x06\x01\x00\x90' + plaintext + class TestPassphrase: def test_passphrase_new_verification(self, capsys, monkeypatch): @@ -195,3 +319,115 @@ class TestPassphrase: def test_passphrase_repr(self): assert "secret" not in repr(Passphrase("secret")) + + +class TestTAM: + @pytest.fixture + def key(self, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + return KeyfileKey.create(TestKey.MockRepository(), TestKey.MockArgs()) + + def test_unpack_future(self, key): + blob = b'\xc1\xc1\xc1\xc1foobar' + with pytest.raises(UnsupportedManifestError): + key.unpack_and_verify_manifest(blob) + + blob = b'\xc1\xc1\xc1' + with pytest.raises((ValueError, msgpack.UnpackException)): + key.unpack_and_verify_manifest(blob) + + def test_missing_when_required(self, key): + blob = msgpack.packb({}) + with pytest.raises(TAMRequiredError): + key.unpack_and_verify_manifest(blob) + + def test_missing(self, key): + blob = msgpack.packb({}) + key.tam_required = False + unpacked, verified = key.unpack_and_verify_manifest(blob) + assert unpacked == {} + assert not verified + + def test_unknown_type_when_required(self, key): + blob = msgpack.packb({ + 'tam': { + 'type': 'HMAC_VOLLBIT', + }, + }) + with pytest.raises(TAMUnsupportedSuiteError): + key.unpack_and_verify_manifest(blob) + + def test_unknown_type(self, key): + blob = msgpack.packb({ + 'tam': { + 'type': 'HMAC_VOLLBIT', + }, + }) + key.tam_required = False + unpacked, verified = key.unpack_and_verify_manifest(blob) + assert unpacked == {} + assert not verified + + @pytest.mark.parametrize('tam, exc', ( + ({}, TAMUnsupportedSuiteError), + ({'type': b'\xff'}, TAMUnsupportedSuiteError), + (None, TAMInvalid), + (1234, TAMInvalid), + )) + def test_invalid(self, key, tam, exc): + blob = msgpack.packb({ + 'tam': tam, + }) + with pytest.raises(exc): + key.unpack_and_verify_manifest(blob) + + @pytest.mark.parametrize('hmac, salt', ( + ({}, bytes(64)), + (bytes(64), {}), + (None, bytes(64)), + (bytes(64), None), + )) + def test_wrong_types(self, key, hmac, salt): + data = { + 'tam': { + 'type': 'HKDF_HMAC_SHA512', + 'hmac': hmac, + 'salt': salt + }, + } + tam = data['tam'] + if hmac is None: + del tam['hmac'] + if salt is None: + del tam['salt'] + blob = msgpack.packb(data) + with pytest.raises(TAMInvalid): + key.unpack_and_verify_manifest(blob) + + def test_round_trip(self, key): + data = {'foo': 'bar'} + blob = key.pack_and_authenticate_metadata(data) + assert blob.startswith(b'\x82') + + unpacked = msgpack.unpackb(blob) + assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512' + + unpacked, verified = key.unpack_and_verify_manifest(blob) + assert verified + assert unpacked[b'foo'] == b'bar' + assert b'tam' not in unpacked + + @pytest.mark.parametrize('which', (b'hmac', b'salt')) + def test_tampered(self, key, which): + data = {'foo': 'bar'} + blob = key.pack_and_authenticate_metadata(data) + assert blob.startswith(b'\x82') + + unpacked = msgpack.unpackb(blob, object_hook=StableDict) + assert len(unpacked[b'tam'][which]) == 64 + unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32) + assert len(unpacked[b'tam'][which]) == 64 + blob = msgpack.packb(unpacked) + + with pytest.raises(TAMInvalid): + key.unpack_and_verify_manifest(blob) diff --git a/src/borg/testsuite/locking.py b/src/borg/testsuite/locking.py index bc62650d..64a79281 100644 --- a/src/borg/testsuite/locking.py +++ b/src/borg/testsuite/locking.py @@ -1,22 +1,26 @@ +import random import time import pytest -from ..locking import get_id, TimeoutTimer, 
ExclusiveLock, UpgradableLock, LockRoster, \ - ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout - +from ..helpers import daemonize +from ..platform import get_process_id, process_alive +from ..locking import TimeoutTimer, ExclusiveLock, Lock, LockRoster, \ + ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout, NotLocked, NotMyLock ID1 = "foo", 1, 1 ID2 = "bar", 2, 2 -def test_id(): - hostname, pid, tid = get_id() - assert isinstance(hostname, str) - assert isinstance(pid, int) - assert isinstance(tid, int) - assert len(hostname) > 0 - assert pid > 0 +@pytest.fixture() +def free_pid(): + """Return a free PID not used by any process (naturally this is racy)""" + host, pid, tid = get_process_id() + while True: + # PIDs are often restricted to a small range. On Linux the range >32k is by default not used. + pid = random.randint(33000, 65000) + if not process_alive(host, pid, tid): + return pid class TestTimeoutTimer: @@ -57,53 +61,129 @@ class TestExclusiveLock: with pytest.raises(LockTimeout): ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire() + def test_kill_stale(self, lockpath, free_pid): + host, pid, tid = our_id = get_process_id() + dead_id = host, free_pid, tid + cant_know_if_dead_id = 'foo.bar.example.net', 1, 2 -class TestUpgradableLock: + dead_lock = ExclusiveLock(lockpath, id=dead_id).acquire() + with ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True): + with pytest.raises(NotMyLock): + dead_lock.release() + with pytest.raises(NotLocked): + dead_lock.release() + + with ExclusiveLock(lockpath, id=cant_know_if_dead_id): + with pytest.raises(LockTimeout): + ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire() + + def test_migrate_lock(self, lockpath): + old_id, new_id = ID1, ID2 + assert old_id[1] != new_id[1] # different PIDs (like when doing daemonize()) + lock = ExclusiveLock(lockpath, id=old_id).acquire() + assert lock.id == old_id # lock is for old id / PID + old_unique_name = lock.unique_name + assert lock.by_me() # we have the lock + lock.migrate_lock(old_id, new_id) # fix the lock + assert lock.id == new_id # lock corresponds to the new id / PID + new_unique_name = lock.unique_name + assert lock.by_me() # we still have the lock + assert old_unique_name != new_unique_name # locking filename is different now + + +class TestLock: def test_shared(self, lockpath): - lock1 = UpgradableLock(lockpath, exclusive=False, id=ID1).acquire() - lock2 = UpgradableLock(lockpath, exclusive=False, id=ID2).acquire() + lock1 = Lock(lockpath, exclusive=False, id=ID1).acquire() + lock2 = Lock(lockpath, exclusive=False, id=ID2).acquire() assert len(lock1._roster.get(SHARED)) == 2 assert len(lock1._roster.get(EXCLUSIVE)) == 0 + assert not lock1._roster.empty(SHARED, EXCLUSIVE) + assert lock1._roster.empty(EXCLUSIVE) lock1.release() lock2.release() def test_exclusive(self, lockpath): - with UpgradableLock(lockpath, exclusive=True, id=ID1) as lock: + with Lock(lockpath, exclusive=True, id=ID1) as lock: assert len(lock._roster.get(SHARED)) == 0 assert len(lock._roster.get(EXCLUSIVE)) == 1 + assert not lock._roster.empty(SHARED, EXCLUSIVE) def test_upgrade(self, lockpath): - with UpgradableLock(lockpath, exclusive=False) as lock: + with Lock(lockpath, exclusive=False) as lock: lock.upgrade() lock.upgrade() # NOP assert len(lock._roster.get(SHARED)) == 0 assert len(lock._roster.get(EXCLUSIVE)) == 1 + assert not lock._roster.empty(SHARED, EXCLUSIVE) def test_downgrade(self, lockpath): - with UpgradableLock(lockpath, exclusive=True) as lock: + with Lock(lockpath, exclusive=True) as 
lock: lock.downgrade() lock.downgrade() # NOP assert len(lock._roster.get(SHARED)) == 1 assert len(lock._roster.get(EXCLUSIVE)) == 0 + def test_got_exclusive_lock(self, lockpath): + lock = Lock(lockpath, exclusive=True, id=ID1) + assert not lock.got_exclusive_lock() + lock.acquire() + assert lock.got_exclusive_lock() + lock.release() + assert not lock.got_exclusive_lock() + def test_break(self, lockpath): - lock = UpgradableLock(lockpath, exclusive=True, id=ID1).acquire() + lock = Lock(lockpath, exclusive=True, id=ID1).acquire() lock.break_lock() assert len(lock._roster.get(SHARED)) == 0 assert len(lock._roster.get(EXCLUSIVE)) == 0 - with UpgradableLock(lockpath, exclusive=True, id=ID2): + with Lock(lockpath, exclusive=True, id=ID2): pass def test_timeout(self, lockpath): - with UpgradableLock(lockpath, exclusive=False, id=ID1): + with Lock(lockpath, exclusive=False, id=ID1): with pytest.raises(LockTimeout): - UpgradableLock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire() - with UpgradableLock(lockpath, exclusive=True, id=ID1): + Lock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire() + with Lock(lockpath, exclusive=True, id=ID1): with pytest.raises(LockTimeout): - UpgradableLock(lockpath, exclusive=False, id=ID2, timeout=0.1).acquire() - with UpgradableLock(lockpath, exclusive=True, id=ID1): + Lock(lockpath, exclusive=False, id=ID2, timeout=0.1).acquire() + with Lock(lockpath, exclusive=True, id=ID1): with pytest.raises(LockTimeout): - UpgradableLock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire() + Lock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire() + + def test_kill_stale(self, lockpath, free_pid): + host, pid, tid = our_id = get_process_id() + dead_id = host, free_pid, tid + cant_know_if_dead_id = 'foo.bar.example.net', 1, 2 + + dead_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire() + roster = dead_lock._roster + with Lock(lockpath, id=our_id, kill_stale_locks=True): + assert roster.get(EXCLUSIVE) == set() + assert roster.get(SHARED) == {our_id} + assert roster.get(EXCLUSIVE) == set() + assert roster.get(SHARED) == set() + with pytest.raises(KeyError): + dead_lock.release() + + with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True): + with pytest.raises(LockTimeout): + Lock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire() + + def test_migrate_lock(self, lockpath): + old_id, new_id = ID1, ID2 + assert old_id[1] != new_id[1] # different PIDs (like when doing daemonize()) + + lock = Lock(lockpath, id=old_id, exclusive=True).acquire() + assert lock.id == old_id + lock.migrate_lock(old_id, new_id) # fix the lock + assert lock.id == new_id + lock.release() + + lock = Lock(lockpath, id=old_id, exclusive=False).acquire() + assert lock.id == old_id + lock.migrate_lock(old_id, new_id) # fix the lock + assert lock.id == new_id + lock.release() @pytest.fixture() @@ -132,3 +212,39 @@ class TestLockRoster: roster2 = LockRoster(rosterpath, id=ID2) roster2.modify(SHARED, REMOVE) assert roster2.get(SHARED) == set() + + def test_kill_stale(self, rosterpath, free_pid): + host, pid, tid = our_id = get_process_id() + dead_id = host, free_pid, tid + + roster1 = LockRoster(rosterpath, id=dead_id) + assert roster1.get(SHARED) == set() + roster1.modify(SHARED, ADD) + assert roster1.get(SHARED) == {dead_id} + + cant_know_if_dead_id = 'foo.bar.example.net', 1, 2 + roster1 = LockRoster(rosterpath, id=cant_know_if_dead_id) + assert roster1.get(SHARED) == {dead_id} + roster1.modify(SHARED, ADD) + assert roster1.get(SHARED) == {dead_id, 
cant_know_if_dead_id} + + killer_roster = LockRoster(rosterpath, kill_stale_locks=True) + # Did kill the dead processes lock (which was alive ... I guess?!) + assert killer_roster.get(SHARED) == {cant_know_if_dead_id} + killer_roster.modify(SHARED, ADD) + assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id} + + other_killer_roster = LockRoster(rosterpath, kill_stale_locks=True) + # Did not kill us, since we're alive + assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id} + + def test_migrate_lock(self, rosterpath): + old_id, new_id = ID1, ID2 + assert old_id[1] != new_id[1] # different PIDs (like when doing daemonize()) + roster = LockRoster(rosterpath, id=old_id) + assert roster.id == old_id + roster.modify(SHARED, ADD) + assert roster.get(SHARED) == {old_id} + roster.migrate_lock(SHARED, old_id, new_id) # fix the lock + assert roster.id == new_id + assert roster.get(SHARED) == {new_id} diff --git a/src/borg/testsuite/lrucache.py b/src/borg/testsuite/lrucache.py index 9fb4f92b..eea171d6 100644 --- a/src/borg/testsuite/lrucache.py +++ b/src/borg/testsuite/lrucache.py @@ -19,7 +19,10 @@ class TestLRUCache: assert 'b' in c with pytest.raises(KeyError): c['a'] + assert c.get('a') is None + assert c.get('a', 'foo') == 'foo' assert c['b'] == 1 + assert c.get('b') == 1 assert c['c'] == 2 c['d'] = 3 assert len(c) == 2 diff --git a/src/borg/testsuite/nanorst.py b/src/borg/testsuite/nanorst.py new file mode 100644 index 00000000..06543609 --- /dev/null +++ b/src/borg/testsuite/nanorst.py @@ -0,0 +1,41 @@ + +import pytest + +from ..nanorst import rst_to_text + + +def test_inline(): + assert rst_to_text('*foo* and ``bar``.') == 'foo and bar.' + + +def test_inline_spread(): + assert rst_to_text('*foo and bar, thusly\nfoobar*.') == 'foo and bar, thusly\nfoobar.' + + +def test_comment_inline(): + assert rst_to_text('Foo and Bar\n.. foo\nbar') == 'Foo and Bar\n.. foo\nbar' + + +def test_inline_escape(): + assert rst_to_text('Such as "\\*" characters.') == 'Such as "*" characters.' + + +def test_comment(): + assert rst_to_text('Foo and Bar\n\n.. foo\nbar') == 'Foo and Bar\n\nbar' + + +def test_directive_note(): + assert rst_to_text('.. note::\n Note this and that') == 'Note:\n Note this and that' + + +def test_ref(): + references = { + 'foo': 'baz' + } + assert rst_to_text('See :ref:`fo\no`.', references=references) == 'See baz.' 
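These nanorst tests drive a small reST-to-plain-text converter: inline markup is stripped, comments and note directives are rewritten, and a :ref:`name` role is resolved through a references mapping even when the role name is wrapped across a line break. As a rough sketch only (resolve_refs is a hypothetical helper, not the actual nanorst implementation), such a reference substitution could look like this:

    import re

    def resolve_refs(text, references):
        # join role names that were wrapped across lines, then look them up;
        # an unknown target raises ValueError, as test_undefined_ref below expects
        def substitute(match):
            name = match.group(1).replace('\n', '')
            if name not in references:
                raise ValueError('Undefined reference: {}'.format(name))
            return references[name]
        return re.sub(r':ref:`([^`]+)`', substitute, text)

    assert resolve_refs('See :ref:`fo\no`.', {'foo': 'baz'}) == 'See baz.'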
+ + +def test_undefined_ref(): + with pytest.raises(ValueError) as exc_info: + rst_to_text('See :ref:`foo`.') + assert 'Undefined reference' in str(exc_info.value) diff --git a/src/borg/testsuite/nonces.py b/src/borg/testsuite/nonces.py new file mode 100644 index 00000000..d0bc85ea --- /dev/null +++ b/src/borg/testsuite/nonces.py @@ -0,0 +1,198 @@ +import os.path + +import pytest + +from ..crypto import nonces +from ..crypto.nonces import NonceManager +from ..crypto.key import bin_to_hex +from ..helpers import get_security_dir +from ..remote import InvalidRPCMethod + + +class TestNonceManager: + + class MockRepository: + class _Location: + orig = '/some/place' + + _location = _Location() + id = bytes(32) + id_str = bin_to_hex(id) + + def get_free_nonce(self): + return self.next_free + + def commit_nonce_reservation(self, next_unreserved, start_nonce): + assert start_nonce == self.next_free + self.next_free = next_unreserved + + class MockOldRepository(MockRepository): + def get_free_nonce(self): + raise InvalidRPCMethod("") + + def commit_nonce_reservation(self, next_unreserved, start_nonce): + pytest.fail("commit_nonce_reservation should never be called on an old repository") + + def setUp(self): + self.repository = None + + def cache_nonce(self): + with open(os.path.join(get_security_dir(self.repository.id_str), 'nonce'), "r") as fd: + return fd.read() + + def set_cache_nonce(self, nonce): + with open(os.path.join(get_security_dir(self.repository.id_str), 'nonce'), "w") as fd: + assert fd.write(nonce) + + def test_empty_cache_and_old_server(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockOldRepository() + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x2000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + + def test_empty_cache(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = 0x2000 + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x2000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + + def test_empty_nonce(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = None + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x2000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + # enough space in reservation + next_nonce = manager.ensure_reservation(0x2013, 13) + assert next_nonce == 0x2013 + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + # just barely enough space in reservation + next_nonce = manager.ensure_reservation(0x2020, 19) + assert next_nonce == 0x2020 + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + # no space in reservation + next_nonce = manager.ensure_reservation(0x2033, 16) + assert next_nonce == 0x2033 + assert self.cache_nonce() == "0000000000002063" + assert self.repository.next_free == 0x2063 + + # spans reservation boundary + next_nonce = manager.ensure_reservation(0x2043, 64) + assert next_nonce == 0x2063 + assert self.cache_nonce() == "00000000000020c3" + assert self.repository.next_free == 0x20c3 + + def 
test_sync_nonce(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = 0x2000 + self.set_cache_nonce("0000000000002000") + + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x2000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + def test_server_just_upgraded(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = None + self.set_cache_nonce("0000000000002000") + + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x2000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + def test_transaction_abort_no_cache(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = 0x2000 + + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x1000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + def test_transaction_abort_old_server(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockOldRepository() + self.set_cache_nonce("0000000000002000") + + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x1000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + + def test_transaction_abort_on_other_client(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = 0x2000 + self.set_cache_nonce("0000000000001000") + + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x1000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + def test_interleaved(self, monkeypatch): + monkeypatch.setattr(nonces, 'NONCE_SPACE_RESERVATION', 0x20) + + self.repository = self.MockRepository() + self.repository.next_free = 0x2000 + self.set_cache_nonce("0000000000002000") + + manager = NonceManager(self.repository, 0x2000) + next_nonce = manager.ensure_reservation(0x2000, 19) + assert next_nonce == 0x2000 + + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x2033 + + # somehow the clients unlocks, another client reserves and this client relocks + self.repository.next_free = 0x4000 + + # enough space in reservation + next_nonce = manager.ensure_reservation(0x2013, 12) + assert next_nonce == 0x2013 + assert self.cache_nonce() == "0000000000002033" + assert self.repository.next_free == 0x4000 + + # spans reservation boundary + next_nonce = manager.ensure_reservation(0x201f, 21) + assert next_nonce == 0x4000 + assert self.cache_nonce() == "0000000000004035" + assert self.repository.next_free == 0x4035 diff --git a/src/borg/testsuite/patterns.py b/src/borg/testsuite/patterns.py new file mode 100644 index 00000000..ff447888 --- /dev/null +++ b/src/borg/testsuite/patterns.py @@ -0,0 +1,467 @@ +import argparse +import io +import os.path +import sys + +import pytest + +from ..patterns 
import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern +from ..patterns import load_exclude_file, load_pattern_file +from ..patterns import parse_pattern, PatternMatcher + + +def check_patterns(files, pattern, expected): + """Utility for testing patterns. + """ + assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths" + + matched = [f for f in files if pattern.match(f)] + + assert matched == (files if expected is None else expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/", []), + ("/home", ["/home"]), + ("/home///", ["/home"]), + ("/./home", ["/home"]), + ("/home/user", ["/home/user"]), + ("/home/user2", ["/home/user2"]), + ("/home/user/.bashrc", ["/home/user/.bashrc"]), + ]) +def test_patterns_full(pattern, expected): + files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ] + + check_patterns(files, PathFullPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", []), + ("relative", []), + ("relative/path/", ["relative/path"]), + ("relative/path", ["relative/path"]), + ]) +def test_patterns_full_relative(pattern, expected): + files = ["relative/path", "relative/path2", ] + + check_patterns(files, PathFullPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/", None), + ("/./", None), + ("", []), + ("/home/u", []), + ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc", ["/etc/server/config", "/etc/server/hosts"]), + ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv", ["/srv/messages", "/srv/dmesg"]), + ]) +def test_patterns_prefix(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", + ] + + check_patterns(files, PathPrefixPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("", []), + ("foo", []), + ("relative", ["relative/path1", "relative/two"]), + ("more", ["more/relative"]), + ]) +def test_patterns_prefix_relative(pattern, expected): + files = ["relative/path1", "relative/two", "more/relative"] + + check_patterns(files, PathPrefixPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. 
all match the given pattern + ("/*", None), + ("/./*", None), + ("*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*", ["/srv/messages", "/srv/dmesg"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ]) +def test_patterns_fnmatch(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + check_patterns(files, FnmatchPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("*", None), + ("**/*", None), + ("/**/*", None), + ("/./*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]), + ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]), + ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", []), + ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]), + ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]), + ("/srv/*", ["/srv/messages", "/srv/dmesg"]), + ("/srv2/**", ["/srv2", "/srv2/blafasel"]), + ("/srv2/**/", ["/srv2/blafasel"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]), + ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]), + ]) +def test_patterns_shell(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg", + "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + check_patterns(files, ShellPattern(pattern), expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. 
all match the given pattern + ("", None), + (".*", None), + ("^/", None), + ("^abc$", []), + ("^[^/]", []), + ("^(?!/srv|/foo|/opt)", + ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", + "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]), + ]) +def test_patterns_regex(pattern, expected): + files = [ + '/srv/data', '/foo/bar', '/home', + '/home/user/.profile', '/home/user/.bashrc', + '/home/user2/.profile', '/home/user2/public_html/index.html', + '/opt/log/messages.txt', '/opt/log/dmesg.txt', + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + obj = RegexPattern(pattern) + assert str(obj) == pattern + assert obj.pattern == pattern + + check_patterns(files, obj, expected) + + +def test_regex_pattern(): + # The forward slash must match the platform-specific path separator + assert RegexPattern("^/$").match("/") + assert RegexPattern("^/$").match(os.path.sep) + assert not RegexPattern(r"^\\$").match("/") + + +def use_normalized_unicode(): + return sys.platform in ("darwin",) + + +def _make_test_patterns(pattern): + return [PathPrefixPattern(pattern), + FnmatchPattern(pattern), + RegexPattern("^{}/foo$".format(pattern)), + ShellPattern(pattern), + ] + + +@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}")) +def test_composed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode() + + +@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}")) +def test_decomposed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode() + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + +@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1"))) +def test_invalid_unicode_pattern(pattern): + assert not pattern.match("ba/foo") + assert pattern.match(str(b"ba\x80/foo", "latin1")) + + +@pytest.mark.parametrize("lines, expected", [ + # "None" means all files, i.e. 
none excluded + ([], None), + (["# Comment only"], None), + (["*"], []), + (["# Comment", + "*/something00.txt", + " *whitespace* ", + # Whitespace before comment + " #/ws*", + # Empty line + "", + "# EOF"], + ["/more/data", "/home", " #/wsfoobar"]), + (["re:.*"], []), + (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]), + ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]), + (["", "", "", + "# This is a test with mixed pattern styles", + # Case-insensitive pattern + "re:(?i)BAR|ME$", + "", + "*whitespace*", + "fm:*/something00*"], + ["/more/data"]), + ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]), + ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]), + (["pp:./"], None), + (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), + (["pp:aaabbb"], None), + (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), + (["/nomatch", "/more/*"], + ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), + # the order of exclude patterns shouldn't matter + (["/more/*", "/nomatch"], + ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), + ]) +def test_exclude_patterns_from_file(tmpdir, lines, expected): + files = [ + '/data/something00.txt', '/more/data', '/home', + ' #/wsfoobar', + '\tstart/whitespace', + '/whitespace/end\t', + ] + + def evaluate(filename): + patterns = [] + load_exclude_file(open(filename, "rt"), patterns) + matcher = PatternMatcher(fallback=True) + matcher.add_inclexcl(patterns) + return [path for path in files if matcher.match(path)] + + exclfile = tmpdir.join("exclude.txt") + + with exclfile.open("wt") as fh: + fh.write("\n".join(lines)) + + assert evaluate(str(exclfile)) == (files if expected is None else expected) + + +@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [ + # "None" means all files, i.e. 
none excluded + ([], [], 0), + (["# Comment only"], [], 0), + (["- *"], [], 1), + (["+fm:*/something00.txt", + "-/data"], [], 2), + (["R /"], ["/"], 0), + (["R /", + "# comment"], ["/"], 0), + (["# comment", + "- /data", + "R /home"], ["/home"], 1), +]) +def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns): + def evaluate(filename): + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + return roots, len(inclexclpatterns) + patternfile = tmpdir.join("patterns.txt") + + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + + roots, numpatterns = evaluate(str(patternfile)) + assert roots == expected_roots + assert numpatterns == expected_numpatterns + + +def test_switch_patterns_style(): + patterns = """\ + +0_initial_default_is_shell + p fm + +1_fnmatch + P re + +2_regex + +3_more_regex + P pp + +4_pathprefix + p fm + p sh + +5_shell + """ + pattern_file = io.StringIO(patterns) + roots, patterns = [], [] + load_pattern_file(pattern_file, roots, patterns) + assert len(patterns) == 6 + assert isinstance(patterns[0].val, ShellPattern) + assert isinstance(patterns[1].val, FnmatchPattern) + assert isinstance(patterns[2].val, RegexPattern) + assert isinstance(patterns[3].val, RegexPattern) + assert isinstance(patterns[4].val, PathPrefixPattern) + assert isinstance(patterns[5].val, ShellPattern) + + +@pytest.mark.parametrize("lines", [ + (["X /data"]), # illegal pattern type prefix + (["/data"]), # need a pattern type prefix +]) +def test_load_invalid_patterns_from_file(tmpdir, lines): + patternfile = tmpdir.join("patterns.txt") + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + filename = str(patternfile) + with pytest.raises(argparse.ArgumentTypeError): + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + + +@pytest.mark.parametrize("lines, expected", [ + # "None" means all files, i.e. 
none excluded + ([], None), + (["# Comment only"], None), + (["- *"], []), + # default match type is sh: for patterns -> * doesn't match a / + (["-*/something0?.txt"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', + '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["-fm:*/something00.txt"], + ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["-fm:*/something0?.txt"], + ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["+/*/something0?.txt", + "-/data"], + ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["+fm:*/something00.txt", + "-/data"], + ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), + # include /home/leo and exclude the rest of /home: + (["+/home/leo", + "-/home/*"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), + # wrong order, /home/leo is already excluded by -/home/*: + (["-/home/*", + "+/home/leo"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']), + (["+fm:/home/leo", + "-/home/"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), +]) +def test_inclexcl_patterns_from_file(tmpdir, lines, expected): + files = [ + '/data', '/data/something00.txt', '/data/subdir/something01.txt', + '/home', '/home/leo', '/home/leo/t', '/home/other' + ] + + def evaluate(filename): + matcher = PatternMatcher(fallback=True) + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + matcher.add_inclexcl(inclexclpatterns) + return [path for path in files if matcher.match(path)] + + patternfile = tmpdir.join("patterns.txt") + + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + + assert evaluate(str(patternfile)) == (files if expected is None else expected) + + +@pytest.mark.parametrize("pattern, cls", [ + ("", FnmatchPattern), + + # Default style + ("*", FnmatchPattern), + ("/data/*", FnmatchPattern), + + # fnmatch style + ("fm:", FnmatchPattern), + ("fm:*", FnmatchPattern), + ("fm:/data/*", FnmatchPattern), + ("fm:fm:/data/*", FnmatchPattern), + + # Regular expression + ("re:", RegexPattern), + ("re:.*", RegexPattern), + ("re:^/something/", RegexPattern), + ("re:re:^/something/", RegexPattern), + + # Path prefix + ("pp:", PathPrefixPattern), + ("pp:/", PathPrefixPattern), + ("pp:/data/", PathPrefixPattern), + ("pp:pp:/data/", PathPrefixPattern), + + # Shell-pattern style + ("sh:", ShellPattern), + ("sh:*", ShellPattern), + ("sh:/data/*", ShellPattern), + ("sh:sh:/data/*", ShellPattern), + ]) +def test_parse_pattern(pattern, cls): + assert isinstance(parse_pattern(pattern), cls) + + +@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"]) +def test_parse_pattern_error(pattern): + with pytest.raises(ValueError): + parse_pattern(pattern) + + +def test_pattern_matcher(): + pm = PatternMatcher() + + assert pm.fallback is None + + for i in ["", "foo", "bar"]: + assert pm.match(i) is None + + # add extra entries to aid in testing + for target in ["A", "B", "Empty", "FileNotFound"]: + pm.is_include_cmd[target] = target + + pm.add([RegexPattern("^a")], "A") + pm.add([RegexPattern("^b"), RegexPattern("^z")], "B") + pm.add([RegexPattern("^$")], "Empty") + pm.fallback = "FileNotFound" + + assert pm.match("") == "Empty" + assert pm.match("aaa") == "A" + assert pm.match("bbb") == "B" + assert pm.match("ccc") == "FileNotFound" + 
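+    # neither "ccc" nor "xyz" matches any of the registered patterns, so the fallback value is returned;
+    # "z" below still matches the second pattern registered for "B".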
assert pm.match("xyz") == "FileNotFound" + assert pm.match("z") == "B" + + assert PatternMatcher(fallback="hey!").fallback == "hey!" diff --git a/src/borg/testsuite/platform.py b/src/borg/testsuite/platform.py index 991c98b8..3d301457 100644 --- a/src/borg/testsuite/platform.py +++ b/src/borg/testsuite/platform.py @@ -1,11 +1,17 @@ +import functools import os +import random import shutil import sys import tempfile +if sys.platform != 'win32': + import pwd import unittest from ..platform import acl_get, acl_set, swidth -from . import BaseTestCase +from ..platform import get_process_id, process_alive +from . import BaseTestCase, unopened_tempfile +from .locking import free_pid ACCESS_ACL = """ @@ -30,11 +36,41 @@ mask::rw- other::r-- """.strip().encode('ascii') +_acls_working = None + def fakeroot_detected(): return 'FAKEROOTKEY' in os.environ +def user_exists(username): + if sys.platform == 'win32': + return True + try: + pwd.getpwnam(username) + return True + except (KeyError, ValueError): + return False + + +@functools.lru_cache() +def are_acls_working(): + with unopened_tempfile() as filepath: + open(filepath, 'w').close() + try: + access = b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n' + acl = {'acl_access': access} + acl_set(filepath, acl) + read_acl = {} + acl_get(filepath, read_acl, os.stat(filepath)) + read_acl_access = read_acl.get('acl_access', None) + if read_acl_access and b'user::rw-' in read_acl_access: + return True + except PermissionError: + pass + return False + + @unittest.skipUnless(sys.platform.startswith('linux'), 'linux only test') @unittest.skipIf(fakeroot_detected(), 'not compatible with fakeroot') class PlatformLinuxTestCase(BaseTestCase): @@ -54,6 +90,7 @@ class PlatformLinuxTestCase(BaseTestCase): item = {'acl_access': access, 'acl_default': default} acl_set(path, item, numeric_owner=numeric_owner) + @unittest.skipIf(not are_acls_working(), 'ACLs do not work') def test_access_acl(self): file = tempfile.NamedTemporaryFile() self.assert_equal(self.get_acl(file.name), {}) @@ -66,12 +103,15 @@ class PlatformLinuxTestCase(BaseTestCase): self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access']) self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access']) + @unittest.skipIf(not are_acls_working(), 'ACLs do not work') def test_default_acl(self): self.assert_equal(self.get_acl(self.tmpdir), {}) self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL) self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL) self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL) + @unittest.skipIf(not user_exists('übel'), 'requires übel user') + @unittest.skipIf(not are_acls_working(), 'ACLs do not work') def test_non_ascii_acl(self): # Testing non-ascii ACL processing to see whether our code is robust. 
# I have no idea whether non-ascii ACLs are allowed by the standard, @@ -128,6 +168,7 @@ class PlatformDarwinTestCase(BaseTestCase): item = {'acl_extended': acl} acl_set(path, item, numeric_owner=numeric_owner) + @unittest.skipIf(not are_acls_working(), 'ACLs do not work') def test_access_acl(self): file = tempfile.NamedTemporaryFile() file2 = tempfile.NamedTemporaryFile() @@ -151,3 +192,22 @@ class PlatformPosixTestCase(BaseTestCase): def test_swidth_mixed(self): self.assert_equal(swidth("borgバックアップ"), 4 + 6 * 2) + + +def test_process_alive(free_pid): + id = get_process_id() + assert process_alive(*id) + host, pid, tid = id + assert process_alive(host + 'abc', pid, tid) + assert process_alive(host, pid, tid + 1) + assert not process_alive(host, free_pid, tid) + + +def test_process_id(): + hostname, pid, tid = get_process_id() + assert isinstance(hostname, str) + assert isinstance(pid, int) + assert isinstance(tid, int) + assert len(hostname) > 0 + assert pid > 0 + assert get_process_id() == (hostname, pid, tid) diff --git a/src/borg/testsuite/remote.py b/src/borg/testsuite/remote.py new file mode 100644 index 00000000..d9117717 --- /dev/null +++ b/src/borg/testsuite/remote.py @@ -0,0 +1,201 @@ +import errno +import os +import io +import time +from unittest.mock import patch + +import pytest + +from ..remote import SleepingBandwidthLimiter, RepositoryCache, cache_if_remote +from ..repository import Repository +from ..crypto.key import PlaintextKey +from ..compress import CompressionSpec +from ..helpers import IntegrityError +from .hashindex import H +from .key import TestKey + + +class TestSleepingBandwidthLimiter: + def expect_write(self, fd, data): + self.expected_fd = fd + self.expected_data = data + + def check_write(self, fd, data): + assert fd == self.expected_fd + assert data == self.expected_data + return len(data) + + def test_write_unlimited(self, monkeypatch): + monkeypatch.setattr(os, "write", self.check_write) + + it = SleepingBandwidthLimiter(0) + self.expect_write(5, b"test") + it.write(5, b"test") + + def test_write(self, monkeypatch): + monkeypatch.setattr(os, "write", self.check_write) + monkeypatch.setattr(time, "monotonic", lambda: now) + monkeypatch.setattr(time, "sleep", lambda x: None) + + now = 100 + + it = SleepingBandwidthLimiter(100) + + # all fits + self.expect_write(5, b"test") + it.write(5, b"test") + + # only partial write + self.expect_write(5, b"123456") + it.write(5, b"1234567890") + + # sleeps + self.expect_write(5, b"123456") + it.write(5, b"123456") + + # long time interval between writes + now += 10 + self.expect_write(5, b"1") + it.write(5, b"1") + + # long time interval between writes, filling up quota + now += 10 + self.expect_write(5, b"1") + it.write(5, b"1") + + # long time interval between writes, filling up quota to clip to maximum + now += 10 + self.expect_write(5, b"1") + it.write(5, b"1") + + +class TestRepositoryCache: + @pytest.yield_fixture + def repository(self, tmpdir): + self.repository_location = os.path.join(str(tmpdir), 'repository') + with Repository(self.repository_location, exclusive=True, create=True) as repository: + repository.put(H(1), b'1234') + repository.put(H(2), b'5678') + repository.put(H(3), bytes(100)) + yield repository + + @pytest.fixture + def cache(self, repository): + return RepositoryCache(repository) + + def test_simple(self, cache: RepositoryCache): + # Single get()s are not cached, since they are used for unique objects like archives. 
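+        # a plain get() never stores the result, so both the first get() and the first get_many() miss;
+        # once get_many() has cached H(1), the following get_many() and get() calls are hits.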
+ assert cache.get(H(1)) == b'1234' + assert cache.misses == 1 + assert cache.hits == 0 + + assert list(cache.get_many([H(1)])) == [b'1234'] + assert cache.misses == 2 + assert cache.hits == 0 + + assert list(cache.get_many([H(1)])) == [b'1234'] + assert cache.misses == 2 + assert cache.hits == 1 + + assert cache.get(H(1)) == b'1234' + assert cache.misses == 2 + assert cache.hits == 2 + + def test_backoff(self, cache: RepositoryCache): + def query_size_limit(): + cache.size_limit = 0 + + assert list(cache.get_many([H(1), H(2)])) == [b'1234', b'5678'] + assert cache.misses == 2 + assert cache.evictions == 0 + iterator = cache.get_many([H(1), H(3), H(2)]) + assert next(iterator) == b'1234' + + # Force cache to back off + qsl = cache.query_size_limit + cache.query_size_limit = query_size_limit + cache.backoff() + cache.query_size_limit = qsl + # Evicted H(1) and H(2) + assert cache.evictions == 2 + assert H(1) not in cache.cache + assert H(2) not in cache.cache + assert next(iterator) == bytes(100) + assert cache.slow_misses == 0 + # Since H(2) was in the cache when we called get_many(), but has + # been evicted during iterating the generator, it will be a slow miss. + assert next(iterator) == b'5678' + assert cache.slow_misses == 1 + + def test_enospc(self, cache: RepositoryCache): + class enospc_open: + def __init__(self, *args): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def write(self, data): + raise OSError(errno.ENOSPC, 'foo') + + def truncate(self, n=None): + pass + + iterator = cache.get_many([H(1), H(2), H(3)]) + assert next(iterator) == b'1234' + + with patch('builtins.open', enospc_open): + assert next(iterator) == b'5678' + assert cache.enospc == 1 + # We didn't patch query_size_limit which would set size_limit to some low + # value, so nothing was actually evicted. 
+ assert cache.evictions == 0 + + assert next(iterator) == bytes(100) + + @pytest.fixture + def key(self, repository, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') + key = PlaintextKey.create(repository, TestKey.MockArgs()) + key.compressor = CompressionSpec('none').compressor + return key + + def _put_encrypted_object(self, key, repository, data): + id_ = key.id_hash(data) + repository.put(id_, key.encrypt(data)) + return id_ + + @pytest.fixture + def H1(self, key, repository): + return self._put_encrypted_object(key, repository, b'1234') + + @pytest.fixture + def H2(self, key, repository): + return self._put_encrypted_object(key, repository, b'5678') + + @pytest.fixture + def H3(self, key, repository): + return self._put_encrypted_object(key, repository, bytes(100)) + + @pytest.fixture + def decrypted_cache(self, key, repository): + return cache_if_remote(repository, decrypted_cache=key, force_cache=True) + + def test_cache_corruption(self, decrypted_cache: RepositoryCache, H1, H2, H3): + list(decrypted_cache.get_many([H1, H2, H3])) + + iterator = decrypted_cache.get_many([H1, H2, H3]) + assert next(iterator) == (7, b'1234') + + with open(decrypted_cache.key_filename(H2), 'a+b') as fd: + fd.seek(-1, io.SEEK_END) + corrupted = (int.from_bytes(fd.read(), 'little') ^ 2).to_bytes(1, 'little') + fd.seek(-1, io.SEEK_END) + fd.write(corrupted) + fd.truncate() + + with pytest.raises(IntegrityError): + assert next(iterator) == (7, b'5678') diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index 11a2370f..25a1a7fd 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -6,20 +6,31 @@ import sys import tempfile from unittest.mock import patch +import msgpack + +import pytest + from ..hashindex import NSIndex from ..helpers import Location from ..helpers import IntegrityError -from ..locking import UpgradableLock, LockFailed -from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line -from ..repository import Repository, LoggedIO, MAGIC +from ..locking import Lock, LockFailed +from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, ConnectionClosedWithHint, handle_remote_line +from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE from . 
import BaseTestCase +from .hashindex import H + + +UNSPECIFIED = object() # for default values where we can't use None class RepositoryTestCaseBase(BaseTestCase): key_size = 32 + exclusive = True - def open(self, create=False): - return Repository(os.path.join(self.tmppath, 'repository'), create=create) + def open(self, create=False, exclusive=UNSPECIFIED): + if exclusive is UNSPECIFIED: + exclusive = self.exclusive + return Repository(os.path.join(self.tmppath, 'repository'), exclusive=exclusive, create=create) def setUp(self): self.tmppath = tempfile.mkdtemp() @@ -30,18 +41,27 @@ class RepositoryTestCaseBase(BaseTestCase): self.repository.close() shutil.rmtree(self.tmppath) - def reopen(self): + def reopen(self, exclusive=UNSPECIFIED): if self.repository: self.repository.close() - self.repository = self.open() + self.repository = self.open(exclusive=exclusive) + + def add_keys(self): + self.repository.put(H(0), b'foo') + self.repository.put(H(1), b'bar') + self.repository.put(H(3), b'bar') + self.repository.commit() + self.repository.put(H(1), b'bar2') + self.repository.put(H(2), b'boo') + self.repository.delete(H(3)) class RepositoryTestCase(RepositoryTestCaseBase): def test1(self): for x in range(100): - self.repository.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - key50 = ('%-32d' % 50).encode('ascii') + self.repository.put(H(x), b'SOMEDATA') + key50 = H(50) self.assert_equal(self.repository.get(key50), b'SOMEDATA') self.repository.delete(key50) self.assert_raises(Repository.ObjectNotFound, lambda: self.repository.get(key50)) @@ -52,69 +72,70 @@ class RepositoryTestCase(RepositoryTestCaseBase): for x in range(100): if x == 50: continue - self.assert_equal(repository2.get(('%-32d' % x).encode('ascii')), b'SOMEDATA') + self.assert_equal(repository2.get(H(x)), b'SOMEDATA') def test2(self): """Test multiple sequential transactions """ - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.repository.put(b'00000000000000000000000000000001', b'foo') + self.repository.put(H(0), b'foo') + self.repository.put(H(1), b'foo') self.repository.commit() - self.repository.delete(b'00000000000000000000000000000000') - self.repository.put(b'00000000000000000000000000000001', b'bar') + self.repository.delete(H(0)) + self.repository.put(H(1), b'bar') self.repository.commit() - self.assert_equal(self.repository.get(b'00000000000000000000000000000001'), b'bar') + self.assert_equal(self.repository.get(H(1)), b'bar') def test_consistency(self): """Test cache consistency """ - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'foo') - self.repository.put(b'00000000000000000000000000000000', b'foo2') - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'foo2') - self.repository.put(b'00000000000000000000000000000000', b'bar') - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'bar') - self.repository.delete(b'00000000000000000000000000000000') - self.assert_raises(Repository.ObjectNotFound, lambda: self.repository.get(b'00000000000000000000000000000000')) + self.repository.put(H(0), b'foo') + self.assert_equal(self.repository.get(H(0)), b'foo') + self.repository.put(H(0), b'foo2') + self.assert_equal(self.repository.get(H(0)), b'foo2') + self.repository.put(H(0), b'bar') + self.assert_equal(self.repository.get(H(0)), b'bar') + self.repository.delete(H(0)) + self.assert_raises(Repository.ObjectNotFound, lambda: self.repository.get(H(0))) def 
test_consistency2(self): """Test cache consistency2 """ - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'foo') + self.repository.put(H(0), b'foo') + self.assert_equal(self.repository.get(H(0)), b'foo') self.repository.commit() - self.repository.put(b'00000000000000000000000000000000', b'foo2') - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'foo2') + self.repository.put(H(0), b'foo2') + self.assert_equal(self.repository.get(H(0)), b'foo2') self.repository.rollback() - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'foo') + self.assert_equal(self.repository.get(H(0)), b'foo') def test_overwrite_in_same_transaction(self): """Test cache consistency2 """ - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.repository.put(b'00000000000000000000000000000000', b'foo2') + self.repository.put(H(0), b'foo') + self.repository.put(H(0), b'foo2') self.repository.commit() - self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), b'foo2') + self.assert_equal(self.repository.get(H(0)), b'foo2') def test_single_kind_transactions(self): # put - self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(H(0), b'foo') self.repository.commit() self.repository.close() # replace self.repository = self.open() with self.repository: - self.repository.put(b'00000000000000000000000000000000', b'bar') + self.repository.put(H(0), b'bar') self.repository.commit() # delete self.repository = self.open() with self.repository: - self.repository.delete(b'00000000000000000000000000000000') + self.repository.delete(H(0)) self.repository.commit() def test_list(self): for x in range(100): - self.repository.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.repository.put(H(x), b'SOMEDATA') + self.repository.commit() all = self.repository.list() self.assert_equal(len(all), 100) first_half = self.repository.list(limit=50) @@ -125,6 +146,30 @@ class RepositoryTestCase(RepositoryTestCaseBase): self.assert_equal(second_half, all[50:]) self.assert_equal(len(self.repository.list(limit=50)), 50) + def test_scan(self): + for x in range(100): + self.repository.put(H(x), b'SOMEDATA') + self.repository.commit() + all = self.repository.scan() + assert len(all) == 100 + first_half = self.repository.scan(limit=50) + assert len(first_half) == 50 + assert first_half == all[:50] + second_half = self.repository.scan(marker=first_half[-1]) + assert len(second_half) == 50 + assert second_half == all[50:] + assert len(self.repository.scan(limit=50)) == 50 + # check result order == on-disk order (which is hash order) + for x in range(100): + assert all[x] == H(x) + + def test_max_data_size(self): + max_data = b'x' * MAX_DATA_SIZE + self.repository.put(H(0), max_data) + self.assert_equal(self.repository.get(H(0)), max_data) + self.assert_raises(IntegrityError, + lambda: self.repository.put(H(1), max_data + b'x')) + class LocalRepositoryTestCase(RepositoryTestCaseBase): # test case that doesn't work with remote repositories @@ -138,22 +183,22 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase): assert self.repository.compact[0] == 41 + 9 def test_sparse1(self): - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.repository.put(b'00000000000000000000000000000001', b'123456789') + self.repository.put(H(0), b'foo') + self.repository.put(H(1), b'123456789') self.repository.commit() - 
self.repository.put(b'00000000000000000000000000000001', b'bar') + self.repository.put(H(1), b'bar') self._assert_sparse() def test_sparse2(self): - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.repository.put(b'00000000000000000000000000000001', b'123456789') + self.repository.put(H(0), b'foo') + self.repository.put(H(1), b'123456789') self.repository.commit() - self.repository.delete(b'00000000000000000000000000000001') + self.repository.delete(H(1)) self._assert_sparse() def test_sparse_delete(self): - self.repository.put(b'00000000000000000000000000000000', b'1245') - self.repository.delete(b'00000000000000000000000000000000') + self.repository.put(H(0), b'1245') + self.repository.delete(H(0)) self.repository.io._write_fd.sync() # The on-line tracking works on a per-object basis... @@ -165,18 +210,26 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase): self.repository.commit() assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()] + def test_uncommitted_garbage(self): + # uncommitted garbage should be no problem, it is cleaned up automatically. + # we just have to be careful with invalidation of cached FDs in LoggedIO. + self.repository.put(H(0), b'foo') + self.repository.commit() + # write some crap to a uncommitted segment file + last_segment = self.repository.io.get_latest_segment() + with open(self.repository.io.segment_filename(last_segment + 1), 'wb') as f: + f.write(MAGIC + b'crapcrapcrap') + self.repository.close() + # usually, opening the repo and starting a transaction should trigger a cleanup. + self.repository = self.open() + with self.repository: + self.repository.put(H(0), b'bar') # this may trigger compact_segments() + self.repository.commit() + # the point here is that nothing blows up with an exception. 
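The repository tests below (and the rewritten ones above) build object keys with the H() helper imported from .hashindex, whose definition is not part of this hunk. Judging from the 32-character literals it replaces, such as ('%-32d' % x).encode('ascii'), it presumably returns a deterministic 32-byte key for a small integer. A minimal stand-in under that assumption:

    def H(x):
        # hypothetical stand-in: a deterministic 32-byte key derived from an integer,
        # mirroring the ('%-32d' % x).encode('ascii') literals replaced in this diff
        return ('%-32d' % x).encode('ascii')

    assert len(H(0)) == 32 and H(0) != H(1)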
+ class RepositoryCommitTestCase(RepositoryTestCaseBase): - def add_keys(self): - self.repository.put(b'00000000000000000000000000000000', b'foo') - self.repository.put(b'00000000000000000000000000000001', b'bar') - self.repository.put(b'00000000000000000000000000000003', b'bar') - self.repository.commit() - self.repository.put(b'00000000000000000000000000000001', b'bar2') - self.repository.put(b'00000000000000000000000000000002', b'boo') - self.repository.delete(b'00000000000000000000000000000003') - def test_replay_of_missing_index(self): self.add_keys() for name in os.listdir(self.repository.path): @@ -199,17 +252,6 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.assert_equal(len(self.repository), 3) self.assert_equal(self.repository.check(), True) - def test_replay_of_readonly_repository(self): - self.add_keys() - for name in os.listdir(self.repository.path): - if name.startswith('index.'): - os.unlink(os.path.join(self.repository.path, name)) - with patch.object(UpgradableLock, 'upgrade', side_effect=LockFailed) as upgrade: - self.reopen() - with self.repository: - self.assert_raises(LockFailed, lambda: len(self.repository)) - upgrade.assert_called_once_with() - def test_crash_before_write_index(self): self.add_keys() self.repository.write_index = None @@ -222,6 +264,32 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.assert_equal(len(self.repository), 3) self.assert_equal(self.repository.check(), True) + def test_replay_lock_upgrade_old(self): + self.add_keys() + for name in os.listdir(self.repository.path): + if name.startswith('index.'): + os.unlink(os.path.join(self.repository.path, name)) + with patch.object(Lock, 'upgrade', side_effect=LockFailed) as upgrade: + self.reopen(exclusive=None) # simulate old client that always does lock upgrades + with self.repository: + # the repo is only locked by a shared read lock, but to replay segments, + # we need an exclusive write lock - check if the lock gets upgraded. + self.assert_raises(LockFailed, lambda: len(self.repository)) + upgrade.assert_called_once_with() + + def test_replay_lock_upgrade(self): + self.add_keys() + for name in os.listdir(self.repository.path): + if name.startswith('index.'): + os.unlink(os.path.join(self.repository.path, name)) + with patch.object(Lock, 'upgrade', side_effect=LockFailed) as upgrade: + self.reopen(exclusive=False) # current client usually does not do lock upgrade, except for replay + with self.repository: + # the repo is only locked by a shared read lock, but to replay segments, + # we need an exclusive write lock - check if the lock gets upgraded. 
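+                # upgrade() is patched to fail, so the segment replay triggered by len() must surface LockFailed.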
+ self.assert_raises(LockFailed, lambda: len(self.repository)) + upgrade.assert_called_once_with() + def test_crash_before_deleting_compacted_segments(self): self.add_keys() self.repository.io.delete_segment = None @@ -236,16 +304,88 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.assert_equal(len(self.repository), 3) def test_ignores_commit_tag_in_data(self): - self.repository.put(b'0' * 32, LoggedIO.COMMIT) + self.repository.put(H(0), LoggedIO.COMMIT) self.reopen() with self.repository: io = self.repository.io assert not io.is_committed_segment(io.get_latest_segment()) + def test_moved_deletes_are_tracked(self): + self.repository.put(H(1), b'1') + self.repository.put(H(2), b'2') + self.repository.commit() + self.repository.delete(H(1)) + self.repository.commit() + last_segment = self.repository.io.get_latest_segment() - 1 + num_deletes = 0 + for tag, key, offset, size in self.repository.io.iter_objects(last_segment): + if tag == TAG_DELETE: + assert key == H(1) + num_deletes += 1 + assert num_deletes == 1 + assert last_segment in self.repository.compact + self.repository.put(H(3), b'3') + self.repository.commit() + assert last_segment not in self.repository.compact + assert not self.repository.io.segment_exists(last_segment) + for segment, _ in self.repository.io.segment_iterator(): + for tag, key, offset, size in self.repository.io.iter_objects(segment): + assert tag != TAG_DELETE + + def test_shadowed_entries_are_preserved(self): + get_latest_segment = self.repository.io.get_latest_segment + self.repository.put(H(1), b'1') + # This is the segment with our original PUT of interest + put_segment = get_latest_segment() + self.repository.commit() + + # We now delete H(1), and force this segment to not be compacted, which can happen + # if it's not sparse enough (symbolized by H(2) here). + self.repository.delete(H(1)) + self.repository.put(H(2), b'1') + delete_segment = get_latest_segment() + + # We pretend these are mostly dense (not sparse) and won't be compacted + del self.repository.compact[put_segment] + del self.repository.compact[delete_segment] + + self.repository.commit() + + # Now we perform an unrelated operation on the segment containing the DELETE, + # causing it to be compacted. 
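+        # deleting H(2) marks freeable space in delete_segment again, so compaction picks it up on the next commit.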
+ self.repository.delete(H(2)) + self.repository.commit() + + assert self.repository.io.segment_exists(put_segment) + assert not self.repository.io.segment_exists(delete_segment) + + # Basic case, since the index survived this must be ok + assert H(1) not in self.repository + # Nuke index, force replay + os.unlink(os.path.join(self.repository.path, 'index.%d' % get_latest_segment())) + # Must not reappear + assert H(1) not in self.repository + + def test_shadow_index_rollback(self): + self.repository.put(H(1), b'1') + self.repository.delete(H(1)) + assert self.repository.shadow_index[H(1)] == [0] + self.repository.commit() + # note how an empty list means that nothing is shadowed for sure + assert self.repository.shadow_index[H(1)] == [] + self.repository.put(H(1), b'1') + self.repository.delete(H(1)) + # 0 put/delete; 1 commit; 2 compacted; 3 commit; 4 put/delete + assert self.repository.shadow_index[H(1)] == [4] + self.repository.rollback() + self.repository.put(H(2), b'1') + # After the rollback segment 4 shouldn't be considered anymore + assert self.repository.shadow_index[H(1)] == [] + class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): def open(self, create=False): - return Repository(os.path.join(self.tmppath, 'repository'), create=create, append_only=True) + return Repository(os.path.join(self.tmppath, 'repository'), exclusive=True, create=create, append_only=True) def test_destroy_append_only(self): # Can't destroy append only repo (via the API) @@ -256,34 +396,133 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): def test_append_only(self): def segments_in_repository(): return len(list(self.repository.io.segment_iterator())) - self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(H(0), b'foo') self.repository.commit() self.repository.append_only = False assert segments_in_repository() == 2 - self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(H(0), b'foo') self.repository.commit() # normal: compact squashes the data together, only one segment assert segments_in_repository() == 4 self.repository.append_only = True assert segments_in_repository() == 4 - self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(H(0), b'foo') self.repository.commit() # append only: does not compact, only new segments written assert segments_in_repository() == 6 +class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase): + def test_additional_free_space(self): + self.add_keys() + self.repository.config.set('repository', 'additional_free_space', '1000T') + self.repository.save_key(b'shortcut to save_config') + self.reopen() + + with self.repository: + self.repository.put(H(0), b'foobar') + with pytest.raises(Repository.InsufficientFreeSpaceError): + self.repository.commit() + assert os.path.exists(self.repository.path) + + def test_create_free_space(self): + self.repository.additional_free_space = 1e20 + with pytest.raises(Repository.InsufficientFreeSpaceError): + self.add_keys() + assert not os.path.exists(self.repository.path) + + +class QuotaTestCase(RepositoryTestCaseBase): + def test_tracking(self): + assert self.repository.storage_quota_use == 0 + self.repository.put(H(1), bytes(1234)) + assert self.repository.storage_quota_use == 1234 + 41 + self.repository.put(H(2), bytes(5678)) + assert self.repository.storage_quota_use == 1234 + 5678 + 2 * 41 + self.repository.delete(H(1)) + assert self.repository.storage_quota_use == 5678 + 41 + self.repository.commit() + self.reopen() + with 
self.repository: + # Open new transaction; hints and thus quota data is not loaded unless needed. + self.repository.put(H(3), b'') + self.repository.delete(H(3)) + assert self.repository.storage_quota_use == 5678 + 41 + + def test_exceed_quota(self): + assert self.repository.storage_quota_use == 0 + self.repository.storage_quota = 50 + self.repository.put(H(1), b'') + assert self.repository.storage_quota_use == 41 + self.repository.commit() + with pytest.raises(Repository.StorageQuotaExceeded): + self.repository.put(H(2), b'') + assert self.repository.storage_quota_use == 82 + with pytest.raises(Repository.StorageQuotaExceeded): + self.repository.commit() + assert self.repository.storage_quota_use == 82 + self.reopen() + with self.repository: + self.repository.storage_quota = 50 + # Open new transaction; hints and thus quota data is not loaded unless needed. + self.repository.put(H(1), b'') + assert self.repository.storage_quota_use == 41 + + +class NonceReservation(RepositoryTestCaseBase): + def test_get_free_nonce_asserts(self): + self.reopen(exclusive=False) + with pytest.raises(AssertionError): + with self.repository: + self.repository.get_free_nonce() + + def test_get_free_nonce(self): + with self.repository: + assert self.repository.get_free_nonce() is None + + with open(os.path.join(self.repository.path, "nonce"), "w") as fd: + fd.write("0000000000000000") + assert self.repository.get_free_nonce() == 0 + + with open(os.path.join(self.repository.path, "nonce"), "w") as fd: + fd.write("5000000000000000") + assert self.repository.get_free_nonce() == 0x5000000000000000 + + def test_commit_nonce_reservation_asserts(self): + self.reopen(exclusive=False) + with pytest.raises(AssertionError): + with self.repository: + self.repository.commit_nonce_reservation(0x200, 0x100) + + def test_commit_nonce_reservation(self): + with self.repository: + with pytest.raises(Exception): + self.repository.commit_nonce_reservation(0x200, 15) + + self.repository.commit_nonce_reservation(0x200, None) + with open(os.path.join(self.repository.path, "nonce"), "r") as fd: + assert fd.read() == "0000000000000200" + + with pytest.raises(Exception): + self.repository.commit_nonce_reservation(0x200, 15) + + self.repository.commit_nonce_reservation(0x400, 0x200) + with open(os.path.join(self.repository.path, "nonce"), "r") as fd: + assert fd.read() == "0000000000000400" + + class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): def setUp(self): super().setUp() - self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(H(0), b'foo') self.repository.commit() self.repository.close() def do_commit(self): with self.repository: - self.repository.put(b'00000000000000000000000000000000', b'fox') + self.repository.put(H(0), b'fox') self.repository.commit() def test_corrupted_hints(self): @@ -317,6 +556,42 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): with self.repository: assert len(self.repository) == 1 + def _corrupt_index(self): + # HashIndex is able to detect incorrect headers and file lengths, + # but on its own it can't tell if the data is correct. 
+ index_path = os.path.join(self.repository.path, 'index.1') + with open(index_path, 'r+b') as fd: + index_data = fd.read() + # Flip one bit in a key stored in the index + corrupted_key = (int.from_bytes(H(0), 'little') ^ 1).to_bytes(32, 'little') + corrupted_index_data = index_data.replace(H(0), corrupted_key) + assert corrupted_index_data != index_data + assert len(corrupted_index_data) == len(index_data) + fd.seek(0) + fd.write(corrupted_index_data) + + def test_index_corrupted(self): + # HashIndex is able to detect incorrect headers and file lengths, + # but on its own it can't tell if the data itself is correct. + self._corrupt_index() + with self.repository: + # Data corruption is detected due to mismatching checksums + # and fixed by rebuilding the index. + assert len(self.repository) == 1 + assert self.repository.get(H(0)) == b'foo' + + def test_index_corrupted_without_integrity(self): + self._corrupt_index() + integrity_path = os.path.join(self.repository.path, 'integrity.1') + os.unlink(integrity_path) + with self.repository: + # Since the corrupted key is not noticed, the repository still thinks + # it contains one key... + assert len(self.repository) == 1 + with pytest.raises(Repository.ObjectNotFound): + # ... but the real, uncorrupted key is not found in the corrupted index. + self.repository.get(H(0)) + def test_unreadable_index(self): index = os.path.join(self.repository.path, 'index.1') os.unlink(index) @@ -324,6 +599,66 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): with self.assert_raises(OSError): self.do_commit() + def test_unknown_integrity_version(self): + # For now an unknown integrity data version is ignored and not an error. + integrity_path = os.path.join(self.repository.path, 'integrity.1') + with open(integrity_path, 'r+b') as fd: + msgpack.pack({ + # Borg only understands version 2 + b'version': 4.7, + }, fd) + fd.truncate() + with self.repository: + # No issues accessing the repository + assert len(self.repository) == 1 + assert self.repository.get(H(0)) == b'foo' + + def _subtly_corrupted_hints_setup(self): + with self.repository: + self.repository.append_only = True + assert len(self.repository) == 1 + assert self.repository.get(H(0)) == b'foo' + self.repository.put(H(1), b'bar') + self.repository.put(H(2), b'baz') + self.repository.commit() + self.repository.put(H(2), b'bazz') + self.repository.commit() + + hints_path = os.path.join(self.repository.path, 'hints.5') + with open(hints_path, 'r+b') as fd: + hints = msgpack.unpack(fd) + fd.seek(0) + # Corrupt segment refcount + assert hints[b'segments'][2] == 1 + hints[b'segments'][2] = 0 + msgpack.pack(hints, fd) + fd.truncate() + + def test_subtly_corrupted_hints(self): + self._subtly_corrupted_hints_setup() + with self.repository: + self.repository.append_only = False + self.repository.put(H(3), b'1234') + # Do a compaction run. Succeeds, since the failed checksum prompted a rebuild of the index+hints. + self.repository.commit() + + assert len(self.repository) == 4 + assert self.repository.get(H(0)) == b'foo' + assert self.repository.get(H(1)) == b'bar' + assert self.repository.get(H(2)) == b'bazz' + + def test_subtly_corrupted_hints_without_integrity(self): + self._subtly_corrupted_hints_setup() + integrity_path = os.path.join(self.repository.path, 'integrity.5') + os.unlink(integrity_path) + with self.repository: + self.repository.append_only = False + self.repository.put(H(3), b'1234') + # Do a compaction run. 
Fails, since the corrupted refcount was not detected and leads to an assertion failure. + with pytest.raises(AssertionError) as exc_info: + self.repository.commit() + assert 'Corrupted segment reference count' in str(exc_info.value) + class RepositoryCheckTestCase(RepositoryTestCaseBase): @@ -337,12 +672,12 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): def get_objects(self, *ids): for id_ in ids: - self.repository.get(('%032d' % id_).encode('ascii')) + self.repository.get(H(id_)) def add_objects(self, segments): for ids in segments: for id_ in ids: - self.repository.put(('%032d' % id_).encode('ascii'), b'data') + self.repository.put(H(id_), b'data') self.repository.commit() def get_head(self): @@ -353,7 +688,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): def corrupt_object(self, id_): idx = self.open_index() - segment, offset = idx[('%032d' % id_).encode('ascii')] + segment, offset = idx[H(id_)] with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)), 'r+b') as fd: fd.seek(offset) fd.write(b'BOOM') @@ -444,28 +779,84 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) def test_crash_before_compact(self): - self.repository.put(bytes(32), b'data') - self.repository.put(bytes(32), b'data2') + self.repository.put(H(0), b'data') + self.repository.put(H(0), b'data2') # Simulate a crash before compact with patch.object(Repository, 'compact_segments') as compact: self.repository.commit() - compact.assert_called_once_with(save_space=False) + compact.assert_called_once_with() self.reopen() with self.repository: self.check(repair=True) - self.assert_equal(self.repository.get(bytes(32)), b'data2') + self.assert_equal(self.repository.get(H(0)), b'data2') class RemoteRepositoryTestCase(RepositoryTestCase): + repository = None # type: RemoteRepository def open(self, create=False): if sys.platform != 'win32': - return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + exclusive=True, create=create) else: - return RemoteRepository(Location(os.path.join(self.tmppath, 'repository')), create=create) + return RemoteRepository(Location(os.path.join(self.tmppath, 'repository')), exclusive=True, create=create) def test_invalid_rpc(self): - self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None)) + self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', {})) + + def test_rpc_exception_transport(self): + s1 = 'test string' + + try: + self.repository.call('inject_exception', {'kind': 'DoesNotExist'}) + except Repository.DoesNotExist as e: + assert len(e.args) == 1 + assert e.args[0] == self.repository.location.orig + + try: + self.repository.call('inject_exception', {'kind': 'AlreadyExists'}) + except Repository.AlreadyExists as e: + assert len(e.args) == 1 + assert e.args[0] == self.repository.location.orig + + try: + self.repository.call('inject_exception', {'kind': 'CheckNeeded'}) + except Repository.CheckNeeded as e: + assert len(e.args) == 1 + assert e.args[0] == self.repository.location.orig + + try: + self.repository.call('inject_exception', {'kind': 'IntegrityError'}) + except IntegrityError as e: + assert len(e.args) == 1 + assert e.args[0] == s1 + + try: + self.repository.call('inject_exception', {'kind': 'PathNotAllowed'}) + except PathNotAllowed as e: + assert len(e.args) == 1 + assert 
e.args[0] == 'foo' + + try: + self.repository.call('inject_exception', {'kind': 'ObjectNotFound'}) + except Repository.ObjectNotFound as e: + assert len(e.args) == 2 + assert e.args[0] == s1 + assert e.args[1] == self.repository.location.orig + + try: + self.repository.call('inject_exception', {'kind': 'InvalidRPCMethod'}) + except InvalidRPCMethod as e: + assert len(e.args) == 1 + assert e.args[0] == s1 + + try: + self.repository.call('inject_exception', {'kind': 'divide'}) + except RemoteRepository.RPCError as e: + assert e.unpacked + assert e.get_message() == 'ZeroDivisionError: integer division or modulo by zero\n' + assert e.exception_class == 'ZeroDivisionError' + assert len(e.exception_full) > 0 def test_ssh_cmd(self): assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', 'example.com'] @@ -479,20 +870,62 @@ class RemoteRepositoryTestCase(RepositoryTestCase): class MockArgs: remote_path = 'borg' umask = 0o077 + debug_topics = [] + + def __contains__(self, item): + # To behave like argparse.Namespace + return hasattr(self, item) assert self.repository.borg_cmd(None, testing=True) == [sys.executable, '-m', 'borg.archiver', 'serve'] args = MockArgs() + # XXX without next line we get spurious test fails when using pytest-xdist, root cause unknown: + logging.getLogger().setLevel(logging.INFO) # note: test logger is on info log level, so --info gets added automagically assert self.repository.borg_cmd(args, testing=False) == ['borg', 'serve', '--umask=077', '--info'] args.remote_path = 'borg-0.28.2' assert self.repository.borg_cmd(args, testing=False) == ['borg-0.28.2', 'serve', '--umask=077', '--info'] + args.debug_topics = ['something_client_side', 'repository_compaction'] + assert self.repository.borg_cmd(args, testing=False) == ['borg-0.28.2', 'serve', '--umask=077', '--info', + '--debug-topic=borg.debug.repository_compaction'] + args = MockArgs() + args.storage_quota = 0 + assert self.repository.borg_cmd(args, testing=False) == ['borg', 'serve', '--umask=077', '--info'] + args.storage_quota = 314159265 + assert self.repository.borg_cmd(args, testing=False) == ['borg', 'serve', '--umask=077', '--info', + '--storage-quota=314159265'] + + +class RemoteLegacyFree(RepositoryTestCaseBase): + # Keep testing this so we can someday safely remove the legacy tuple format. 
+ + def open(self, create=False): + with patch.object(RemoteRepository, 'dictFormat', True): + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + exclusive=True, create=create) + + def test_legacy_free(self): + # put + self.repository.put(H(0), b'foo') + self.repository.commit() + self.repository.close() + # replace + self.repository = self.open() + with self.repository: + self.repository.put(H(0), b'bar') + self.repository.commit() + # delete + self.repository = self.open() + with self.repository: + self.repository.delete(H(0)) + self.repository.commit() class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): def open(self, create=False): if sys.platform != 'win32': - return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + exclusive=True, create=create) else: return RemoteRepository(Location(os.path.join(self.tmppath, 'repository')), create=create) @@ -517,16 +950,22 @@ class RemoteLoggerTestCase(BaseTestCase): sys.stderr = self.old_stderr def test_stderr_messages(self): - handle_remote_line("unstructured stderr message") + handle_remote_line("unstructured stderr message\n") self.assert_equal(self.stream.getvalue(), '') # stderr messages don't get an implicit newline - self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr message') + self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr message\n') + + def test_stderr_progress_messages(self): + handle_remote_line("unstructured stderr progress message\r") + self.assert_equal(self.stream.getvalue(), '') + # stderr messages don't get an implicit newline + self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr progress message\r') def test_pre11_format_messages(self): self.handler.setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG) - handle_remote_line("$LOG INFO Remote: borg < 1.1 format message") + handle_remote_line("$LOG INFO Remote: borg < 1.1 format message\n") self.assert_equal(self.stream.getvalue(), 'Remote: borg < 1.1 format message\n') self.assert_equal(self.stderr.getvalue(), '') @@ -534,7 +973,7 @@ class RemoteLoggerTestCase(BaseTestCase): self.handler.setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG) - handle_remote_line("$LOG INFO borg.repository Remote: borg >= 1.1 format message") + handle_remote_line("$LOG INFO borg.repository Remote: borg >= 1.1 format message\n") self.assert_equal(self.stream.getvalue(), 'Remote: borg >= 1.1 format message\n') self.assert_equal(self.stderr.getvalue(), '') @@ -543,7 +982,7 @@ class RemoteLoggerTestCase(BaseTestCase): self.handler.setLevel(logging.WARNING) logging.getLogger().setLevel(logging.WARNING) - handle_remote_line("$LOG INFO borg.repository Remote: new format info message") + handle_remote_line("$LOG INFO borg.repository Remote: new format info message\n") self.assert_equal(self.stream.getvalue(), '') self.assert_equal(self.stderr.getvalue(), '') @@ -563,7 +1002,7 @@ class RemoteLoggerTestCase(BaseTestCase): foo_handler.setLevel(logging.INFO) logging.getLogger('borg.repository.foo').handlers[:] = [foo_handler] - handle_remote_line("$LOG INFO borg.repository Remote: new format child message") + handle_remote_line("$LOG INFO borg.repository Remote: new format child message\n") self.assert_equal(foo_stream.getvalue(), '') self.assert_equal(child_stream.getvalue(), 'Remote: new format child message\n') 
self.assert_equal(self.stream.getvalue(), '') diff --git a/src/borg/testsuite/shellpattern.py b/src/borg/testsuite/shellpattern.py index a84de5d3..bd3682bc 100644 --- a/src/borg/testsuite/shellpattern.py +++ b/src/borg/testsuite/shellpattern.py @@ -114,3 +114,14 @@ def test_match(path, patterns): def test_mismatch(path, patterns): for p in patterns: assert not check(path, p) + + +def test_match_end(): + regex = shellpattern.translate("*-home") # default is match_end == string end + assert re.match(regex, '2017-07-03-home') + assert not re.match(regex, '2017-07-03-home.checkpoint') + + match_end = r'(%s)?\Z' % r'\.checkpoint(\.\d+)?' # with/without checkpoint ending + regex = shellpattern.translate("*-home", match_end=match_end) + assert re.match(regex, '2017-07-03-home') + assert re.match(regex, '2017-07-03-home.checkpoint') diff --git a/src/borg/testsuite/upgrader.py b/src/borg/testsuite/upgrader.py index 088ee63b..08c0693b 100644 --- a/src/borg/testsuite/upgrader.py +++ b/src/borg/testsuite/upgrader.py @@ -1,19 +1,36 @@ import os +import tarfile import pytest -try: - import attic.repository - import attic.key - import attic.helpers -except ImportError: - attic = None - from ..constants import * # NOQA +from ..crypto.key import KeyfileKey from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey from ..helpers import get_keys_dir -from ..key import KeyfileKey from ..repository import Repository +from . import are_hardlinks_supported + + +# tar with a repo and repo keyfile from attic +ATTIC_TAR = os.path.join(os.path.dirname(__file__), 'attic.tar.gz') + + +def untar(tarfname, path, what): + """ + extract <tarfname> tar archive to <path>, all stuff starting with <what>. + + return path to <what>. + """ + + def files(members): + for tarinfo in members: + if tarinfo.name.startswith(what): + yield tarinfo + + with tarfile.open(tarfname, 'r') as tf: + tf.extractall(path, members=files(tf)) + + return os.path.join(path, what) def repo_valid(path): @@ -23,7 +40,7 @@ def repo_valid(path): :param path: the path to the repository :returns: if borg can check the repository """ - with Repository(str(path), create=False) as repository: + with Repository(str(path), exclusive=True, create=False) as repository: # can't check raises() because check() handles the error return repository.check() @@ -47,15 +64,10 @@ def attic_repo(tmpdir): create an attic repo with some stuff in it :param tmpdir: path to the repository to be created - :returns: a attic.repository.Repository object + :returns: path to attic repository """ - attic_repo = attic.repository.Repository(str(tmpdir), create=True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1` - for x in range(100): - attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - attic_repo.commit() - attic_repo.close() - return attic_repo + # there is some stuff in that repo, copied from `RepositoryTestCase.test1` + return untar(ATTIC_TAR, str(tmpdir), 'repo') @pytest.fixture(params=[True, False]) @@ -63,100 +75,82 @@ def inplace(request): return request.param -@pytest.mark.skipif(attic is None, reason='cannot find an attic install') -def test_convert_segments(tmpdir, attic_repo, inplace): +def test_convert_segments(attic_repo, inplace): """test segment conversion this will load the given attic repository, list all the segments then convert them one at a time.
we need to close the repo before conversion otherwise we have errors from borg - :param tmpdir: a temporary directory to run the test in (builtin - fixture) :param attic_repo: a populated attic repository (fixture) """ - # check should fail because of magic number - assert not repo_valid(tmpdir) - repository = AtticRepositoryUpgrader(str(tmpdir), create=False) + repo_path = attic_repo + with pytest.raises(Repository.AtticRepository): + repo_valid(repo_path) + repository = AtticRepositoryUpgrader(repo_path, create=False) with repository: segments = [filename for i, filename in repository.io.segment_iterator()] repository.convert_segments(segments, dryrun=False, inplace=inplace) repository.convert_cache(dryrun=False) - assert repo_valid(tmpdir) - - -class MockArgs: - """ - mock attic location - - this is used to simulate a key location with a properly loaded - repository object to create a key file - """ - def __init__(self, path): - self.repository = attic.helpers.Location(path) + assert repo_valid(repo_path) @pytest.fixture() -def attic_key_file(attic_repo, tmpdir): +def attic_key_file(tmpdir, monkeypatch): """ create an attic key file from the given repo, in the keys subdirectory of the given tmpdir - :param attic_repo: an attic.repository.Repository object (fixture - define above) :param tmpdir: a temporary directory (a builtin fixture) - :returns: the KeyfileKey object as returned by - attic.key.KeyfileKey.create() + :returns: path to key file """ - keys_dir = str(tmpdir.mkdir('keys')) + keys_dir = untar(ATTIC_TAR, str(tmpdir), 'keys') # we use the repo dir for the created keyfile, because we do # not want to clutter existing keyfiles - os.environ['ATTIC_KEYS_DIR'] = keys_dir + monkeypatch.setenv('ATTIC_KEYS_DIR', keys_dir) # we use the same directory for the converted files, which # will clutter the previously created one, which we don't care # about anyways. in real runs, the original key will be retained. - os.environ['BORG_KEYS_DIR'] = keys_dir - os.environ['ATTIC_PASSPHRASE'] = 'test' - return attic.key.KeyfileKey.create(attic_repo, - MockArgs(keys_dir)) + monkeypatch.setenv('BORG_KEYS_DIR', keys_dir) + monkeypatch.setenv('ATTIC_PASSPHRASE', 'test') + + return os.path.join(keys_dir, 'repo') -@pytest.mark.skipif(attic is None, reason='cannot find an attic install') -def test_keys(tmpdir, attic_repo, attic_key_file): +def test_keys(attic_repo, attic_key_file): """test key conversion test that we can convert the given key to a properly formatted borg key. assumes that the ATTIC_KEYS_DIR and BORG_KEYS_DIR have been properly populated by the attic_key_file fixture. 
- :param tmpdir: a temporary directory (a builtin fixture) - :param attic_repo: an attic.repository.Repository object (fixture - define above) - :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + :param attic_repo: path to an attic repository (fixture defined above) + :param attic_key_file: path to an attic key file (fixture defined above) """ - with AtticRepositoryUpgrader(str(tmpdir), create=False) as repository: + keyfile_path = attic_key_file + assert not key_valid(keyfile_path) # not upgraded yet + with AtticRepositoryUpgrader(attic_repo, create=False) as repository: keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryUpgrader.convert_keyfiles(keyfile, dryrun=False) - assert key_valid(attic_key_file.path) + assert key_valid(keyfile_path) -@pytest.mark.skipif(attic is None, reason='cannot find an attic install') -def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace): +def test_convert_all(attic_repo, attic_key_file, inplace): """test all conversion steps this runs everything. mostly redundant test, since everything is done above. yet we expect a NotImplementedError because we do not convert caches yet. - :param tmpdir: a temporary directory (a builtin fixture) - :param attic_repo: an attic.repository.Repository object (fixture - define above) - :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + :param attic_repo: path to an attic repository (fixture defined above) + :param attic_key_file: path to an attic key file (fixture defined above) """ - # check should fail because of magic number - assert not repo_valid(tmpdir) + repo_path = attic_repo + + with pytest.raises(Repository.AtticRepository): + repo_valid(repo_path) def stat_segment(path): return os.stat(os.path.join(path, 'data', '0', '0')) @@ -164,8 +158,8 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace): def first_inode(path): return stat_segment(path).st_ino - orig_inode = first_inode(attic_repo.path) - with AtticRepositoryUpgrader(str(tmpdir), create=False) as repository: + orig_inode = first_inode(repo_path) + with AtticRepositoryUpgrader(repo_path, create=False) as repository: # replicate command dispatch, partly os.umask(UMASK_DEFAULT) backup = repository.upgrade(dryrun=False, inplace=inplace) @@ -177,12 +171,14 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace): assert first_inode(repository.path) != first_inode(backup) # i have seen cases where the copied tree has world-readable # permissions, which is wrong - assert stat_segment(backup).st_mode & UMASK_DEFAULT == 0 + if 'BORG_TESTS_IGNORE_MODES' not in os.environ: + assert stat_segment(backup).st_mode & UMASK_DEFAULT == 0 - assert key_valid(attic_key_file.path) - assert repo_valid(tmpdir) + assert key_valid(attic_key_file) + assert repo_valid(repo_path) +@pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported') def test_hardlink(tmpdir, inplace): """test that we handle hard links properly diff --git a/src/borg/testsuite/version.py b/src/borg/testsuite/version.py new file mode 100644 index 00000000..d17dee0e --- /dev/null +++ b/src/borg/testsuite/version.py @@ -0,0 +1,53 @@ +import pytest + +from ..version import parse_version, format_version + + +@pytest.mark.parametrize("version_str, version_tuple", [ + # setuptools < 8.0 uses "-" + ('1.0.0a1.dev204-g8866961.d20170606', (1, 0, 0, -4, 1)), + ('1.0.0a1.dev204-g8866961', (1, 0, 0, -4, 1)), + ('1.0.0-d20170606', (1, 0, 0, -1)), + # setuptools >= 8.0 uses "+" + 
('1.0.0a1.dev204+g8866961.d20170606', (1, 0, 0, -4, 1)), + ('1.0.0a1.dev204+g8866961', (1, 0, 0, -4, 1)), + ('1.0.0+d20170606', (1, 0, 0, -1)), + # pre-release versions: + ('1.0.0a1', (1, 0, 0, -4, 1)), + ('1.0.0a2', (1, 0, 0, -4, 2)), + ('1.0.0b3', (1, 0, 0, -3, 3)), + ('1.0.0rc4', (1, 0, 0, -2, 4)), + # release versions: + ('0.0.0', (0, 0, 0, -1)), + ('0.0.11', (0, 0, 11, -1)), + ('0.11.0', (0, 11, 0, -1)), + ('11.0.0', (11, 0, 0, -1)), +]) +def test_parse_version(version_str, version_tuple): + assert parse_version(version_str) == version_tuple + + +def test_parse_version_invalid(): + with pytest.raises(ValueError): + assert parse_version('') # we require x.y.z versions + with pytest.raises(ValueError): + assert parse_version('1') # we require x.y.z versions + with pytest.raises(ValueError): + assert parse_version('1.2') # we require x.y.z versions + with pytest.raises(ValueError): + assert parse_version('crap') + + +@pytest.mark.parametrize("version_str, version_tuple", [ + ('1.0.0a1', (1, 0, 0, -4, 1)), + ('1.0.0', (1, 0, 0, -1)), + ('1.0.0a2', (1, 0, 0, -4, 2)), + ('1.0.0b3', (1, 0, 0, -3, 3)), + ('1.0.0rc4', (1, 0, 0, -2, 4)), + ('0.0.0', (0, 0, 0, -1)), + ('0.0.11', (0, 0, 11, -1)), + ('0.11.0', (0, 11, 0, -1)), + ('11.0.0', (11, 0, 0, -1)), +]) +def test_format_version(version_str, version_tuple): + assert format_version(version_tuple) == version_str diff --git a/src/borg/testsuite/xattr.py b/src/borg/testsuite/xattr.py index df0130c9..709d773e 100644 --- a/src/borg/testsuite/xattr.py +++ b/src/borg/testsuite/xattr.py @@ -2,7 +2,9 @@ import os import tempfile import unittest -from ..xattr import is_enabled, getxattr, setxattr, listxattr +import pytest + +from ..xattr import is_enabled, getxattr, setxattr, listxattr, buffer, split_lstring from . 
import BaseTestCase @@ -11,7 +13,7 @@ class XattrTestCase(BaseTestCase): def setUp(self): self.tmpfile = tempfile.NamedTemporaryFile() - self.symlink = os.path.join(os.path.dirname(self.tmpfile.name), 'symlink') + self.symlink = self.tmpfile.name + '.symlink' os.symlink(self.tmpfile.name, self.symlink) def tearDown(self): @@ -38,3 +40,33 @@ class XattrTestCase(BaseTestCase): self.assert_equal(getxattr(self.tmpfile.fileno(), 'user.foo'), b'bar') self.assert_equal(getxattr(self.symlink, 'user.foo'), b'bar') self.assert_equal(getxattr(self.tmpfile.name, 'user.empty'), None) + + def test_listxattr_buffer_growth(self): + # make it work even with ext4, which imposes rather low limits + buffer.resize(size=64, init=True) + # xattr raw key list will be size 9 * (10 + 1), which is > 64 + keys = ['user.attr%d' % i for i in range(9)] + for key in keys: + setxattr(self.tmpfile.name, key, b'x') + got_keys = listxattr(self.tmpfile.name) + self.assert_equal_se(got_keys, keys) + self.assert_equal(len(buffer), 128) + + def test_getxattr_buffer_growth(self): + # make it work even with ext4, which imposes rather low limits + buffer.resize(size=64, init=True) + value = b'x' * 126 + setxattr(self.tmpfile.name, 'user.big', value) + got_value = getxattr(self.tmpfile.name, 'user.big') + self.assert_equal(value, got_value) + self.assert_equal(len(buffer), 128) + + +@pytest.mark.parametrize('lstring, splitted', ( + (b'', []), + (b'\x00', [b'']), + (b'\x01a', [b'a']), + (b'\x01a\x02cd', [b'a', b'cd']), +)) +def test_split_lstring(lstring, splitted): + assert split_lstring(lstring) == splitted diff --git a/src/borg/upgrader.py b/src/borg/upgrader.py index af692bb6..1044f649 100644 --- a/src/borg/upgrader.py +++ b/src/borg/upgrader.py @@ -3,21 +3,23 @@ import os import shutil import time -import logging -logger = logging.getLogger(__name__) - -from .helpers import get_home_dir, get_keys_dir, get_cache_dir +from .crypto.key import KeyfileKey, KeyfileNotFoundError +from .constants import REPOSITORY_README from .helpers import ProgressIndicatorPercent -from .key import KeyfileKey, KeyfileNotFoundError -from .locking import UpgradableLock +from .helpers import get_home_dir, get_keys_dir, get_cache_dir +from .locking import Lock +from .logger import create_logger from .repository import Repository, MAGIC +logger = create_logger(__name__) + ATTIC_MAGIC = b'ATTICSEG' class AtticRepositoryUpgrader(Repository): def __init__(self, *args, **kw): kw['lock'] = False # do not create borg lock files (now) in attic repo + kw['check_segment_magic'] = False # skip the Attic check when upgrading super().__init__(*args, **kw) def upgrade(self, dryrun=True, inplace=False, progress=False): @@ -34,13 +36,13 @@ class AtticRepositoryUpgrader(Repository): with self: backup = None if not inplace: - backup = '{}.upgrade-{:%Y-%m-%d-%H:%M:%S}'.format(self.path, datetime.datetime.now()) + backup = '{}.before-upgrade-{:%Y-%m-%d-%H:%M:%S}'.format(self.path, datetime.datetime.now()) logger.info('making a hardlink copy in %s', backup) if not dryrun: shutil.copytree(self.path, backup, copy_function=os.link) logger.info("opening attic repository with borg and converting") # now lock the repo, after we have made the copy - self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True, timeout=1.0).acquire() + self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=1.0).acquire() segments = [filename for i, filename in self.io.segment_iterator()] try: keyfile = self.find_attic_keyfile() @@ -49,8 +51,7 @@ class 
AtticRepositoryUpgrader(Repository): else: self.convert_keyfiles(keyfile, dryrun) # partial open: just hold on to the lock - self.lock = UpgradableLock(os.path.join(self.path, 'lock'), - exclusive=True).acquire() + self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True).acquire() try: self.convert_cache(dryrun) self.convert_repo_index(dryrun=dryrun, inplace=inplace) @@ -65,7 +66,7 @@ class AtticRepositoryUpgrader(Repository): readme = os.path.join(self.path, 'README') os.remove(readme) with open(readme, 'w') as fd: - fd.write('This is a Borg repository\n') + fd.write(REPOSITORY_README) @staticmethod def convert_segments(segments, dryrun=True, inplace=False, progress=False): @@ -78,7 +79,7 @@ class AtticRepositoryUpgrader(Repository): replace the 8 first bytes of all regular files in there.""" logger.info("converting %d segments..." % len(segments)) segment_count = len(segments) - pi = ProgressIndicatorPercent(total=segment_count, msg="Converting segments %3.0f%%", same_line=True) + pi = ProgressIndicatorPercent(total=segment_count, msg="Converting segments %3.0f%%") for i, filename in enumerate(segments): if progress: pi.show(i) @@ -131,7 +132,6 @@ class AtticRepositoryUpgrader(Repository): @staticmethod def convert_keyfiles(keyfile, dryrun): - """convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in diff --git a/src/borg/version.py b/src/borg/version.py new file mode 100644 index 00000000..a7a997f7 --- /dev/null +++ b/src/borg/version.py @@ -0,0 +1,49 @@ +import re + + +def parse_version(version): + """ + Simplistic parser for setuptools_scm versions. + + Supports final versions and alpha ('a'), beta ('b') and release candidate ('rc') versions. + It does not try to parse anything else than that, even if there is more in the version string. + + Output is a version tuple containing integers. It ends with one or two elements that ensure that relational + operators yield correct relations for alpha, beta and rc versions, too. + For final versions the last element is a -1. + For prerelease versions the last two elements are a smaller negative number and the number of e.g. the beta. + + This version format is part of the remote protocol, don't change in breaking ways. + """ + version_re = r""" + (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+) # version, e.g. 1.2.33 + (?P<prerelease>(?P<ptype>a|b|rc)(?P<pnum>\d+))? # optional prerelease, e.g.
a1 or b2 or rc33 + """ + m = re.match(version_re, version, re.VERBOSE) + if m is None: + raise ValueError('Invalid version string %s' % version) + gd = m.groupdict() + version = [int(gd['major']), int(gd['minor']), int(gd['patch'])] + if m.lastgroup == 'prerelease': + p_type = {'a': -4, 'b': -3, 'rc': -2}[gd['ptype']] + p_num = int(gd['pnum']) + version += [p_type, p_num] + else: + version += [-1] + return tuple(version) + + +def format_version(version): + """a reverse for parse_version (obviously without the dropped information)""" + f = [] + it = iter(version) + while True: + part = next(it) + if part >= 0: + f.append(str(part)) + elif part == -1: + break + else: + f[-1] = f[-1] + {-2: 'rc', -3: 'b', -4: 'a'}[part] + str(next(it)) + break + return '.'.join(f) diff --git a/src/borg/xattr.py b/src/borg/xattr.py index 9f4ab34d..c1f33146 100644 --- a/src/borg/xattr.py +++ b/src/borg/xattr.py @@ -1,5 +1,5 @@ -"""A basic extended attributes (xattr) implementation for Linux and MacOS X -""" +"""A basic extended attributes (xattr) implementation for Linux, FreeBSD and MacOS X.""" + import errno import os import re @@ -10,8 +10,17 @@ from ctypes import CDLL, create_string_buffer, c_ssize_t, c_size_t, c_char_p, c_ from ctypes.util import find_library from distutils.version import LooseVersion -from .logger import create_logger -logger = create_logger() +from .helpers import Buffer, prepare_subprocess_env + + +try: + ENOATTR = errno.ENOATTR +except AttributeError: + # on some platforms, ENOATTR is missing, use ENODATA there + ENOATTR = errno.ENODATA + + +buffer = Buffer(create_string_buffer, limit=2**24) def is_enabled(path=None): @@ -26,13 +35,33 @@ def is_enabled(path=None): def get_all(path, follow_symlinks=True): + """ + Return all extended attributes on *path* as a mapping. + + *path* can either be a path (str or bytes) or an open file descriptor (int). + *follow_symlinks* indicates whether symlinks should be followed + and only applies when *path* is not an open file descriptor. + + The returned mapping maps xattr names (str) to values (bytes or None). + None indicates, as a xattr value, an empty value, i.e. a value of length zero. + """ try: - return dict((name, getxattr(path, name, follow_symlinks=follow_symlinks)) - for name in listxattr(path, follow_symlinks=follow_symlinks)) + result = {} + names = listxattr(path, follow_symlinks=follow_symlinks) + for name in names: + try: + result[name] = getxattr(path, name, follow_symlinks=follow_symlinks) + except OSError as e: + # if we get ENOATTR, a race has happened: xattr names were deleted after list. + # we just ignore the now missing ones. if you want consistency, do snapshots. + if e.errno != ENOATTR: + raise + return result except OSError as e: if e.errno in (errno.ENOTSUP, errno.EPERM): return {} + libc_name = find_library('c') if libc_name is None: # find_library didn't work, maybe we are on some minimal system that misses essential @@ -46,7 +75,7 @@ if libc_name is None: libc_name = 'libc.dylib' else: msg = "Can't find C library. No fallback known. Try installing ldconfig, gcc/cc or objdump." - logger.error(msg) + print(msg, file=sys.stderr) # logger isn't initialized at this stage raise Exception(msg) # If we are running with fakeroot on Linux, then use the xattr functions of fakeroot. 
This is needed by @@ -59,7 +88,9 @@ if sys.platform.startswith('linux'): preloads = re.split("[ :]", LD_PRELOAD) for preload in preloads: if preload.startswith("libfakeroot"): - fakeroot_version = LooseVersion(subprocess.check_output(['fakeroot', '-v']).decode('ascii').split()[-1]) + env = prepare_subprocess_env(system=True) + fakeroot_output = subprocess.check_output(['fakeroot', '-v'], env=env) + fakeroot_version = LooseVersion(fakeroot_output.decode('ascii').split()[-1]) if fakeroot_version >= LooseVersion("1.20.2"): # 1.20.2 has been confirmed to have xattr support # 1.18.2 has been confirmed not to have xattr support @@ -75,11 +106,88 @@ except OSError as e: raise Exception(msg) -def _check(rv, path=None): +def split_string0(buf): + """split a list of zero-terminated strings into python not-zero-terminated bytes""" + return buf.split(b'\0')[:-1] + + +def split_lstring(buf): + """split a list of length-prefixed strings into python not-length-prefixed bytes""" + result = [] + mv = memoryview(buf) + while mv: + length = mv[0] + result.append(bytes(mv[1:1 + length])) + mv = mv[1 + length:] + return result + + +class BufferTooSmallError(Exception): + """the buffer given to a xattr function was too small for the result.""" + + +def _check(rv, path=None, detect_buffer_too_small=False): if rv < 0: - raise OSError(get_errno(), path) + e = get_errno() + if detect_buffer_too_small and e == errno.ERANGE: + # listxattr and getxattr signal with ERANGE that they need a bigger result buffer. + # setxattr signals this way that e.g. a xattr key name is too long / unacceptable. + raise BufferTooSmallError + else: + try: + msg = os.strerror(e) + except ValueError: + msg = '' + if isinstance(path, int): + path = '<FD %d>' % path + raise OSError(e, msg, path) + if detect_buffer_too_small and rv >= len(buffer): + # freebsd does not error with ERANGE if the buffer is too small, + # it just fills the buffer, truncates and returns. + # so, we play sure and just assume that result is truncated if + # it happens to be a full buffer.
+ raise BufferTooSmallError return rv + +def _listxattr_inner(func, path): + if isinstance(path, str): + path = os.fsencode(path) + size = len(buffer) + while True: + buf = buffer.get(size) + try: + n = _check(func(path, buf, size), path, detect_buffer_too_small=True) + except BufferTooSmallError: + size *= 2 + else: + return n, buf.raw + + +def _getxattr_inner(func, path, name): + if isinstance(path, str): + path = os.fsencode(path) + name = os.fsencode(name) + size = len(buffer) + while True: + buf = buffer.get(size) + try: + n = _check(func(path, name, buf, size), path, detect_buffer_too_small=True) + except BufferTooSmallError: + size *= 2 + else: + return n, buf.raw + + +def _setxattr_inner(func, path, name, value): + if isinstance(path, str): + path = os.fsencode(path) + name = os.fsencode(name) + value = value and os.fsencode(value) + size = len(value) if value else 0 + _check(func(path, name, value, size), path, detect_buffer_too_small=False) + + if sys.platform.startswith('linux'): # pragma: linux only libc.llistxattr.argtypes = (c_char_p, c_char_p, c_size_t) libc.llistxattr.restype = c_ssize_t @@ -95,54 +203,44 @@ if sys.platform.startswith('linux'): # pragma: linux only libc.fgetxattr.restype = c_ssize_t def listxattr(path, *, follow_symlinks=True): - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.flistxattr - elif follow_symlinks: - func = libc.listxattr - else: - func = libc.llistxattr - n = _check(func(path, None, 0), path) - if n == 0: - return [] - namebuf = create_string_buffer(n) - n2 = _check(func(path, namebuf, n), path) - if n2 != n: - raise Exception('listxattr failed') - return [os.fsdecode(name) for name in namebuf.raw.split(b'\0')[:-1] if not name.startswith(b'system.posix_acl_')] + def func(path, buf, size): + if isinstance(path, int): + return libc.flistxattr(path, buf, size) + else: + if follow_symlinks: + return libc.listxattr(path, buf, size) + else: + return libc.llistxattr(path, buf, size) + + n, buf = _listxattr_inner(func, path) + return [os.fsdecode(name) for name in split_string0(buf[:n]) + if name and not name.startswith(b'system.posix_acl_')] def getxattr(path, name, *, follow_symlinks=True): - name = os.fsencode(name) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fgetxattr - elif follow_symlinks: - func = libc.getxattr - else: - func = libc.lgetxattr - n = _check(func(path, name, None, 0)) - if n == 0: - return - valuebuf = create_string_buffer(n) - n2 = _check(func(path, name, valuebuf, n), path) - if n2 != n: - raise Exception('getxattr failed') - return valuebuf.raw + def func(path, name, buf, size): + if isinstance(path, int): + return libc.fgetxattr(path, name, buf, size) + else: + if follow_symlinks: + return libc.getxattr(path, name, buf, size) + else: + return libc.lgetxattr(path, name, buf, size) + + n, buf = _getxattr_inner(func, path, name) + return buf[:n] or None def setxattr(path, name, value, *, follow_symlinks=True): - name = os.fsencode(name) - value = value and os.fsencode(value) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fsetxattr - elif follow_symlinks: - func = libc.setxattr - else: - func = libc.lsetxattr - _check(func(path, name, value, len(value) if value else 0, 0), path) + def func(path, name, value, size): + flags = 0 + if isinstance(path, int): + return libc.fsetxattr(path, name, value, size, flags) + else: + if follow_symlinks: + return libc.setxattr(path, name, value, 
size, flags) + else: + return libc.lsetxattr(path, name, value, size, flags) + + _setxattr_inner(func, path, name, value) elif sys.platform == 'darwin': # pragma: darwin only libc.listxattr.argtypes = (c_char_p, c_char_p, c_size_t, c_int) @@ -158,60 +256,48 @@ elif sys.platform == 'darwin': # pragma: darwin only libc.fgetxattr.argtypes = (c_int, c_char_p, c_char_p, c_size_t, c_uint32, c_int) libc.fgetxattr.restype = c_ssize_t + XATTR_NOFLAGS = 0x0000 XATTR_NOFOLLOW = 0x0001 def listxattr(path, *, follow_symlinks=True): - func = libc.listxattr - flags = 0 - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.flistxattr - elif not follow_symlinks: - flags = XATTR_NOFOLLOW - n = _check(func(path, None, 0, flags), path) - if n == 0: - return [] - namebuf = create_string_buffer(n) - n2 = _check(func(path, namebuf, n, flags), path) - if n2 != n: - raise Exception('listxattr failed') - return [os.fsdecode(name) for name in namebuf.raw.split(b'\0')[:-1]] + def func(path, buf, size): + if isinstance(path, int): + return libc.flistxattr(path, buf, size, XATTR_NOFLAGS) + else: + if follow_symlinks: + return libc.listxattr(path, buf, size, XATTR_NOFLAGS) + else: + return libc.listxattr(path, buf, size, XATTR_NOFOLLOW) + + n, buf = _listxattr_inner(func, path) + return [os.fsdecode(name) for name in split_string0(buf[:n]) if name] def getxattr(path, name, *, follow_symlinks=True): - name = os.fsencode(name) - func = libc.getxattr - flags = 0 - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fgetxattr - elif not follow_symlinks: - flags = XATTR_NOFOLLOW - n = _check(func(path, name, None, 0, 0, flags)) - if n == 0: - return - valuebuf = create_string_buffer(n) - n2 = _check(func(path, name, valuebuf, n, 0, flags), path) - if n2 != n: - raise Exception('getxattr failed') - return valuebuf.raw + def func(path, name, buf, size): + if isinstance(path, int): + return libc.fgetxattr(path, name, buf, size, 0, XATTR_NOFLAGS) + else: + if follow_symlinks: + return libc.getxattr(path, name, buf, size, 0, XATTR_NOFLAGS) + else: + return libc.getxattr(path, name, buf, size, 0, XATTR_NOFOLLOW) + + n, buf = _getxattr_inner(func, path, name) + return buf[:n] or None def setxattr(path, name, value, *, follow_symlinks=True): - name = os.fsencode(name) - value = value and os.fsencode(value) - func = libc.setxattr - flags = 0 - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fsetxattr - elif not follow_symlinks: - flags = XATTR_NOFOLLOW - _check(func(path, name, value, len(value) if value else 0, 0, flags), path) + def func(path, name, value, size): + if isinstance(path, int): + return libc.fsetxattr(path, name, value, size, 0, XATTR_NOFLAGS) + else: + if follow_symlinks: + return libc.setxattr(path, name, value, size, 0, XATTR_NOFLAGS) + else: + return libc.setxattr(path, name, value, size, 0, XATTR_NOFOLLOW) + + _setxattr_inner(func, path, name, value) elif sys.platform.startswith('freebsd'): # pragma: freebsd only - EXTATTR_NAMESPACE_USER = 0x0001 libc.extattr_list_fd.argtypes = (c_int, c_int, c_char_p, c_size_t) libc.extattr_list_fd.restype = c_ssize_t libc.extattr_list_link.argtypes = (c_char_p, c_int, c_char_p, c_size_t) @@ -230,70 +316,75 @@ elif sys.platform.startswith('freebsd'): # pragma: freebsd only libc.extattr_set_link.restype = c_int libc.extattr_set_file.argtypes = (c_char_p, c_int, c_char_p, c_char_p, c_size_t) libc.extattr_set_file.restype = c_int + ns = 
EXTATTR_NAMESPACE_USER = 0x0001 def listxattr(path, *, follow_symlinks=True): - ns = EXTATTR_NAMESPACE_USER - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.extattr_list_fd - elif follow_symlinks: - func = libc.extattr_list_file - else: - func = libc.extattr_list_link - n = _check(func(path, ns, None, 0), path) - if n == 0: - return [] - namebuf = create_string_buffer(n) - n2 = _check(func(path, ns, namebuf, n), path) - if n2 != n: - raise Exception('listxattr failed') - names = [] - mv = memoryview(namebuf.raw) - while mv: - length = mv[0] - names.append(os.fsdecode(bytes(mv[1:1 + length]))) - mv = mv[1 + length:] - return names + def func(path, buf, size): + if isinstance(path, int): + return libc.extattr_list_fd(path, ns, buf, size) + else: + if follow_symlinks: + return libc.extattr_list_file(path, ns, buf, size) + else: + return libc.extattr_list_link(path, ns, buf, size) + + n, buf = _listxattr_inner(func, path) + return [os.fsdecode(name) for name in split_lstring(buf[:n]) if name] def getxattr(path, name, *, follow_symlinks=True): - name = os.fsencode(name) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.extattr_get_fd - elif follow_symlinks: - func = libc.extattr_get_file - else: - func = libc.extattr_get_link - n = _check(func(path, EXTATTR_NAMESPACE_USER, name, None, 0)) - if n == 0: - return - valuebuf = create_string_buffer(n) - n2 = _check(func(path, EXTATTR_NAMESPACE_USER, name, valuebuf, n), path) - if n2 != n: - raise Exception('getxattr failed') - return valuebuf.raw + def func(path, name, buf, size): + if isinstance(path, int): + return libc.extattr_get_fd(path, ns, name, buf, size) + else: + if follow_symlinks: + return libc.extattr_get_file(path, ns, name, buf, size) + else: + return libc.extattr_get_link(path, ns, name, buf, size) + + n, buf = _getxattr_inner(func, path, name) + return buf[:n] or None def setxattr(path, name, value, *, follow_symlinks=True): - name = os.fsencode(name) - value = value and os.fsencode(value) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.extattr_set_fd - elif follow_symlinks: - func = libc.extattr_set_file - else: - func = libc.extattr_set_link - _check(func(path, EXTATTR_NAMESPACE_USER, name, value, len(value) if value else 0), path) + def func(path, name, value, size): + if isinstance(path, int): + return libc.extattr_set_fd(path, ns, name, value, size) + else: + if follow_symlinks: + return libc.extattr_set_file(path, ns, name, value, size) + else: + return libc.extattr_set_link(path, ns, name, value, size) + + _setxattr_inner(func, path, name, value) else: # pragma: unknown platform only def listxattr(path, *, follow_symlinks=True): + """ + Return list of xattr names on a file. + + *path* can either be a path (str or bytes) or an open file descriptor (int). + *follow_symlinks* indicates whether symlinks should be followed + and only applies when *path* is not an open file descriptor. + """ return [] def getxattr(path, name, *, follow_symlinks=True): - pass + """ + Read xattr and return its value (as bytes) or None if its empty. + + *path* can either be a path (str or bytes) or an open file descriptor (int). + *name* is the name of the xattr to read (str). + *follow_symlinks* indicates whether symlinks should be followed + and only applies when *path* is not an open file descriptor. + """ def setxattr(path, name, value, *, follow_symlinks=True): - pass + """ + Write xattr on *path*. 
+ + *path* can either be a path (str or bytes) or an open file descriptor (int). + *name* is the name of the xattr to write (str). + *value* is the value to write. It is either bytes or None. The latter + signals that the value shall be empty (size equals zero). + *follow_symlinks* indicates whether symlinks should be followed + and only applies when *path* is not an open file descriptor. + """ diff --git a/tox.ini b/tox.ini index b5dbed60..8fb3f31b 100644 --- a/tox.ini +++ b/tox.ini @@ -2,17 +2,17 @@ # fakeroot -u tox --recreate [tox] -envlist = py{34,35,36},flake8 +envlist = py{35,36},flake8 [testenv] deps = -rrequirements.d/development.txt - -rrequirements.d/attic.txt -commands = py.test --cov=borg --cov-config=.coveragerc --benchmark-skip --pyargs {posargs:borg.testsuite} + -rrequirements.d/fuse.txt +commands = py.test -n {env:XDISTN:4} -rs --cov=borg --cov-config=.coveragerc --benchmark-skip --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * [testenv:flake8] changedir = deps = flake8 -commands = flake8 +commands = flake8 src scripts conftest.py
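
Illustration (not part of the patch above): the tuple encoding introduced by the new src/borg/version.py sorts prereleases before final releases with plain tuple comparison, because the prerelease markers (a=-4, b=-3, rc=-2) are all smaller than the final-release marker -1. A minimal sketch, assuming the module is importable as borg.version, reproducing values from the new testsuite/version.py:

from borg.version import parse_version, format_version

# prerelease markers: a=-4, b=-3, rc=-2; final releases end with -1
assert parse_version('1.0.0a1') == (1, 0, 0, -4, 1)
assert parse_version('1.0.0rc4') == (1, 0, 0, -2, 4)
assert parse_version('1.0.0') == (1, 0, 0, -1)

# plain tuple comparison orders alpha < beta < rc < final
assert parse_version('1.0.0a1') < parse_version('1.0.0b3') < parse_version('1.0.0rc4') < parse_version('1.0.0')

# format_version() is the reverse mapping (dev/local segments are dropped by parse_version)
assert format_version((1, 0, 0, -2, 4)) == '1.0.0rc4'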