diff --git a/.coveragerc b/.coveragerc index 7c4ccf9e..e2e8fe40 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,14 +2,19 @@ branch = True source = borg omit = - borg/__init__.py - borg/__main__.py - borg/_version.py - borg/support/*.py + */borg/__init__.py + */borg/__main__.py + */borg/_version.py + */borg/fuse.py + */borg/support/* + */borg/testsuite/* + */borg/hash_sizes.py [report] exclude_lines = pragma: no cover + pragma: freebsd only + pragma: unknown platform only def __repr__ raise AssertionError raise NotImplementedError diff --git a/.gitignore b/.gitignore index 73d508fe..2d77951b 100644 --- a/.gitignore +++ b/.gitignore @@ -15,8 +15,6 @@ platform_linux.c *.pyc *.pyo *.so -docs/usage/*.inc -docs/api.rst .idea/ .cache/ borg/_version.py diff --git a/.travis.yml b/.travis.yml index 156391f7..0ec266ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,26 +8,15 @@ cache: matrix: include: - - python: 3.2 - os: linux - env: TOXENV=py32 - - python: 3.3 - os: linux - env: TOXENV=py33 - python: 3.4 os: linux env: TOXENV=py34 - python: 3.5 os: linux env: TOXENV=py35 - - language: generic - os: osx - osx_image: xcode6.4 - env: TOXENV=py32 - - language: generic - os: osx - osx_image: xcode6.4 - env: TOXENV=py33 + - python: 3.5 + os: linux + env: TOXENV=flake8 - language: generic os: osx osx_image: xcode6.4 diff --git a/.travis/install.sh b/.travis/install.sh index 4de5e0c1..73e292dd 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -18,27 +18,19 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then brew outdated pyenv || brew upgrade pyenv case "${TOXENV}" in - py32) - pyenv install 3.2.6 - pyenv global 3.2.6 - ;; - py33) - pyenv install 3.3.6 - pyenv global 3.3.6 - ;; py34) pyenv install 3.4.3 pyenv global 3.4.3 ;; py35) - pyenv install 3.5.0 - pyenv global 3.5.0 + pyenv install 3.5.1 + pyenv global 3.5.1 ;; esac pyenv rehash - python -m pip install --user virtualenv + python -m pip install --user 'virtualenv<14.0' else - pip install virtualenv + pip install 'virtualenv<14.0' sudo add-apt-repository -y ppa:gezakovacs/lz4 sudo apt-get update sudo apt-get install -y liblz4-dev diff --git a/AUTHORS b/AUTHORS index 6812638d..077386e2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,23 +1,25 @@ -Contributors ("The Borg Collective") -==================================== +Borg Contributors ("The Borg Collective") +========================================= - Thomas Waldmann - Antoine Beaupré - Radek Podgorny - Yuri D'Elia +- Michael Hanselmann + +Borg is a fork of Attic. Attic authors ------------- -Borg is a fork of Attic. Attic is written and maintained -by Jonas Borgström and various contributors: +Attic is written and maintained by Jonas Borgström and various contributors: -Development Lead -```````````````` +Attic Development Lead +`````````````````````` - Jonas Borgström -Patches and Suggestions -``````````````````````` +Attic Patches and Suggestions +````````````````````````````` - Brian Johnson - Cyril Roussillon - Dan Christensen diff --git a/LICENSE b/LICENSE index ad958c54..251e7027 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2015 The Borg Collective (see AUTHORS file) +Copyright (C) 2015-2016 The Borg Collective (see AUTHORS file) Copyright (C) 2010-2014 Jonas Borgström All rights reserved. diff --git a/README.rst b/README.rst index 63cd7dc8..0d2d800b 100644 --- a/README.rst +++ b/README.rst @@ -1,12 +1,8 @@ |screencast| -.. 
|screencast| image:: https://asciinema.org/a/28691.png - :alt: BorgBackup Installation and Basic Usage - :target: https://asciinema.org/a/28691?autoplay=1&speed=2 - - What is BorgBackup? =================== + BorgBackup (short: Borg) is a deduplicating backup program. Optionally, it supports compression and authenticated encryption. @@ -19,7 +15,7 @@ fully trusted targets. See the `installation manual`_ or, if you have already downloaded Borg, ``docs/installation.rst`` to get started with Borg. -.. _installation manual: https://borgbackup.readthedocs.org/installation.html +.. _installation manual: https://borgbackup.readthedocs.org/en/stable/installation.html Main features ------------- @@ -34,20 +30,15 @@ Main features Compared to other deduplication approaches, this method does NOT depend on: - * file/directory names staying the same + * file/directory names staying the same: So you can move your stuff around + without killing the deduplication, even between machines sharing a repo. - So you can move your stuff around without killing the deduplication, - even between machines sharing a repo. + * complete files or time stamps staying the same: If a big file changes a + little, only a few new chunks need to be stored - this is great for VMs or + raw disks. - * complete files or time stamps staying the same - - If a big file changes a little, only a few new chunks will be stored - - this is great for VMs or raw disks. - - * the absolute position of a data chunk inside a file - - Stuff may get shifted and will still be found by the deduplication - algorithm. + * The absolute position of a data chunk inside a file: Stuff may get shifted + and will still be found by the deduplication algorithm. **Speed** * performance critical code (chunking, compression, encryption) is @@ -57,7 +48,7 @@ Main features **Data encryption** All data can be protected using 256-bit AES encryption, data integrity and - authenticity is verified using HMAC-SHA256. + authenticity is verified using HMAC-SHA256. Data is encrypted clientside. **Compression** All data can be compressed by lz4 (super fast, low compression), zlib @@ -73,9 +64,8 @@ Main features backup examination and restores (e.g. by using a regular file manager). **Easy installation on multiple platforms** - We offer single-file binaries - that does not require installing anything - you can just run it on - the supported platforms: + We offer single-file binaries that do not require installing anything - + you can just run them on these platforms: * Linux * Mac OS X @@ -109,53 +99,47 @@ Now doing another backup, just to show off the great deduplication:: This archive: 57.16 MB 46.78 MB 151.67 kB <--- ! All archives: 114.02 MB 93.46 MB 44.81 MB -For a graphical frontend refer to our complementary project `BorgWeb`_. +For a graphical frontend refer to our complementary project `BorgWeb `_. Links ===== - * `Main Web Site `_ - * `Releases `_ - * `PyPI packages `_ - * `ChangeLog `_ - * `GitHub `_ - * `Issue Tracker `_ - * `Bounties & Fundraisers `_ - * `Mailing List `_ - * `License `_ - -Related Projects ----------------- - - * `BorgWeb `_ - * `Atticmatic `_ - * `Attic `_ +* `Main Web Site `_ +* `Releases `_ +* `PyPI packages `_ +* `ChangeLog `_ +* `GitHub `_ +* `Issue Tracker `_ +* `Bounties & Fundraisers `_ +* `Mailing List `_ +* `License `_ Notes ----- Borg is a fork of `Attic`_ and maintained by "`The Borg collective`_". -.. _The Borg collective: https://borgbackup.readthedocs.org/authors.html +.. _Attic: https://github.com/jborg/attic +.. 
_The Borg collective: https://borgbackup.readthedocs.org/en/latest/authors.html Differences between Attic and Borg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Here's a (incomplete) list of some major changes: - * more open, faster paced development (see `issue #1 `_) - * lots of attic issues fixed (see `issue #5 `_) - * less chunk management overhead via --chunker-params option (less memory and disk usage) - * faster remote cache resync (useful when backing up multiple machines into same repo) - * compression: no, lz4, zlib or lzma compression, adjustable compression levels - * repokey replaces problematic passphrase mode (you can't change the passphrase nor the pbkdf2 iteration count in "passphrase" mode) - * simple sparse file support, great for virtual machine disk files - * can read special files (e.g. block devices) or from stdin, write to stdout - * mkdir-based locking is more compatible than attic's posix locking - * uses fadvise to not spoil / blow up the fs cache - * better error messages / exception handling - * better output for verbose mode, progress indication - * tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, Mac OS X +* more open, faster paced development (see `issue #1 `_) +* lots of attic issues fixed (see `issue #5 `_) +* less chunk management overhead via --chunker-params option (less memory and disk usage) +* faster remote cache resync (useful when backing up multiple machines into same repo) +* compression: no, lz4, zlib or lzma compression, adjustable compression levels +* repokey replaces problematic passphrase mode (you can't change the passphrase nor the pbkdf2 iteration count in "passphrase" mode) +* simple sparse file support, great for virtual machine disk files +* can read special files (e.g. block devices) or from stdin, write to stdout +* mkdir-based locking is more compatible than attic's posix locking +* uses fadvise to not spoil / blow up the fs cache +* better error messages / exception handling +* better logging, screen output, progress indication +* tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, Mac OS X Please read the `ChangeLog`_ (or ``CHANGES.rst`` in the source distribution) for more information. @@ -172,12 +156,20 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. Borg is distributed under a 3-clause BSD license, see `License`_ for the complete license. -|build| |coverage| +|doc| |build| |coverage| + +.. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable + :alt: Documentation + :target: https://borgbackup.readthedocs.org/en/stable/ .. |build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status :target: https://travis-ci.org/borgbackup/borg -.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master +.. |coverage| image:: https://codecov.io/github/borgbackup/borg/coverage.svg?branch=master :alt: Test Coverage - :target: http://codecov.io/github/borgbackup/borg?branch=master + :target: https://codecov.io/github/borgbackup/borg?branch=master + +.. 
|screencast| image:: https://asciinema.org/a/28691.png + :alt: BorgBackup Installation and Basic Usage + :target: https://asciinema.org/a/28691?autoplay=1&speed=2 diff --git a/Vagrantfile b/Vagrantfile index 90864f58..bfe0f764 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -28,8 +28,10 @@ def packages_debianoid # for building python: apt-get install -y zlib1g-dev libbz2-dev libncurses5-dev libreadline-dev liblzma-dev libsqlite3-dev # this way it works on older dists (like ubuntu 12.04) also: - easy_install3 pip - pip3 install virtualenv + # for python 3.2 on ubuntu 12.04 we need pip<8 and virtualenv<14 as + # newer versions are not compatible with py 3.2 any more. + easy_install3 'pip<8.0' + pip3 install 'virtualenv<14.0' touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile EOF end @@ -42,6 +44,8 @@ def packages_redhatted yum install -y openssl-devel openssl libacl-devel libacl lz4-devel fuse-devel fuse pkgconfig usermod -a -G fuse vagrant yum install -y fakeroot gcc git patch + # needed to compile msgpack-python (otherwise it will use slow fallback code): + yum install -y gcc-c++ # for building python: yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz-devel sqlite-devel #yum install -y python-pip @@ -53,9 +57,9 @@ end def packages_darwin return <<-EOF # get osxfuse 3.0.x pre-release code from github: - curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.5/osxfuse-3.0.5.dmg >osxfuse.dmg + curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.9/osxfuse-3.0.9.dmg >osxfuse.dmg MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ - && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.5.pkg" -target / + && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.9.pkg" -target / sudo chown -R vagrant /usr/local # brew must be able to create stuff here ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" brew update @@ -74,7 +78,7 @@ def packages_freebsd pkg install -y openssl liblz4 fusefs-libs pkgconf pkg install -y fakeroot git bash # for building python: - pkg install sqlite3 + pkg install -y sqlite3 # make bash default / work: chsh -s bash vagrant mount -t fdescfs fdesc /dev/fd @@ -125,7 +129,9 @@ def packages_netbsd ln -s /usr/pkg/lib/liblz4* /usr/local/opt/lz4/lib/ touch /etc/openssl/openssl.cnf # avoids a flood of "can't open ..." mozilla-rootcerts install - # llfuse does not support netbsd + pkg_add pkg-config # avoids some "pkg-config missing" error msg, even without fuse + # pkg_add fuse # llfuse 0.41.1 supports netbsd, but is still buggy. + # https://bitbucket.org/nikratio/python-llfuse/issues/70/perfuse_open-setsockopt-no-buffer-space pkg_add python34 py34-setuptools ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python3 @@ -142,6 +148,7 @@ def install_pyenv(boxname) echo 'eval "$(pyenv init -)"' >> ~/.bash_profile echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bash_profile echo 'export PYTHON_CONFIGURE_OPTS="--enable-shared"' >> ~/.bash_profile + echo 'export LANG=en_US.UTF-8' >> ~/.bash_profile EOF end @@ -154,11 +161,9 @@ end def install_pythons(boxname) return <<-EOF . 
~/.bash_profile - pyenv install 3.2.2 # tests, 3.2(.0) and 3.2.1 deadlock, issue #221 - pyenv install 3.3.0 # tests pyenv install 3.4.0 # tests pyenv install 3.5.0 # tests - #pyenv install 3.5.1 # binary build, use latest 3.5.x release + pyenv install 3.5.1 # binary build, use latest 3.5.x release pyenv rehash EOF end @@ -176,8 +181,8 @@ def build_pyenv_venv(boxname) . ~/.bash_profile cd /vagrant/borg # use the latest 3.5 release - pyenv global 3.5.0 - pyenv virtualenv 3.5.0 borg-env + pyenv global 3.5.1 + pyenv virtualenv 3.5.1 borg-env ln -s ~/.pyenv/versions/borg-env . EOF end @@ -193,7 +198,8 @@ def install_borg(boxname) rm -f borg/*.so borg/*.cpy* rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ - pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError + pip install 'llfuse<0.41' # 0.41.1 throws UnicodeDecodeError at install time: + # https://bitbucket.org/nikratio/python-llfuse/issues/69/unicode-exception-at-install-time pip install -r requirements.d/development.txt pip install -e . EOF @@ -206,7 +212,7 @@ def install_pyinstaller(boxname) . borg-env/bin/activate git clone https://github.com/pyinstaller/pyinstaller.git cd pyinstaller - git checkout master + git checkout v3.1 pip install -e . EOF end @@ -218,7 +224,7 @@ def install_pyinstaller_bootloader(boxname) . borg-env/bin/activate git clone https://github.com/pyinstaller/pyinstaller.git cd pyinstaller - git checkout master + git checkout v3.1 # build bootloader, if it is not included cd bootloader python ./waf all @@ -233,7 +239,7 @@ def build_binary_with_pyinstaller(boxname) cd /vagrant/borg . borg-env/bin/activate cd borg - pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean --hidden-import=logging.config borg/__main__.py + pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean borg/__main__.py EOF end @@ -244,7 +250,7 @@ def run_tests(boxname) . ../borg-env/bin/activate if which pyenv > /dev/null; then # for testing, use the earliest point releases of the supported python versions: - pyenv global 3.2.2 3.3.0 3.4.0 3.5.0 + pyenv global 3.4.0 3.5.0 fi # otherwise: just use the system python if which fakeroot > /dev/null; then @@ -266,7 +272,7 @@ end Vagrant.configure(2) do |config| # use rsync to copy content to the folder - config.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync" + config.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"] # do not let the VM access . on the host machine via the default shared folder! 
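  # the disabled entry below is the documented Vagrant way to opt out of the
  # implicit default "/vagrant" share; the rsync__args above add --delete so
  # removals on the host also propagate into the guest copy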
config.vm.synced_folder ".", "/vagrant", disabled: true diff --git a/borg/__main__.py b/borg/__main__.py index b38dc4e9..3e7f4745 100644 --- a/borg/__main__.py +++ b/borg/__main__.py @@ -1,3 +1,2 @@ from borg.archiver import main main() - diff --git a/borg/_chunker.c b/borg/_chunker.c index b7586948..8cadbb99 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -76,19 +76,18 @@ buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len } typedef struct { - int window_size, chunk_mask, min_size; - size_t buf_size; + uint32_t chunk_mask; uint32_t *table; uint8_t *data; PyObject *fd; int fh; int done, eof; - size_t remaining, position, last; + size_t min_size, buf_size, window_size, remaining, position, last; off_t bytes_read, bytes_yielded; } Chunker; static Chunker * -chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed) +chunker_init(size_t window_size, uint32_t chunk_mask, size_t min_size, size_t max_size, uint32_t seed) { Chunker *c = calloc(sizeof(Chunker), 1); c->window_size = window_size; @@ -191,25 +190,12 @@ chunker_fill(Chunker *c, PyThreadState **tstatep) return 1; } -static PyObject * -PyBuffer_FromMemory(void *data, Py_ssize_t len) -{ - Py_buffer buffer; - PyObject *mv; - - PyBuffer_FillInfo(&buffer, NULL, data, len, 1, PyBUF_CONTIG_RO); - mv = PyMemoryView_FromBuffer(&buffer); - PyBuffer_Release(&buffer); - return mv; -} - - static PyObject * chunker_process(Chunker *c) { - uint32_t sum, chunk_mask = c->chunk_mask, min_size = c->min_size, window_size = c->window_size; - int n = 0, rc = 0; - int old_last; + uint32_t sum, chunk_mask = c->chunk_mask; + size_t n = 0, old_last, min_size = c->min_size, window_size = c->window_size; + int rc = 0; PyThreadState *tstate; if(c->done) { @@ -231,7 +217,7 @@ chunker_process(Chunker *c) c->done = 1; if(c->remaining) { c->bytes_yielded += c->remaining; - return PyBuffer_FromMemory(c->data + c->position, c->remaining); + return PyMemoryView_FromMemory(c->data + c->position, c->remaining, PyBUF_READ); } else { if(c->bytes_read == c->bytes_yielded) @@ -266,5 +252,5 @@ chunker_process(Chunker *c) n = c->last - old_last; c->bytes_yielded += n; PyEval_RestoreThread(tstate); // acquire GIL - return PyBuffer_FromMemory(c->data + old_last, n); + return PyMemoryView_FromMemory(c->data + old_last, n, PyBUF_READ); } diff --git a/borg/_hashindex.c b/borg/_hashindex.c index e1ff936f..f1aa0aa8 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -40,20 +40,43 @@ typedef struct { int upper_limit; } HashIndex; +/* prime (or w/ big prime factors) hash table sizes + * not sure we need primes for borg's usage (as we have a hash function based + * on sha256, we can assume an even, seemingly random distribution of values), + * but OTOH primes don't harm. + * also, growth of the sizes starts with fast-growing 2x steps, but slows down + * more and more down to 1.1x. this is to avoid huge jumps in memory allocation, + * like e.g. 4G -> 8G. + * these values are generated by hash_sizes.py. 
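+ *
+ * as a rough illustration only (hash_sizes.py itself is not part of this
+ * hunk), such a table could be produced along these lines:
+ *
+ *     size, growth = 1031, 2.0
+ *     while size < 2**31 - 1:
+ *         emit(good_prime_near(size))      # hypothetical helper
+ *         size = int(size * growth)
+ *         growth = max(1.1, growth - 0.1)  # 2x steps easing down to 1.1x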
+ */ +static int hash_sizes[] = { + 1031, 2053, 4099, 8209, 16411, 32771, 65537, 131101, 262147, 445649, + 757607, 1287917, 2189459, 3065243, 4291319, 6007867, 8410991, + 11775359, 16485527, 23079703, 27695653, 33234787, 39881729, 47858071, + 57429683, 68915617, 82698751, 99238507, 119086189, 144378011, 157223263, + 173476439, 190253911, 209915011, 230493629, 253169431, 278728861, + 306647623, 337318939, 370742809, 408229973, 449387209, 493428073, + 543105119, 596976533, 657794869, 722676499, 795815791, 874066969, + 962279771, 1057701643, 1164002657, 1280003147, 1407800297, 1548442699, + 1703765389, 1873768367, 2062383853, /* 32bit int ends about here */ +}; + +#define HASH_MIN_LOAD .25 +#define HASH_MAX_LOAD .75 /* don't go higher than 0.75, otherwise performance severely suffers! */ + +#define MAX(x, y) ((x) > (y) ? (x): (y)) +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + #define EMPTY _htole32(0xffffffff) #define DELETED _htole32(0xfffffffe) -#define MAX_BUCKET_SIZE 512 -#define BUCKET_LOWER_LIMIT .25 -#define BUCKET_UPPER_LIMIT .90 -#define MIN_BUCKETS 1024 -#define MAX(x, y) ((x) > (y) ? (x): (y)) + #define BUCKET_ADDR(index, idx) (index->buckets + (idx * index->bucket_size)) +#define BUCKET_MATCHES_KEY(index, idx, key) (memcmp(key, BUCKET_ADDR(index, idx), index->key_size) == 0) + #define BUCKET_IS_DELETED(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) == DELETED) #define BUCKET_IS_EMPTY(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) == EMPTY) -#define BUCKET_MATCHES_KEY(index, idx, key) (memcmp(key, BUCKET_ADDR(index, idx), index->key_size) == 0) - #define BUCKET_MARK_DELETED(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) = DELETED) #define BUCKET_MARK_EMPTY(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) = EMPTY) @@ -113,12 +136,13 @@ hashindex_resize(HashIndex *index, int capacity) { HashIndex *new; void *key = NULL; + int32_t key_size = index->key_size; - if(!(new = hashindex_init(capacity, index->key_size, index->value_size))) { + if(!(new = hashindex_init(capacity, key_size, index->value_size))) { return 0; } while((key = hashindex_next_key(index, key))) { - hashindex_set(new, key, hashindex_get(index, key)); + hashindex_set(new, key, key + key_size); } free(index->buckets); index->buckets = new->buckets; @@ -129,6 +153,53 @@ hashindex_resize(HashIndex *index, int capacity) return 1; } +int get_lower_limit(int num_buckets){ + int min_buckets = hash_sizes[0]; + if (num_buckets <= min_buckets) + return 0; + return (int)(num_buckets * HASH_MIN_LOAD); +} + +int get_upper_limit(int num_buckets){ + int max_buckets = hash_sizes[NELEMS(hash_sizes) - 1]; + if (num_buckets >= max_buckets) + return num_buckets; + return (int)(num_buckets * HASH_MAX_LOAD); +} + +int size_idx(int size){ + /* find the hash_sizes index with entry >= size */ + int elems = NELEMS(hash_sizes); + int entry, i=0; + do{ + entry = hash_sizes[i++]; + }while((entry < size) && (i < elems)); + if (i >= elems) + return elems - 1; + i--; + return i; +} + +int fit_size(int current){ + int i = size_idx(current); + return hash_sizes[i]; +} + +int grow_size(int current){ + int i = size_idx(current) + 1; + int elems = NELEMS(hash_sizes); + if (i >= elems) + return hash_sizes[elems - 1]; + return hash_sizes[i]; +} + +int shrink_size(int current){ + int i = size_idx(current) - 1; + if (i < 0) + return hash_sizes[0]; + return hash_sizes[i]; +} + /* Public API */ static HashIndex * hashindex_read(const char *path) @@ -171,7 +242,7 
@@ hashindex_read(const char *path) goto fail; } buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size); - if(length != sizeof(HashHeader) + buckets_length) { + if((size_t) length != sizeof(HashHeader) + buckets_length) { EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)", (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length); goto fail; @@ -206,8 +277,8 @@ hashindex_read(const char *path) index->key_size = header.key_size; index->value_size = header.value_size; index->bucket_size = index->key_size + index->value_size; - index->lower_limit = index->num_buckets > MIN_BUCKETS ? ((int)(index->num_buckets * BUCKET_LOWER_LIMIT)) : 0; - index->upper_limit = (int)(index->num_buckets * BUCKET_UPPER_LIMIT); + index->lower_limit = get_lower_limit(index->num_buckets); + index->upper_limit = get_upper_limit(index->num_buckets); fail: if(fclose(fd) < 0) { EPRINTF_PATH(path, "fclose failed"); @@ -218,17 +289,15 @@ fail: static HashIndex * hashindex_init(int capacity, int key_size, int value_size) { - off_t buckets_length; HashIndex *index; int i; - capacity = MAX(MIN_BUCKETS, capacity); + capacity = fit_size(capacity); if(!(index = malloc(sizeof(HashIndex)))) { EPRINTF("malloc header failed"); return NULL; } - buckets_length = (off_t)capacity * (key_size + value_size); - if(!(index->buckets = calloc(buckets_length, 1))) { + if(!(index->buckets = calloc(capacity, key_size + value_size))) { EPRINTF("malloc buckets failed"); free(index); return NULL; @@ -238,8 +307,8 @@ hashindex_init(int capacity, int key_size, int value_size) index->value_size = value_size; index->num_buckets = capacity; index->bucket_size = index->key_size + index->value_size; - index->lower_limit = index->num_buckets > MIN_BUCKETS ? 
((int)(index->num_buckets * BUCKET_LOWER_LIMIT)) : 0; - index->upper_limit = (int)(index->num_buckets * BUCKET_UPPER_LIMIT); + index->lower_limit = get_lower_limit(index->num_buckets); + index->upper_limit = get_upper_limit(index->num_buckets); for(i = 0; i < capacity; i++) { BUCKET_MARK_EMPTY(index, i); } @@ -275,7 +344,7 @@ hashindex_write(HashIndex *index, const char *path) EPRINTF_PATH(path, "fwrite header failed"); ret = 0; } - if(fwrite(index->buckets, 1, buckets_length, fd) != buckets_length) { + if(fwrite(index->buckets, 1, buckets_length, fd) != (size_t) buckets_length) { EPRINTF_PATH(path, "fwrite buckets failed"); ret = 0; } @@ -303,7 +372,7 @@ hashindex_set(HashIndex *index, const void *key, const void *value) if(idx < 0) { if(index->num_entries > index->upper_limit) { - if(!hashindex_resize(index, index->num_buckets * 2)) { + if(!hashindex_resize(index, grow_size(index->num_buckets))) { return 0; } } @@ -333,7 +402,7 @@ hashindex_delete(HashIndex *index, const void *key) BUCKET_MARK_DELETED(index, idx); index->num_entries -= 1; if(index->num_entries < index->lower_limit) { - if(!hashindex_resize(index, index->num_buckets / 2)) { + if(!hashindex_resize(index, shrink_size(index->num_buckets))) { return 0; } } @@ -391,21 +460,24 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs *total_chunks = chunks; } +static void +hashindex_add(HashIndex *index, const void *key, int32_t *other_values) +{ + int32_t *my_values = (int32_t *)hashindex_get(index, key); + if(my_values == NULL) { + hashindex_set(index, key, other_values); + } else { + *my_values += *other_values; + } +} + static void hashindex_merge(HashIndex *index, HashIndex *other) { int32_t key_size = index->key_size; - const int32_t *other_values; - int32_t *my_values; void *key = NULL; while((key = hashindex_next_key(other, key))) { - other_values = key + key_size; - my_values = (int32_t *)hashindex_get(index, key); - if(my_values == NULL) { - hashindex_set(index, key, other_values); - } else { - *my_values += *other_values; - } + hashindex_add(index, key, key + key_size); } } diff --git a/borg/archive.py b/borg/archive.py index 47491499..5070a0b2 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -3,8 +3,6 @@ from datetime import datetime from getpass import getuser from itertools import groupby import errno -import threading -import logging from .logger import create_logger logger = create_logger() @@ -16,43 +14,34 @@ import os import socket import stat import sys +import threading import time from io import BytesIO from . 
import xattr from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, group2gid, format_timedelta, \ - Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, have_cython, \ - st_atime_ns, st_ctime_ns, st_mtime_ns, make_queue, TerminatedQueue -if have_cython(): - from .platform import acl_get, acl_set - from .chunker import Chunker - from .hashindex import ChunkIndex - import msgpack -else: - import mock - msgpack = mock.Mock() + Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \ + make_queue, TerminatedQueue, ProgressIndicatorPercent +from .platform import acl_get, acl_set +from .chunker import Chunker +from .hashindex import ChunkIndex +import msgpack ITEMS_BUFFER = 1024 * 1024 -CHUNK_MIN_EXP = 10 # 2**10 == 1kiB +CHUNK_MIN_EXP = 19 # 2**19 == 512kiB CHUNK_MAX_EXP = 23 # 2**23 == 8MiB HASH_WINDOW_SIZE = 0xfff # 4095B -HASH_MASK_BITS = 16 # results in ~64kiB chunks statistically +HASH_MASK_BITS = 21 # results in ~2MiB chunks statistically # defaults, use --chunker-params to override CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) -utime_supports_fd = os.utime in getattr(os, 'supports_fd', {}) -utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {}) -has_mtime_ns = sys.version >= '3.3' +# chunker params for the items metadata stream, finer granularity +ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE) + has_lchmod = hasattr(os, 'lchmod') has_lchflags = hasattr(os, 'lchflags') -# Python <= 3.2 raises OSError instead of PermissionError (See #164) -try: - PermissionError = PermissionError -except NameError: - PermissionError = OSError - class DownloadPipeline: @@ -82,7 +71,7 @@ class DownloadPipeline: class ChunkBuffer: BUFFER_SIZE = 1 * 1024 * 1024 - def __init__(self, key, chunker_params=CHUNKER_PARAMS): + def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS): self.buffer = BytesIO() self.packer = msgpack.Packer(unicode_errors='surrogateescape') self.chunks = [] @@ -117,7 +106,7 @@ class ChunkBuffer: class CacheChunkBuffer(ChunkBuffer): - def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS): + def __init__(self, cache, key, stats, chunker_params=ITEMS_CHUNKER_PARAMS): super().__init__(key, chunker_params) self.cache = cache self.stats = stats @@ -337,7 +326,6 @@ class Archive: self.hard_links = {} self.stats = Statistics() self.show_progress = progress - self.last_progress = time.time() self.name = name self.checkpoint_interval = checkpoint_interval self.numeric_owner = numeric_owner @@ -346,7 +334,7 @@ class Archive: self.pipeline = DownloadPipeline(self.repository, self.key) if create: self.pp = ParallelProcessor(self) - self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params) + self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) self.chunker = Chunker(self.key.chunk_seed, *chunker_params) if name in manifest.archives: raise self.AlreadyExists(name) @@ -394,7 +382,7 @@ class Archive: @property def duration(self): - return format_timedelta(self.end-self.start) + return format_timedelta(self.end - self.start) def __str__(self): return '''Archive name: {0.name} @@ -415,9 +403,11 @@ Number of files: {0.stats.nfiles}'''.format(self) self.pp.reader_queue.put(item) def add_item(self, item): - if self.show_progress and time.time() - self.last_progress > 0.2: - self.stats.show_progress(item=item) - self.last_progress = time.time() + unknown_keys = set(item) - 
ITEM_KEYS + assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s', + ','.join(k.decode('ascii') for k in unknown_keys)) + if self.show_progress: + self.stats.show_progress(item=item, dt=0.2) self.items_buffer.add(item) if time.time() - self.last_checkpoint > self.checkpoint_interval: self.write_checkpoint() @@ -503,7 +493,7 @@ Number of files: {0.stats.nfiles}'''.format(self) # of a hardlink - a still empty inode that needs to be filled. pass except UnicodeEncodeError: - raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) + raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None except OSError: pass mode = item[b'mode'] @@ -548,7 +538,10 @@ Number of files: {0.stats.nfiles}'''.format(self) source = item[b'source'] if os.path.exists(path): os.unlink(path) - os.symlink(source, path) + try: + os.symlink(source, path) + except UnicodeEncodeError: + raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None self.restore_attrs(path, item, symlink=True) elif stat.S_ISFIFO(mode): if not os.path.exists(os.path.dirname(path)): @@ -599,12 +592,10 @@ Number of files: {0.stats.nfiles}'''.format(self) else: # old archives only had mtime in item metadata atime = mtime - if fd and utime_supports_fd: # Python >= 3.3 + if fd: os.utime(fd, None, ns=(atime, mtime)) - elif utime_supports_follow_symlinks: # Python >= 3.3 + else: os.utime(path, None, ns=(atime, mtime), follow_symlinks=False) - elif not symlink: - os.utime(path, (atime / 1e9, mtime / 1e9)) acl_set(path, item, self.numeric_owner) # Only available on OS X and FreeBSD if has_lchflags and b'bsdflags' in item: @@ -625,16 +616,21 @@ Number of files: {0.stats.nfiles}'''.format(self) self.cache.chunk_decref(self.id, self.stats) del self.manifest.archives[self.name] - def delete(self, stats): + def delete(self, stats, progress=False): unpacker = msgpack.Unpacker(use_list=False) - for items_id, data in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): + items_ids = self.metadata[b'items'] + pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True) + for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): + if progress: + pi.show(i) unpacker.feed(self.key.decrypt(items_id, data)) self.cache.chunk_decref(items_id, stats) for item in unpacker: if b'chunks' in item: for chunk_id, size, csize in item[b'chunks']: self.cache.chunk_decref(chunk_id, stats) - + if progress: + pi.finish() self.cache.chunk_decref(self.id, stats) del self.manifest.archives[self.name] @@ -643,9 +639,9 @@ Number of files: {0.stats.nfiles}'''.format(self) b'mode': st.st_mode, b'uid': st.st_uid, b'user': uid2user(st.st_uid), b'gid': st.st_gid, b'group': gid2group(st.st_gid), - b'atime': int_to_bigint(st_atime_ns(st)), - b'ctime': int_to_bigint(st_ctime_ns(st)), - b'mtime': int_to_bigint(st_mtime_ns(st)), + b'atime': int_to_bigint(st.st_atime_ns), + b'ctime': int_to_bigint(st.st_ctime_ns), + b'mtime': int_to_bigint(st.st_mtime_ns), } if self.numeric_owner: item[b'user'] = item[b'group'] = None @@ -687,13 +683,14 @@ Number of files: {0.stats.nfiles}'''.format(self) def process_stdin(self, path, cache): uid, gid = 0, 0 + t = int_to_bigint(int(time.time()) * 1000000000) item = { b'path': path, b'fd': sys.stdin.buffer, # binary b'mode': 0o100660, # regular file, ug=rw b'uid': uid, b'user': uid2user(uid), b'gid': gid, b'group': gid2group(gid), 
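            # (t, defined just above, is reused for all three timestamps;
            # int_to_bigint lets msgpack store nanosecond values that can
            # exceed a signed 64-bit int)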
- b'mtime': int_to_bigint(int(time.time()) * 1000000000) + b'mtime': t, b'atime': t, b'ctime': t, } self.add_item_queued(item) return 'i' # stdin @@ -740,7 +737,6 @@ Number of files: {0.stats.nfiles}'''.format(self) } item.update(self.stat_attrs(st, path)) self.add_item_queued(item) - return status @staticmethod @@ -792,13 +788,18 @@ Number of files: {0.stats.nfiles}'''.format(self) return Archive._open_rb(path, st) +# this set must be kept complete, otherwise the RobustUnpacker might malfunction: +ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', + b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', + b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) + + class RobustUnpacker: """A restartable/robust version of the streaming msgpack unpacker """ - item_keys = [msgpack.packb(name) for name in ('path', 'mode', 'source', 'chunks', 'rdev', 'xattrs', 'user', 'group', 'uid', 'gid', 'mtime')] - def __init__(self, validator): super().__init__() + self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS] self.validator = validator self._buffered_data = [] self._resync = False @@ -857,21 +858,25 @@ class ArchiveChecker: self.error_found = False self.possibly_superseded = set() - def check(self, repository, repair=False, archive=None, last=None): - self.report_progress('Starting archive consistency check...') - self.check_all = archive is None and last is None + def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False): + logger.info('Starting archive consistency check...') + self.check_all = archive is None and last is None and prefix is None self.repair = repair self.repository = repository self.init_chunks() self.key = self.identify_key(repository) if Manifest.MANIFEST_ID not in self.chunks: + logger.error("Repository manifest not found!") + self.error_found = True self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) - self.rebuild_refcounts(archive=archive, last=last) + self.rebuild_refcounts(archive=archive, last=last, prefix=prefix) self.orphan_chunks_check() - self.finish() - if not self.error_found: + self.finish(save_space=save_space) + if self.error_found: + logger.error('Archive consistency check complete, problems found.') + else: logger.info('Archive consistency check complete, no problems found.') return self.repair or not self.error_found @@ -891,11 +896,6 @@ class ArchiveChecker: for id_ in result: self.chunks[id_] = (0, 0, 0) - def report_progress(self, msg, error=False): - if error: - self.error_found = True - logger.log(logging.ERROR if error else logging.WARNING, msg) - def identify_key(self, repository): cdata = repository.get(next(self.chunks.iteritems())[0]) return key_factory(repository, cdata) @@ -905,7 +905,7 @@ class ArchiveChecker: Iterates through all objects in the repository looking for archive metadata blocks. 
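
        In outline: every chunk in the repository is fetched, decrypted and
        fed to msgpack; anything that unpacks to a dict carrying both b'items'
        and b'cmdline' keys is treated as an archive metadata block and gets
        re-registered in the rebuilt manifest.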
""" - self.report_progress('Rebuilding missing manifest, this might take some time...', error=True) + logger.info('Rebuilding missing manifest, this might take some time...') manifest = Manifest(self.key, self.repository) for chunk_id, _ in self.chunks.iteritems(): cdata = self.repository.get(chunk_id) @@ -922,12 +922,12 @@ class ArchiveChecker: except (TypeError, ValueError, StopIteration): continue if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive: - self.report_progress('Found archive ' + archive[b'name'].decode('utf-8'), error=True) + logger.info('Found archive %s', archive[b'name'].decode('utf-8')) manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} - self.report_progress('Manifest rebuild complete', error=True) + logger.info('Manifest rebuild complete.') return manifest - def rebuild_refcounts(self, archive=None, last=None): + def rebuild_refcounts(self, archive=None, last=None, prefix=None): """Rebuild object reference counts by walking the metadata Missing and/or incorrect data is repaired when detected @@ -966,7 +966,8 @@ class ArchiveChecker: for chunk_id, size, csize in item[b'chunks']: if chunk_id not in self.chunks: # If a file chunk is missing, create an all empty replacement chunk - self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True) + logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size)) + self.error_found = True data = bytes(size) chunk_id = self.key.id_hash(data) cdata = self.key.encrypt(data) @@ -992,64 +993,81 @@ class ArchiveChecker: _state += 1 return _state + def report(msg, chunk_id, chunk_no): + cid = hexlify(chunk_id).decode('ascii') + msg += ' [chunk: %06d_%s]' % (chunk_no, cid) # see debug-dump-archive-items + self.error_found = True + logger.error(msg) + + i = 0 for state, items in groupby(archive[b'items'], missing_chunk_detector): items = list(items) if state % 2: - self.report_progress('Archive metadata damage detected', error=True) + for chunk_id in items: + report('item metadata chunk missing', chunk_id, i) + i += 1 continue if state > 0: unpacker.resync() for chunk_id, cdata in zip(items, repository.get_many(items)): unpacker.feed(self.key.decrypt(chunk_id, cdata)) - for item in unpacker: - if not isinstance(item, dict): - self.report_progress('Did not get expected metadata dict - archive corrupted!', - error=True) - continue - yield item + try: + for item in unpacker: + if isinstance(item, dict): + yield item + else: + report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i) + except Exception: + report('Exception while unpacking item metadata', chunk_id, i) + raise + i += 1 - repository = cache_if_remote(self.repository) if archive is None: # we need last N or all archives archive_items = sorted(self.manifest.archives.items(), reverse=True, key=lambda name_info: name_info[1][b'time']) - num_archives = len(self.manifest.archives) + if prefix is not None: + archive_items = [item for item in archive_items if item[0].startswith(prefix)] + num_archives = len(archive_items) end = None if last is None else min(num_archives, last) else: # we only want one specific archive archive_items = [item for item in self.manifest.archives.items() if item[0] == archive] num_archives = 1 end = 1 - for i, (name, info) in enumerate(archive_items[:end]): - logger.info('Analyzing 
archive {} ({}/{})'.format(name, num_archives - i, num_archives)) - archive_id = info[b'id'] - if archive_id not in self.chunks: - self.report_progress('Archive metadata block is missing', error=True) - del self.manifest.archives[name] - continue - mark_as_possibly_superseded(archive_id) - cdata = self.repository.get(archive_id) - data = self.key.decrypt(archive_id, cdata) - archive = StableDict(msgpack.unpackb(data)) - if archive[b'version'] != 1: - raise Exception('Unknown archive metadata version') - decode_dict(archive, (b'name', b'hostname', b'username', b'time')) - archive[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in archive[b'cmdline']] - items_buffer = ChunkBuffer(self.key) - items_buffer.write_chunk = add_callback - for item in robust_iterator(archive): - if b'chunks' in item: - verify_file_chunks(item) - items_buffer.add(item) - items_buffer.flush(flush=True) - for previous_item_id in archive[b'items']: - mark_as_possibly_superseded(previous_item_id) - archive[b'items'] = items_buffer.chunks - data = msgpack.packb(archive, unicode_errors='surrogateescape') - new_archive_id = self.key.id_hash(data) - cdata = self.key.encrypt(data) - add_reference(new_archive_id, len(data), len(cdata), cdata) - info[b'id'] = new_archive_id + + with cache_if_remote(self.repository) as repository: + for i, (name, info) in enumerate(archive_items[:end]): + logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) + archive_id = info[b'id'] + if archive_id not in self.chunks: + logger.error('Archive metadata block is missing!') + self.error_found = True + del self.manifest.archives[name] + continue + mark_as_possibly_superseded(archive_id) + cdata = self.repository.get(archive_id) + data = self.key.decrypt(archive_id, cdata) + archive = StableDict(msgpack.unpackb(data)) + if archive[b'version'] != 1: + raise Exception('Unknown archive metadata version') + decode_dict(archive, (b'name', b'hostname', b'username', b'time')) + archive[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in archive[b'cmdline']] + items_buffer = ChunkBuffer(self.key) + items_buffer.write_chunk = add_callback + for item in robust_iterator(archive): + if b'chunks' in item: + verify_file_chunks(item) + items_buffer.add(item) + items_buffer.flush(flush=True) + for previous_item_id in archive[b'items']: + mark_as_possibly_superseded(previous_item_id) + archive[b'items'] = items_buffer.chunks + data = msgpack.packb(archive, unicode_errors='surrogateescape') + new_archive_id = self.key.id_hash(data) + cdata = self.key.encrypt(data) + add_reference(new_archive_id, len(data), len(cdata), cdata) + info[b'id'] = new_archive_id def orphan_chunks_check(self): if self.check_all: @@ -1059,14 +1077,15 @@ class ArchiveChecker: unused.add(id_) orphaned = unused - self.possibly_superseded if orphaned: - self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True) + logger.error('{} orphaned objects found!'.format(len(orphaned))) + self.error_found = True if self.repair: for id_ in unused: self.repository.delete(id_) else: - self.report_progress('Orphaned objects check skipped (needs all archives checked)') + logger.warning('Orphaned objects check skipped (needs all archives checked).') - def finish(self): + def finish(self, save_space=False): if self.repair: self.manifest.write() - self.repository.commit() + self.repository.commit(save_space=save_space) diff --git a/borg/archiver.py b/borg/archiver.py index 71a5ee06..4948a8a8 100644 --- a/borg/archiver.py +++ 
b/borg/archiver.py @@ -1,13 +1,13 @@ -from .support import argparse # see support/__init__.py docstring - # DEPRECATED - remove after requiring py 3.4 - -from binascii import hexlify +from binascii import hexlify, unhexlify from datetime import datetime +from hashlib import sha256 from operator import attrgetter +import argparse import functools import inspect import io import os +import shlex import signal import stat import sys @@ -16,37 +16,55 @@ import traceback from . import __version__ from .helpers import Error, location_validator, format_time, format_file_size, \ - format_file_mode, ExcludePattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ - get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ + parse_pattern, PathPrefixPattern, to_localtime, timestamp, \ + get_cache_dir, get_keys_dir, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec, have_cython, \ - EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR + dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ + EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher from .logger import create_logger, setup_logging logger = create_logger() -if have_cython(): - from .compress import Compressor, COMPR_BUFFER - from .upgrader import AtticRepositoryUpgrader - from .repository import Repository - from .cache import Cache - from .key import key_creator +from .compress import Compressor, COMPR_BUFFER +from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader +from .repository import Repository +from .cache import Cache +from .key import key_creator, RepoKey, PassphraseKey from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS -from .remote import RepositoryServer, RemoteRepository +from .remote import RepositoryServer, RemoteRepository, cache_if_remote has_lchflags = hasattr(os, 'lchflags') +# default umask, overriden by --umask, defaults to read/write only for owner +UMASK_DEFAULT = 0o077 + +DASHES = '-' * 78 + + +class ToggleAction(argparse.Action): + """argparse action to handle "toggle" flags easily + + toggle flags are in the form of ``--foo``, ``--no-foo``. + + the ``--no-foo`` argument still needs to be passed to the + ``add_argument()`` call, but it simplifies the ``--no`` + detection. 
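+
+    a hypothetical wiring (assuming ``nargs=0`` so the option consumes no
+    value; the exact parser setup is outside this hunk)::
+
+        parser.add_argument('--progress', '--no-progress',
+                            dest='progress', action=ToggleAction,
+                            nargs=0, default=False)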
+ """ + def __call__(self, parser, ns, values, option): + """set the given flag to true unless ``--no`` is passed""" + setattr(ns, self.dest, not option.startswith('--no-')) + class Archiver: - def __init__(self, verbose=False): + def __init__(self, lock_wait=None): self.exit_code = EXIT_SUCCESS - self.verbose = verbose + self.lock_wait = lock_wait - def open_repository(self, location, create=False, exclusive=False): + def open_repository(self, args, create=False, exclusive=False, lock=True): + location = args.location # note: 'location' must be always present in args if location.proto == 'ssh': - repository = RemoteRepository(location, create=create) + repository = RemoteRepository(location, create=create, lock_wait=self.lock_wait, lock=lock, args=args) else: - repository = Repository(location.path, create=create, exclusive=exclusive) - repository._location = location + repository = Repository(location.path, create=create, exclusive=exclusive, lock_wait=self.lock_wait, lock=lock) return repository def print_error(self, msg, *args): @@ -59,13 +77,9 @@ class Archiver: self.exit_code = EXIT_WARNING # we do not terminate here, so it is a warning logger.warning(msg) - def print_info(self, msg, *args): - if self.verbose: - msg = args and msg % args or msg - logger.info(msg) - - def print_status(self, status, path): - self.print_info("%1s %s", status, remove_surrogates(path)) + def print_file_status(self, status, path): + if self.output_list and (self.output_filter is None or status in self.output_filter): + logger.info("%1s %s", status, remove_surrogates(path)) def do_serve(self, args): """Start in server mode. This command is usually not used manually. @@ -74,76 +88,78 @@ class Archiver: def do_init(self, args): """Initialize an empty repository""" - logger.info('Initializing repository at "%s"' % args.repository.orig) - repository = self.open_repository(args.repository, create=True, exclusive=True) + logger.info('Initializing repository at "%s"' % args.location.canonical_path()) + repository = self.open_repository(args, create=True, exclusive=True) key = key_creator(repository, args) manifest = Manifest(key, repository) manifest.key = key manifest.write() repository.commit() - Cache(repository, key, manifest, warn_if_unencrypted=False) + with Cache(repository, key, manifest, warn_if_unencrypted=False): + pass return self.exit_code def do_check(self, args): """Check repository consistency""" - repository = self.open_repository(args.repository, exclusive=args.repair) + repository = self.open_repository(args, exclusive=args.repair) if args.repair: - while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): - self.print_warning("""'check --repair' is an experimental feature that might result -in data loss. - -Type "Yes I am sure" if you understand this and want to continue.\n""") - if input('Do you want to continue? ') == 'Yes I am sure': - break + msg = ("'check --repair' is an experimental feature that might result in data loss." 
+ + "\n" + + "Type 'YES' if you understand this and want to continue: ") + if not yes(msg, false_msg="Aborting.", truish=('YES', ), + env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): + return EXIT_ERROR if not args.archives_only: - logger.info('Starting repository check...') - if repository.check(repair=args.repair): - logger.info('Repository check complete, no problems found.') - else: + if not repository.check(repair=args.repair, save_space=args.save_space): return EXIT_WARNING if not args.repo_only and not ArchiveChecker().check( - repository, repair=args.repair, archive=args.repository.archive, last=args.last): + repository, repair=args.repair, archive=args.location.archive, + last=args.last, prefix=args.prefix, save_space=args.save_space): return EXIT_WARNING return EXIT_SUCCESS def do_change_passphrase(self, args): """Change repository key file passphrase""" - repository = self.open_repository(args.repository) + repository = self.open_repository(args) manifest, key = Manifest.load(repository) key.change_passphrase() return EXIT_SUCCESS + def do_migrate_to_repokey(self, args): + """Migrate passphrase -> repokey""" + repository = self.open_repository(args) + manifest_data = repository.get(Manifest.MANIFEST_ID) + key_old = PassphraseKey.detect(repository, manifest_data) + key_new = RepoKey(repository) + key_new.target = repository + key_new.repository_id = repository.id + key_new.enc_key = key_old.enc_key + key_new.enc_hmac_key = key_old.enc_hmac_key + key_new.id_key = key_old.id_key + key_new.chunk_seed = key_old.chunk_seed + key_new.change_passphrase() # option to change key protection passphrase, save + return EXIT_SUCCESS + def do_create(self, args): """Create new archive""" - dry_run = args.dry_run - t0 = datetime.now() - if not dry_run: - repository = self.open_repository(args.archive, exclusive=True) - manifest, key = Manifest.load(repository) - compr_args = dict(buffer=COMPR_BUFFER) - compr_args.update(args.compression) - key.compressor = Compressor(**compr_args) - cache = Cache(repository, key, manifest, do_files=args.cache_files) - archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, - create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress, - chunker_params=args.chunker_params, start=t0) - else: - archive = cache = None - try: + matcher = PatternMatcher(fallback=True) + if args.excludes: + matcher.add(args.excludes, False) + + def create_inner(archive, cache): # Add cache dir to inode_skip list skip_inodes = set() try: st = os.stat(get_cache_dir()) skip_inodes.add((st.st_ino, st.st_dev)) - except IOError: + except OSError: pass # Add local repository dir to inode_skip list - if not args.archive.host: + if not args.location.host: try: - st = os.stat(args.archive.path) + st = os.stat(args.location.path) skip_inodes.add((st.st_ino, st.st_dev)) - except IOError: + except OSError: pass for path in args.paths: if path == '-': # stdin @@ -151,12 +167,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if not dry_run: try: status = archive.process_stdin(path, cache) - except IOError as e: + except OSError as e: status = 'E' self.print_warning('%s: %s', path, e) else: status = '-' - self.print_status(status, path) + self.print_file_status(status, path) continue path = os.path.normpath(path) if args.one_file_system: @@ -167,7 +183,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") continue else: restrict_dev = None - self._process(archive, 
cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev, + self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present, + args.keep_tag_files, skip_inodes, path, restrict_dev, read_special=args.read_special, dry_run=dry_run) if not dry_run: archive.save(timestamp=args.timestamp) @@ -175,21 +192,42 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") archive.stats.show_progress(final=True) if args.stats: archive.end = datetime.now() - print('-' * 78) - print(str(archive)) - print() - print(str(archive.stats)) - print(str(cache)) - print('-' * 78) - finally: - if not dry_run: - archive.close() + log_multi(DASHES, + str(archive), + DASHES, + str(archive.stats), + str(cache), + DASHES) + + self.output_filter = args.output_filter + self.output_list = args.output_list + dry_run = args.dry_run + t0 = datetime.now() + if not dry_run: + repository = self.open_repository(args, exclusive=True) + manifest, key = Manifest.load(repository) + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(args.compression) + key.compressor = Compressor(**compr_args) + with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + archive = Archive(repository, key, manifest, args.location.archive, cache=cache, + create=True, checkpoint_interval=args.checkpoint_interval, + numeric_owner=args.numeric_owner, progress=args.progress, + chunker_params=args.chunker_params, start=t0) + try: + create_inner(archive, cache) + finally: + archive.close() + else: + create_inner(None, None) return self.exit_code - def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev, + def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, + keep_tag_files, skip_inodes, path, restrict_dev, read_special=False, dry_run=False): - if exclude_path(path, excludes): + if not matcher.match(path): return + try: st = os.lstat(path) except OSError as e: @@ -204,16 +242,22 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Ignore if nodump flag is set if has_lchflags and (st.st_flags & stat.UF_NODUMP): return - if (stat.S_ISREG(st.st_mode) or - read_special and not stat.S_ISDIR(st.st_mode)): + if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode): if not dry_run: try: status = archive.process_file(path, st, cache) - except IOError as e: + except OSError as e: status = 'E' self.print_warning('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): - if exclude_caches and is_cachedir(path): + tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) + if tag_paths: + if keep_tag_files and not dry_run: + archive.process_dir(path, st) + for tag_path in tag_paths: + self._process(archive, cache, matcher, exclude_caches, exclude_if_present, + keep_tag_files, skip_inodes, tag_path, restrict_dev, + read_special=read_special, dry_run=dry_run) return if not dry_run: status = archive.process_dir(path, st) @@ -225,9 +269,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") else: for filename in sorted(entries): entry_path = os.path.normpath(os.path.join(path, filename)) - self._process(archive, cache, excludes, exclude_caches, skip_inodes, - entry_path, restrict_dev, read_special=read_special, - dry_run=dry_run) + self._process(archive, cache, matcher, exclude_caches, exclude_if_present, + keep_tag_files, skip_inodes, entry_path, restrict_dev, + read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if 
not dry_run: status = archive.process_symlink(path, st) @@ -244,44 +288,51 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") self.print_warning('Unknown file type: %s', path) return # Status output - # A lowercase character means a file type other than a regular file, - # borg usually just stores them. E.g. (d)irectory. - # Hardlinks to already seen content are indicated by (h). - # A uppercase character means a regular file that was (A)dded, - # (M)odified or was (U)nchanged. - # Note: A/M/U is relative to the "files" cache, not to the repo. - # This would be an issue if the files cache is not used. if status is None: if not dry_run: status = '?' # need to add a status code somewhere else: status = '-' # dry run, item was not backed up - # output ALL the stuff - it can be easily filtered using grep. - # even stuff considered unchanged might be interesting. - self.print_status(status, path) + self.print_file_status(status, path) def do_extract(self, args): """Extract archive contents""" # be restrictive when restoring files, restore permissions later if sys.getfilesystemencoding() == 'ascii': logger.warning('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.') - repository = self.open_repository(args.archive) + if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )): + logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8') + repository = self.open_repository(args) manifest, key = Manifest.load(repository) - archive = Archive(repository, key, manifest, args.archive.archive, + archive = Archive(repository, key, manifest, args.location.archive, numeric_owner=args.numeric_owner) - patterns = adjust_patterns(args.paths, args.excludes) + + matcher = PatternMatcher() + if args.excludes: + matcher.add(args.excludes, False) + + include_patterns = [] + + if args.paths: + include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths) + matcher.add(include_patterns, True) + + matcher.fallback = not include_patterns + + output_list = args.output_list dry_run = args.dry_run stdout = args.stdout sparse = args.sparse strip_components = args.strip_components dirs = [] - for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True): + for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True): orig_path = item[b'path'] if strip_components: item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:]) if not item[b'path']: continue - self.print_info(remove_surrogates(orig_path)) + if output_list: + logger.info(remove_surrogates(orig_path)) try: if dry_run: archive.extract_item(item, dry_run=True) @@ -291,7 +342,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") archive.extract_item(item, restore_attrs=False) else: archive.extract_item(item, stdout=stdout, sparse=sparse) - except IOError as e: + except OSError as e: self.print_warning('%s: %s', remove_surrogates(orig_path), e) if not args.dry_run: @@ -300,53 +351,57 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # processing time, archive order is not as traversal order on "create". 
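            # directories were queued in `dirs` during the loop above and are
            # popped last-in-first-out here, so a directory's attributes
            # (e.g. mtime) are only restored after everything inside it exists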
while dirs: archive.extract_item(dirs.pop(-1)) - for pattern in (patterns or []): - if isinstance(pattern, IncludePattern) and pattern.match_count == 0: + for pattern in include_patterns: + if pattern.match_count == 0: self.print_warning("Include pattern '%s' never matched.", pattern) return self.exit_code def do_rename(self, args): """Rename an existing archive""" - repository = self.open_repository(args.archive, exclusive=True) + repository = self.open_repository(args, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) - archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) - archive.rename(args.name) - manifest.write() - repository.commit() - cache.commit() + with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: + archive = Archive(repository, key, manifest, args.location.archive, cache=cache) + archive.rename(args.name) + manifest.write() + repository.commit() + cache.commit() return self.exit_code def do_delete(self, args): """Delete an existing repository or archive""" - repository = self.open_repository(args.target, exclusive=True) + repository = self.open_repository(args, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest, do_files=args.cache_files) - if args.target.archive: - archive = Archive(repository, key, manifest, args.target.archive, cache=cache) - stats = Statistics() - archive.delete(stats) - manifest.write() - repository.commit() - cache.commit() - if args.stats: - logger.info(stats.summary.format(label='Deleted data:', stats=stats)) - logger.info(str(cache)) - else: - if not args.cache_only: - print("You requested to completely DELETE the repository *including* all archives it contains:", file=sys.stderr) - for archive_info in manifest.list_archive_infos(sort_by='ts'): - print(format_archive(archive_info), file=sys.stderr) - if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): - print("""Type "YES" if you understand this and want to continue.\n""", file=sys.stderr) - # XXX: prompt may end up on stdout, but we'll assume that input() does the right thing - if input('Do you want to continue? 
') != 'YES': + with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + if args.location.archive: + archive = Archive(repository, key, manifest, args.location.archive, cache=cache) + stats = Statistics() + archive.delete(stats, progress=args.progress) + manifest.write() + repository.commit(save_space=args.save_space) + cache.commit() + logger.info("Archive deleted.") + if args.stats: + log_multi(DASHES, + stats.summary.format(label='Deleted data:', stats=stats), + str(cache), + DASHES) + else: + if not args.cache_only: + msg = [] + msg.append("You requested to completely DELETE the repository *including* all archives it contains:") + for archive_info in manifest.list_archive_infos(sort_by='ts'): + msg.append(format_archive(archive_info)) + msg.append("Type 'YES' if you understand this and want to continue: ") + msg = '\n'.join(msg) + if not yes(msg, false_msg="Aborting.", truish=('YES', ), + env_var_override='BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'): self.exit_code = EXIT_ERROR return self.exit_code - repository.destroy() - logger.info("Repository deleted.") - cache.destroy() - logger.info("Cache deleted.") + repository.destroy() + logger.info("Repository deleted.") + cache.destroy() + logger.info("Cache deleted.") return self.exit_code def do_mount(self, args): @@ -361,38 +416,38 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") self.print_error('%s: Mountpoint must be a writable directory' % args.mountpoint) return self.exit_code - repository = self.open_repository(args.src) + repository = self.open_repository(args) try: - manifest, key = Manifest.load(repository) - if args.src.archive: - archive = Archive(repository, key, manifest, args.src.archive) - else: - archive = None - operations = FuseOperations(key, repository, manifest, archive) - self.print_info("Mounting filesystem") - try: - operations.mount(args.mountpoint, args.options, args.foreground) - except RuntimeError: - # Relevant error message already printed to stderr by fuse - self.exit_code = EXIT_ERROR + with cache_if_remote(repository) as cached_repo: + manifest, key = Manifest.load(repository) + if args.location.archive: + archive = Archive(repository, key, manifest, args.location.archive) + else: + archive = None + operations = FuseOperations(key, repository, manifest, archive, cached_repo) + logger.info("Mounting filesystem") + try: + operations.mount(args.mountpoint, args.options, args.foreground) + except RuntimeError: + # Relevant error message already printed to stderr by fuse + self.exit_code = EXIT_ERROR finally: repository.close() return self.exit_code def do_list(self, args): """List archive or repository contents""" - repository = self.open_repository(args.src) + repository = self.open_repository(args) manifest, key = Manifest.load(repository) - if args.src.archive: - archive = Archive(repository, key, manifest, args.src.archive) + if args.location.archive: + archive = Archive(repository, key, manifest, args.location.archive) if args.short: for item in archive.iter_items(): print(remove_surrogates(item[b'path'])) else: - tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'} for item in archive.iter_items(): - type = tmap.get(item[b'mode'] // 4096, '?') - mode = format_file_mode(item[b'mode']) + mode = stat.filemode(item[b'mode']) + type = mode[0] size = 0 if type == '-': try: @@ -401,53 +456,57 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") pass try: mtime = 
datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9) - except ValueError: + except OverflowError: # likely a broken mtime and datetime did not want to go beyond year 9999 mtime = datetime(9999, 12, 31, 23, 59, 59) if b'source' in item: if type == 'l': extra = ' -> %s' % item[b'source'] else: - type = 'h' + mode = 'h' + mode[1:] extra = ' link to %s' % item[b'source'] else: extra = '' - print('%s%s %-6s %-6s %8d %s %s%s' % ( - type, mode, item[b'user'] or item[b'uid'], + print('%s %-6s %-6s %8d %s %s%s' % ( + mode, item[b'user'] or item[b'uid'], item[b'group'] or item[b'gid'], size, format_time(mtime), remove_surrogates(item[b'path']), extra)) else: for archive_info in manifest.list_archive_infos(sort_by='ts'): - print(format_archive(archive_info)) + if args.prefix and not archive_info.name.startswith(args.prefix): + continue + if args.short: + print(archive_info.name) + else: + print(format_archive(archive_info)) return self.exit_code def do_info(self, args): """Show archive details such as disk space used""" - repository = self.open_repository(args.archive) + repository = self.open_repository(args) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest, do_files=args.cache_files) - archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) - stats = archive.calc_stats(cache) - print('Name:', archive.name) - print('Fingerprint: %s' % hexlify(archive.id).decode('ascii')) - print('Hostname:', archive.metadata[b'hostname']) - print('Username:', archive.metadata[b'username']) - print('Time: %s' % to_localtime(archive.ts).strftime('%c')) - print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline']))) - print('Number of files: %d' % stats.nfiles) - print() - print(str(stats)) - print(str(cache)) + with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + archive = Archive(repository, key, manifest, args.location.archive, cache=cache) + stats = archive.calc_stats(cache) + print('Name:', archive.name) + print('Fingerprint: %s' % hexlify(archive.id).decode('ascii')) + print('Hostname:', archive.metadata[b'hostname']) + print('Username:', archive.metadata[b'username']) + print('Time: %s' % format_time(to_localtime(archive.ts))) + print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline']))) + print('Number of files: %d' % stats.nfiles) + print() + print(str(stats)) + print(str(cache)) return self.exit_code def do_prune(self, args): """Prune repository archives according to specified rules""" - repository = self.open_repository(args.repository, exclusive=True) + repository = self.open_repository(args, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest, do_files=args.cache_files) archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None: - self.print_error('At least one of the "within", "keep-hourly", "keep-daily", "keep-weekly", ' + self.print_error('At least one of the "keep-within", "keep-hourly", "keep-daily", "keep-weekly", ' '"keep-monthly" or "keep-yearly" settings must be specified') return self.exit_code if args.prefix: @@ -469,21 +528,24 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") keep.sort(key=attrgetter('ts'), reverse=True) to_delete = [a for a in archives if a not in keep] stats = Statistics() - for archive in keep: - 
self.print_info('Keeping archive: %s' % format_archive(archive)) - for archive in to_delete: - if args.dry_run: - self.print_info('Would prune: %s' % format_archive(archive)) - else: - self.print_info('Pruning archive: %s' % format_archive(archive)) - Archive(repository, key, manifest, archive.name, cache).delete(stats) - if to_delete and not args.dry_run: - manifest.write() - repository.commit() - cache.commit() - if args.stats: - logger.info(stats.summary.format(label='Deleted data:', stats=stats)) - logger.info(str(cache)) + with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + for archive in keep: + logger.info('Keeping archive: %s' % format_archive(archive)) + for archive in to_delete: + if args.dry_run: + logger.info('Would prune: %s' % format_archive(archive)) + else: + logger.info('Pruning archive: %s' % format_archive(archive)) + Archive(repository, key, manifest, archive.name, cache).delete(stats) + if to_delete and not args.dry_run: + manifest.write() + repository.commit(save_space=args.save_space) + cache.commit() + if args.stats: + log_multi(DASHES, + stats.summary.format(label='Deleted data:', stats=stats), + str(cache), + DASHES) return self.exit_code def do_upgrade(self, args): @@ -497,26 +559,154 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # to be implemented. # XXX: should auto-detect if it is an attic repository here - repo = AtticRepositoryUpgrader(args.repository.path, create=False) + repo = AtticRepositoryUpgrader(args.location.path, create=False) try: - repo.upgrade(args.dry_run, inplace=args.inplace) + repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress) + except NotImplementedError as e: + print("warning: %s" % e) + repo = BorgRepositoryUpgrader(args.location.path, create=False) + try: + repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress) except NotImplementedError as e: print("warning: %s" % e) return self.exit_code + def do_debug_dump_archive_items(self, args): + """dump (decrypted, decompressed) archive items metadata (not: data)""" + repository = self.open_repository(args) + manifest, key = Manifest.load(repository) + archive = Archive(repository, key, manifest, args.location.archive) + for i, item_id in enumerate(archive.metadata[b'items']): + data = key.decrypt(item_id, repository.get(item_id)) + filename = '%06d_%s.items' % (i, hexlify(item_id).decode('ascii')) + print('Dumping', filename) + with open(filename, 'wb') as fd: + fd.write(data) + print('Done.') + return EXIT_SUCCESS + + def do_debug_get_obj(self, args): + """get object contents from the repository and write it into file""" + repository = self.open_repository(args) + manifest, key = Manifest.load(repository) + hex_id = args.id + try: + id = unhexlify(hex_id) + except ValueError: + print("object id %s is invalid." % hex_id) + else: + try: + data = repository.get(id) + except repository.ObjectNotFound: + print("object %s not found." % hex_id) + else: + with open(args.path, "wb") as f: + f.write(data) + print("object %s fetched." % hex_id) + return EXIT_SUCCESS + + def do_debug_put_obj(self, args): + """put file(s) contents into the repository""" + repository = self.open_repository(args) + manifest, key = Manifest.load(repository) + for path in args.paths: + with open(path, "rb") as f: + data = f.read() + h = sha256(data) # XXX hardcoded + repository.put(h.digest(), data) + print("object %s put." 
% h.hexdigest()) + repository.commit() + return EXIT_SUCCESS + + def do_debug_delete_obj(self, args): + """delete the objects with the given IDs from the repo""" + repository = self.open_repository(args) + manifest, key = Manifest.load(repository) + modified = False + for hex_id in args.ids: + try: + id = unhexlify(hex_id) + except ValueError: + print("object id %s is invalid." % hex_id) + else: + try: + repository.delete(id) + modified = True + print("object %s deleted." % hex_id) + except repository.ObjectNotFound: + print("object %s not found." % hex_id) + if modified: + repository.commit() + print('Done.') + return EXIT_SUCCESS + + def do_break_lock(self, args): + """Break the repository lock (e.g. in case it was left by a dead borg.""" + repository = self.open_repository(args, lock=False) + try: + repository.break_lock() + Cache.break_lock(repository) + finally: + repository.close() + return self.exit_code + helptext = {} - helptext['patterns'] = ''' - Exclude patterns use a variant of shell pattern syntax, with '*' matching any - number of characters, '?' matching any single character, '[...]' matching any - single character specified, including ranges, and '[!...]' matching any - character not specified. For the purpose of these patterns, the path - separator ('\\' for Windows and '/' on other systems) is not treated - specially. For a path to match a pattern, it must completely match from - start to end, or must match from the start to just before a path separator. - Except for the root path, paths will never end in the path separator when - matching is attempted. Thus, if a given pattern ends in a path separator, a - '*' is appended before matching is attempted. Patterns with wildcards should - be quoted to protect them from shell expansion. + helptext['patterns'] = textwrap.dedent(''' + Exclusion patterns support four separate styles, fnmatch, shell, regular + expressions and path prefixes. If followed by a colon (':') the first two + characters of a pattern are used as a style selector. Explicit style + selection is necessary when a non-default style is desired or when the + desired pattern starts with two alphanumeric characters followed by a colon + (i.e. `aa:something/*`). + + `Fnmatch `_, selector `fm:` + + These patterns use a variant of shell pattern syntax, with '*' matching + any number of characters, '?' matching any single character, '[...]' + matching any single character specified, including ranges, and '[!...]' + matching any character not specified. For the purpose of these patterns, + the path separator ('\\' for Windows and '/' on other systems) is not + treated specially. Wrap meta-characters in brackets for a literal match + (i.e. `[?]` to match the literal character `?`). For a path to match + a pattern, it must completely match from start to end, or must match from + the start to just before a path separator. Except for the root path, + paths will never end in the path separator when matching is attempted. + Thus, if a given pattern ends in a path separator, a '*' is appended + before matching is attempted. + + Shell-style patterns, selector `sh:` + + Like fnmatch patterns these are similar to shell patterns. The difference + is that the pattern may include `**/` for matching zero or more directory + levels, `*` for matching zero or more arbitrary characters with the + exception of any path separator. + + Regular expressions, selector `re:` + + Regular expressions similar to those found in Perl are supported. 
Unlike + shell patterns regular expressions are not required to match the complete + path and any substring match is sufficient. It is strongly recommended to + anchor patterns to the start ('^'), to the end ('$') or both. Path + separators ('\\' for Windows and '/' on other systems) in paths are + always normalized to a forward slash ('/') before applying a pattern. The + regular expression syntax is described in the `Python documentation for + the re module `_. + + Prefix path, selector `pp:` + + This pattern style is useful to match whole sub-directories. The pattern + `pp:/data/bar` matches `/data/bar` and everything therein. + + Exclusions can be passed via the command line option `--exclude`. When used + from within a shell the patterns should be quoted to protect them from + expansion. + + The `--exclude-from` option permits loading exclusion patterns from a text + file with one pattern per line. Lines empty or starting with the number sign + ('#') after removing whitespace on both ends are ignored. The optional style + selector prefix is also supported for patterns loaded from a file. Due to + whitespace removal paths with whitespace at the beginning or end can only be + excluded using regular expressions. Examples: @@ -532,7 +722,22 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # The file '/home/user/cache/important' is *not* backed up: $ borg create -e /home/user/cache/ backup / /home/user/cache/important - ''' + + # The contents of directories in '/home' are not backed up when their name + # ends in '.tmp' + $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / + + # Load exclusions from file + $ cat >exclude.txt <malloc(size) - if not key: - raise MemoryError - try: - rv = PKCS5_PBKDF2_HMAC(password, len(password), salt, len(salt), iterations, EVP_sha256(), size, key) - if not rv: - raise Exception('PKCS5_PBKDF2_HMAC failed') - return key[:size] - finally: - free(key) - - -def get_random_bytes(n): - """Return n cryptographically strong pseudo-random bytes - """ - cdef unsigned char *buf = malloc(n) - if not buf: - raise MemoryError - try: - if RAND_bytes(buf, n) < 1: - raise Exception('RAND_bytes failed') - return buf[:n] - finally: - free(buf) - - cdef class AES: """A thin wrapper around the OpenSSL EVP cipher API """ diff --git a/borg/fuse.py b/borg/fuse.py index 417811fe..c726a563 100644 --- a/borg/fuse.py +++ b/borg/fuse.py @@ -7,11 +7,9 @@ import stat import tempfile import time from .archive import Archive -from .helpers import daemonize, have_cython -from .remote import cache_if_remote +from .helpers import daemonize -if have_cython(): - import msgpack +import msgpack # Does this version of llfuse support ns precision? 
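# (Older llfuse releases only expose float-second timestamps on
# EntryAttributes; newer ones add integer *_ns fields. The hasattr() probe
# below runs once at import time and consumers branch on it, roughly --
# a sketch, the concrete attribute assignments are outside this hunk:
#
#     if have_fuse_xtime_ns:
#         entry.st_mtime_ns = mtime_ns      # integer nanoseconds
#     else:
#         entry.st_mtime = mtime_ns / 1e9   # fall back to float seconds
# )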
have_fuse_xtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns') @@ -29,17 +27,17 @@ class ItemCache: def get(self, inode): self.fd.seek(inode - self.offset, io.SEEK_SET) - return next(msgpack.Unpacker(self.fd)) + return next(msgpack.Unpacker(self.fd, read_size=1024)) class FuseOperations(llfuse.Operations): """Export archive as a fuse filesystem """ - def __init__(self, key, repository, manifest, archive): + def __init__(self, key, repository, manifest, archive, cached_repo): super().__init__() self._inode_count = 0 self.key = key - self.repository = cache_if_remote(repository) + self.repository = cached_repo self.items = {} self.parent = {} self.contents = defaultdict(dict) @@ -175,7 +173,7 @@ class FuseOperations(llfuse.Operations): try: return item.get(b'xattrs', {})[name] except KeyError: - raise llfuse.FUSEError(errno.ENODATA) + raise llfuse.FUSEError(errno.ENODATA) from None def _load_pending_archive(self, inode): # Check if this is an archive we need to load @@ -211,7 +209,7 @@ class FuseOperations(llfuse.Operations): continue n = min(size, s - offset) chunk = self.key.decrypt(id, self.repository.get(id)) - parts.append(chunk[offset:offset+n]) + parts.append(chunk[offset:offset + n]) offset = 0 size -= n if not size: diff --git a/borg/hash_sizes.py b/borg/hash_sizes.py new file mode 100644 index 00000000..68e6e160 --- /dev/null +++ b/borg/hash_sizes.py @@ -0,0 +1,103 @@ +""" +Compute hashtable sizes with nices properties +- prime sizes (for small to medium sizes) +- 2 prime-factor sizes (for big sizes) +- fast growth for small sizes +- slow growth for big sizes + +Note: + this is just a tool for developers. + within borgbackup, it is just used to generate hash_sizes definition for _hashindex.c. +""" + +from collections import namedtuple + +K, M, G = 2**10, 2**20, 2**30 + +# hash table size (in number of buckets) +start, end_p1, end_p2 = 1 * K, 127 * M, 2 * G - 10 * M # stay well below 2^31 - 1 + +Policy = namedtuple("Policy", "upto grow") + +policies = [ + # which growth factor to use when growing a hashtable of size < upto + # grow fast (*2.0) at the start so we do not have to resize too often (expensive). + # grow slow (*1.1) for huge hash tables (do not jump too much in memory usage) + Policy(256*K, 2.0), + Policy(2*M, 1.7), + Policy(16*M, 1.4), + Policy(128*M, 1.2), + Policy(2*G-1, 1.1), +] + + +# slightly modified version of: +# http://www.macdevcenter.com/pub/a/python/excerpt/pythonckbk_chap1/index1.html?page=2 +def eratosthenes(): + """Yields the sequence of prime numbers via the Sieve of Eratosthenes.""" + D = {} # map each composite integer to its first-found prime factor + q = 2 # q gets 2, 3, 4, 5, ... 
ad infinitum + while True: + p = D.pop(q, None) + if p is None: + # q not a key in D, so q is prime, therefore, yield it + yield q + # mark q squared as not-prime (with q as first-found prime factor) + D[q * q] = q + else: + # let x <- smallest (N*p)+q which wasn't yet known to be composite + # we just learned x is composite, with p first-found prime factor, + # since p is the first-found prime factor of q -- find and mark it + x = p + q + while x in D: + x += p + D[x] = p + q += 1 + + +def two_prime_factors(pfix=65537): + """Yields numbers with 2 prime factors pfix and p.""" + for p in eratosthenes(): + yield pfix * p + + +def get_grow_factor(size): + for p in policies: + if size < p.upto: + return p.grow + + +def find_bigger_prime(gen, i): + while True: + p = next(gen) + if p >= i: + return p + + +def main(): + sizes = [] + i = start + + gen = eratosthenes() + while i < end_p1: + grow_factor = get_grow_factor(i) + p = find_bigger_prime(gen, i) + sizes.append(p) + i = int(i * grow_factor) + + gen = two_prime_factors() # for lower ram consumption + while i < end_p2: + grow_factor = get_grow_factor(i) + p = find_bigger_prime(gen, i) + sizes.append(p) + i = int(i * grow_factor) + + print("""\ +static int hash_sizes[] = { + %s +}; +""" % ', '.join(str(size) for size in sizes)) + + +if __name__ == '__main__': + main() diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 0b4dc260..5fc8d6e4 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -15,6 +15,7 @@ cdef extern from "_hashindex.c": long long *unique_size, long long *unique_csize, long long *total_unique_chunks, long long *total_chunks) void hashindex_merge(HashIndex *index, HashIndex *other) + void hashindex_add(HashIndex *index, void *key, void *value) int hashindex_get_size(HashIndex *index) int hashindex_write(HashIndex *index, char *path) void *hashindex_get(HashIndex *index, void *key) @@ -196,6 +197,14 @@ cdef class ChunkIndex(IndexBase): &total_unique_chunks, &total_chunks) return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks + def add(self, key, refs, size, csize): + assert len(key) == self.key_size + cdef int[3] data + data[0] = _htole32(refs) + data[1] = _htole32(size) + data[2] = _htole32(csize) + hashindex_add(self.index, key, data) + def merge(self, ChunkIndex other): hashindex_merge(self.index, other.index) diff --git a/borg/helpers.py b/borg/helpers.py index 48519db0..3fb68595 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,5 +1,4 @@ -from .support import argparse # see support/__init__.py docstring, DEPRECATED - remove after requiring py 3.4 - +import argparse import binascii from collections import namedtuple from functools import wraps @@ -8,41 +7,27 @@ import os import pwd import queue import re -try: - from shutil import get_terminal_size -except ImportError: - def get_terminal_size(fallback=(80, 24)): - TerminalSize = namedtuple('TerminalSize', ['columns', 'lines']) - return TerminalSize(int(os.environ.get('COLUMNS', fallback[0])), int(os.environ.get('LINES', fallback[1]))) +from shutil import get_terminal_size import sys +import platform import time import unicodedata +import logging +from .logger import create_logger +logger = create_logger() + from datetime import datetime, timezone, timedelta from fnmatch import translate from operator import attrgetter - -def have_cython(): - """allow for a way to disable Cython includes - - this is used during usage docs build, in setup.py. 
It is to avoid - loading the Cython libraries which are built, but sometimes not in - the search path (namely, during Tox runs). - - we simply check an environment variable (``BORG_CYTHON_DISABLE``) - which, when set (to anything) will disable includes of Cython - libraries in key places to enable usage docs to be built. - - :returns: True if Cython is available, False otherwise. - """ - return not os.environ.get('BORG_CYTHON_DISABLE') - -if have_cython(): - from . import hashindex - from . import chunker - from . import crypto - import msgpack +from . import __version__ as borg_version +from . import hashindex +from . import chunker +from . import crypto +from . import shellpattern +import msgpack +import msgpack.fallback # return codes returned by borg command @@ -61,12 +46,19 @@ class Error(Exception): # exception handler (that exits short after with the given exit_code), # it is always a (fatal and abrupt) EXIT_ERROR, never just a warning. exit_code = EXIT_ERROR + # show a traceback? + traceback = False def get_message(self): return type(self).__doc__.format(*self.args) -class IntegrityError(Error): +class ErrorWithTraceback(Error): + """like Error, but show a traceback also""" + traceback = True + + +class IntegrityError(ErrorWithTraceback): """Data integrity error""" @@ -140,7 +132,7 @@ class Manifest: def prune_within(archives, within): - multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365} + multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365} try: hours = int(within[:-1]) * multiplier[within[-1]] except (KeyError, ValueError): @@ -148,7 +140,7 @@ def prune_within(archives, within): raise argparse.ArgumentTypeError('Unable to parse --within option: "%s"' % within) if hours <= 0: raise argparse.ArgumentTypeError('Number specified using --within option must be positive') - target = datetime.now(timezone.utc) - timedelta(seconds=hours*60*60) + target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600) return [a for a in archives if a.ts > target] @@ -172,6 +164,7 @@ class Statistics: def __init__(self): self.osize = self.csize = self.usize = self.nfiles = 0 + self.last_progress = 0 # timestamp when last progress was shown def update(self, size, csize, unique): self.osize += size @@ -201,25 +194,27 @@ class Statistics: def csize_fmt(self): return format_file_size(self.csize) - def show_progress(self, item=None, final=False, stream=None): - columns, lines = get_terminal_size() - if not final: - msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) - path = remove_surrogates(item[b'path']) if item else '' - space = columns - len(msg) - if space < len('...') + len(path): - path = '%s...%s' % (path[:(space//2)-len('...')], path[-space//2:]) - msg += "{0:<{space}}".format(path, space=space) - else: - msg = ' ' * columns - print(msg, file=stream or sys.stderr, end="\r") - (stream or sys.stderr).flush() + def show_progress(self, item=None, final=False, stream=None, dt=None): + now = time.time() + if dt is None or now - self.last_progress > dt: + self.last_progress = now + columns, lines = get_terminal_size() + if not final: + msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) + path = remove_surrogates(item[b'path']) if item else '' + space = columns - len(msg) + if space < len('...') + len(path): + path = '%s...%s' % (path[:(space // 2) - len('...')], path[-space // 2:]) + msg += "{0:<{space}}".format(path, space=space) + else: + msg = ' ' * columns + print(msg, file=stream or 
sys.stderr, end="\r", flush=True) def get_keys_dir(): """Determine where to repository keys and cache""" - return os.environ.get('BORG_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.borg', 'keys')) + xdg_config = os.environ.get('XDG_CONFIG_HOME', os.path.join(os.path.expanduser('~'), '.config')) + return os.environ.get('BORG_KEYS_DIR', os.path.join(xdg_config, 'borg', 'keys')) def get_cache_dir(): @@ -235,47 +230,50 @@ def to_localtime(ts): def parse_timestamp(timestamp): """Parse a ISO 8601 timestamp string""" - if '.' in timestamp: # microseconds might not be pressent + if '.' in timestamp: # microseconds might not be present return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f').replace(tzinfo=timezone.utc) else: return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc) +def load_excludes(fh): + """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on + both line ends are ignored. + """ + patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#')) + return [parse_pattern(pattern) for pattern in patterns if pattern] + + def update_excludes(args): - """Merge exclude patterns from files with those on command line. - Empty lines and lines starting with '#' are ignored, but whitespace - is not stripped.""" + """Merge exclude patterns from files with those on command line.""" if hasattr(args, 'exclude_files') and args.exclude_files: if not hasattr(args, 'excludes') or args.excludes is None: args.excludes = [] for file in args.exclude_files: - patterns = [line.rstrip('\r\n') for line in file if not line.startswith('#')] - args.excludes += [ExcludePattern(pattern) for pattern in patterns if pattern] + args.excludes += load_excludes(file) file.close() -def adjust_patterns(paths, excludes): - if paths: - return (excludes or []) + [IncludePattern(path) for path in paths] + [ExcludePattern('*')] - else: - return excludes +class PatternMatcher: + def __init__(self, fallback=None): + self._items = [] + # Value to return from match function when none of the patterns match. + self.fallback = fallback -def exclude_path(path, patterns): - """Used by create and extract sub-commands to determine - whether or not an item should be processed. - """ - for pattern in (patterns or []): - if pattern.match(path): - return isinstance(pattern, ExcludePattern) - return False + def add(self, patterns, value): + """Add list of patterns to internal list. The given value is returned from the match function when one of the + given patterns matches. + """ + self._items.extend((i, value) for i in patterns) + def match(self, path): + for (pattern, value) in self._items: + if pattern.match(path): + return value + + return self.fallback -# For both IncludePattern and ExcludePattern, we require that -# the pattern either match the whole path or an initial segment -# of the path up to but not including a path separator. To -# unify the two cases, we add a path separator to the end of -# the path before matching. def normalized(func): """ Decorator for the Pattern match methods, returning a wrapper that @@ -295,12 +293,11 @@ def normalized(func): return func -class IncludePattern: - """Literal files or directories listed on the command line - for some operations (e.g. extract, but not create). - If a directory is specified, all paths that start with that - path match as well. A trailing slash makes no difference. +class PatternBase: + """Shared logic for inclusion/exclusion patterns. 
""" + PREFIX = NotImplemented + def __init__(self, pattern): self.pattern_orig = pattern self.match_count = 0 @@ -308,13 +305,15 @@ class IncludePattern: if sys.platform in ('darwin',): pattern = unicodedata.normalize("NFD", pattern) - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + self._prepare(pattern) @normalized def match(self, path): - matches = (path+os.path.sep).startswith(self.pattern) + matches = self._match(path) + if matches: self.match_count += 1 + return matches def __repr__(self): @@ -323,39 +322,117 @@ class IncludePattern: def __str__(self): return self.pattern_orig + def _prepare(self, pattern): + raise NotImplementedError -class ExcludePattern(IncludePattern): + def _match(self, path): + raise NotImplementedError + + +# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path +# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path +# separator to the end of the path before matching. + + +class PathPrefixPattern(PatternBase): + """Literal files or directories listed on the command line + for some operations (e.g. extract, but not create). + If a directory is specified, all paths that start with that + path match as well. A trailing slash makes no difference. + """ + PREFIX = "pp" + + def _prepare(self, pattern): + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + + def _match(self, path): + return (path + os.path.sep).startswith(self.pattern) + + +class FnmatchPattern(PatternBase): """Shell glob patterns to exclude. A trailing slash means to exclude the contents of a directory, but not the directory itself. """ - def __init__(self, pattern): - self.pattern_orig = pattern - self.match_count = 0 + PREFIX = "fm" + def _prepare(self, pattern): if pattern.endswith(os.path.sep): - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep + pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep else: - self.pattern = os.path.normpath(pattern)+os.path.sep+'*' + pattern = os.path.normpath(pattern) + os.path.sep + '*' - if sys.platform in ('darwin',): - self.pattern = unicodedata.normalize("NFD", self.pattern) + self.pattern = pattern # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. self.regex = re.compile(translate(self.pattern)) - @normalized - def match(self, path): - matches = self.regex.match(path+os.path.sep) is not None - if matches: - self.match_count += 1 - return matches + def _match(self, path): + return (self.regex.match(path + os.path.sep) is not None) - def __repr__(self): - return '%s(%s)' % (type(self), self.pattern) - def __str__(self): - return self.pattern_orig +class ShellPattern(PatternBase): + """Shell glob patterns to exclude. A trailing slash means to + exclude the contents of a directory, but not the directory itself. + """ + PREFIX = "sh" + + def _prepare(self, pattern): + sep = os.path.sep + + if pattern.endswith(sep): + pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep + else: + pattern = os.path.normpath(pattern) + sep + "**" + sep + "*" + + self.pattern = pattern + self.regex = re.compile(shellpattern.translate(self.pattern)) + + def _match(self, path): + return (self.regex.match(path + os.path.sep) is not None) + + +class RegexPattern(PatternBase): + """Regular expression to exclude. 
+ """ + PREFIX = "re" + + def _prepare(self, pattern): + self.pattern = pattern + self.regex = re.compile(pattern) + + def _match(self, path): + # Normalize path separators + if os.path.sep != '/': + path = path.replace(os.path.sep, '/') + + return (self.regex.search(path) is not None) + + +_PATTERN_STYLES = set([ + FnmatchPattern, + PathPrefixPattern, + RegexPattern, + ShellPattern, +]) + +_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) + + +def parse_pattern(pattern, fallback=FnmatchPattern): + """Read pattern from string and return an instance of the appropriate implementation class. + """ + if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum(): + (style, pattern) = (pattern[:2], pattern[3:]) + + cls = _PATTERN_STYLE_BY_PREFIX.get(style, None) + + if cls is None: + raise ValueError("Unknown pattern style: {}".format(style)) + else: + cls = fallback + + return cls(pattern) def timestamp(s): @@ -392,34 +469,24 @@ def CompressionSpec(s): count = len(values) if count < 1: raise ValueError - compression = values[0] - try: - compression = int(compression) - if count > 1: - raise ValueError - # DEPRECATED: it is just --compression N - if 0 <= compression <= 9: - return dict(name='zlib', level=compression) - raise ValueError - except ValueError: - # --compression algo[,...] - name = compression - if name in ('none', 'lz4', ): - return dict(name=name) - if name in ('zlib', 'lzma', ): - if count < 2: - level = 6 # default compression level in py stdlib - elif count == 2: - level = int(values[1]) - if not 0 <= level <= 9: - raise ValueError - else: + # --compression algo[,level] + name = values[0] + if name in ('none', 'lz4', ): + return dict(name=name) + if name in ('zlib', 'lzma', ): + if count < 2: + level = 6 # default compression level in py stdlib + elif count == 2: + level = int(values[1]) + if not 0 <= level <= 9: raise ValueError - return dict(name=name, level=level) - raise ValueError + else: + raise ValueError + return dict(name=name, level=level) + raise ValueError -def is_cachedir(path): +def dir_is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to the CACHEDIR.TAG protocol @@ -439,13 +506,27 @@ def is_cachedir(path): return False -def format_time(t): - """Format datetime suitable for fixed length list output +def dir_is_tagged(path, exclude_caches, exclude_if_present): + """Determines whether the specified path is excluded by being a cache + directory or containing user-specified tag files. Returns a list of the + paths of the tag files (either CACHEDIR.TAG or the matching + user-specified files). 
""" - if abs((datetime.now() - t).days) < 365: - return t.strftime('%b %d %H:%M') - else: - return t.strftime('%b %d %Y') + tag_paths = [] + if exclude_caches and dir_is_cachedir(path): + tag_paths.append(os.path.join(path, 'CACHEDIR.TAG')) + if exclude_if_present is not None: + for tag in exclude_if_present: + tag_path = os.path.join(path, tag) + if os.path.isfile(tag_path): + tag_paths.append(tag_path) + return tag_paths + + +def format_time(t): + """use ISO-8601 date and time format + """ + return t.strftime('%a, %Y-%m-%d %H:%M:%S') def format_timedelta(td): @@ -466,15 +547,6 @@ def format_timedelta(td): return txt -def format_file_mode(mod): - """Format file mode bits for list output - """ - def x(v): - return ''.join(v & m and s or '-' - for m, s in ((4, 'r'), (2, 'w'), (1, 'x'))) - return '%s%s%s' % (x(mod // 64), x(mod // 8), x(mod)) - - def format_file_size(v, precision=2): """Format file size into a human friendly format """ @@ -501,7 +573,7 @@ def sizeof_fmt_decimal(num, suffix='B', sep='', precision=2): def format_archive(archive): - return '%-36s %s' % (archive.name, to_localtime(archive.ts).strftime('%c')) + return '%-36s %s' % (archive.name, format_time(to_localtime(archive.ts))) def memoize(function): @@ -620,33 +692,34 @@ class Location: self.user = m.group('user') self.host = m.group('host') self.port = m.group('port') and int(m.group('port')) or None - self.path = m.group('path') + self.path = os.path.normpath(m.group('path')) self.archive = m.group('archive') return True m = self.file_re.match(text) if m: self.proto = m.group('proto') - self.path = m.group('path') + self.path = os.path.normpath(m.group('path')) self.archive = m.group('archive') return True m = self.scp_re.match(text) if m: self.user = m.group('user') self.host = m.group('host') - self.path = m.group('path') + self.path = os.path.normpath(m.group('path')) self.archive = m.group('archive') self.proto = self.host and 'ssh' or 'file' return True return False def __str__(self): - items = [] - items.append('proto=%r' % self.proto) - items.append('user=%r' % self.user) - items.append('host=%r' % self.host) - items.append('port=%r' % self.port) - items.append('path=%r' % self.path) - items.append('archive=%r' % self.archive) + items = [ + 'proto=%r' % self.proto, + 'user=%r' % self.user, + 'host=%r' % self.host, + 'port=%r' % self.port, + 'path=%r' % self.path, + 'archive=%r' % self.archive, + ] return ', '.join(items) def to_key_filename(self): @@ -679,7 +752,7 @@ def location_validator(archive=None): try: loc = Location(text) except ValueError: - raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text) + raise argparse.ArgumentTypeError('Invalid location format: "%s"' % text) from None if archive is True and not loc.archive: raise argparse.ArgumentTypeError('"%s": No archive specified' % text) elif archive is False and loc.archive: @@ -688,19 +761,6 @@ def location_validator(archive=None): return validator -def read_msgpack(filename): - with open(filename, 'rb') as fd: - return msgpack.unpack(fd) - - -def write_msgpack(filename, d): - with open(filename + '.tmp', 'wb') as fd: - msgpack.pack(d, fd) - fd.flush() - os.fsync(fd.fileno()) - os.rename(filename + '.tmp', filename) - - def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'): for key in keys: if isinstance(d.get(key), bytes): @@ -749,35 +809,6 @@ class StableDict(dict): return sorted(super().items()) -if sys.version < '3.3': - # st_xtime_ns attributes only available in 3.3+ - def st_atime_ns(st): - return int(st.st_atime * 
1e9) - - def st_ctime_ns(st): - return int(st.st_ctime * 1e9) - - def st_mtime_ns(st): - return int(st.st_mtime * 1e9) - - # unhexlify in < 3.3 incorrectly only accepts bytes input - def unhexlify(data): - if isinstance(data, str): - data = data.encode('ascii') - return binascii.unhexlify(data) -else: - def st_atime_ns(st): - return st.st_atime_ns - - def st_ctime_ns(st): - return st.st_ctime_ns - - def st_mtime_ns(st): - return st.st_mtime_ns - - unhexlify = binascii.unhexlify - - def bigint_to_int(mtime): """Convert bytearray to int """ @@ -868,3 +899,187 @@ def make_queue(name, maxsize=0, debug=QUEUE_DEBUG): return DebugQueue(name, maxsize) else: return queue.Queue(maxsize) + + +def is_slow_msgpack(): + return msgpack.Packer is msgpack.fallback.Packer + + +FALSISH = ('No', 'NO', 'no', 'N', 'n', '0', ) +TRUISH = ('Yes', 'YES', 'yes', 'Y', 'y', '1', ) +DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', ) + + +def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, + retry_msg=None, invalid_msg=None, env_msg=None, + falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH, + default=False, retry=True, env_var_override=None, ofile=None, input=input): + """ + Output (usually a question) and let user input an answer. + Qualifies the answer according to falsish, truish and defaultish as True, False or . + If it didn't qualify and retry_msg is None (no retries wanted), + return the default [which defaults to False]. Otherwise let user retry + answering until answer is qualified. + + If env_var_override is given and this var is present in the environment, do not ask + the user, but just use the env var contents as answer as if it was typed in. + Otherwise read input from stdin and proceed as normal. + If EOF is received instead an input or an invalid input without retry possibility, + return default. + + :param msg: introducing message to output on ofile, no \n is added [None] + :param retry_msg: retry message to output on ofile, no \n is added [None] + :param false_msg: message to output before returning False [None] + :param true_msg: message to output before returning True [None] + :param default_msg: message to output before returning a [None] + :param invalid_msg: message to output after a invalid answer was given [None] + :param env_msg: message to output when using input from env_var_override [None], + needs to have 2 placeholders for answer and env var name, e.g.: "{} (from {})" + :param falsish: sequence of answers qualifying as False + :param truish: sequence of answers qualifying as True + :param defaultish: sequence of answers qualifying as + :param default: default return value (defaultish answer was given or no-answer condition) [False] + :param retry: if True and input is incorrect, retry. Otherwise return default. [True] + :param env_var_override: environment variable name [None] + :param ofile: output stream [sys.stderr] + :param input: input function [input from builtins] + :return: boolean answer value, True or False + """ + # note: we do not assign sys.stderr as default above, so it is + # really evaluated NOW, not at function definition time. 
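    # Example call, mirroring do_delete above (a sketch -- with the env var
    # set, the prompt is non-interactive and its value is handled as if it
    # had been typed in):
    #
    #     os.environ['BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'] = 'YES'
    #     yes("Type 'YES' if you understand this and want to continue: ",
    #         false_msg="Aborting.", truish=('YES', ),
    #         env_var_override='BORG_DELETE_I_KNOW_WHAT_I_AM_DOING')  # -> True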
+ if ofile is None: + ofile = sys.stderr + if default not in (True, False): + raise ValueError("invalid default value, must be True or False") + if msg: + print(msg, file=ofile, end='', flush=True) + while True: + answer = None + if env_var_override: + answer = os.environ.get(env_var_override) + if answer is not None and env_msg: + print(env_msg.format(answer, env_var_override), file=ofile) + if answer is None: + try: + answer = input() + except EOFError: + # avoid defaultish[0], defaultish could be empty + answer = truish[0] if default else falsish[0] + if answer in defaultish: + if default_msg: + print(default_msg, file=ofile) + return default + if answer in truish: + if true_msg: + print(true_msg, file=ofile) + return True + if answer in falsish: + if false_msg: + print(false_msg, file=ofile) + return False + # if we get here, the answer was invalid + if invalid_msg: + print(invalid_msg, file=ofile) + if not retry: + return default + if retry_msg: + print(retry_msg, file=ofile, end='', flush=True) + # in case we used an environment variable and it gave an invalid answer, do not use it again: + env_var_override = None + + +class ProgressIndicatorPercent: + def __init__(self, total, step=5, start=0, same_line=False, msg="%3.0f%%", file=sys.stderr): + """ + Percentage-based progress indicator + + :param total: total amount of items + :param step: step size in percent + :param start: at which percent value to start + :param same_line: if True, emit output always on same line + :param msg: output message, must contain one %f placeholder for the percentage + :param file: output file, default: sys.stderr + """ + self.counter = 0 # 0 .. (total-1) + self.total = total + self.trigger_at = start # output next percentage value when reaching (at least) this + self.step = step + self.file = file + self.msg = msg + self.same_line = same_line + + def progress(self, current=None): + if current is not None: + self.counter = current + pct = self.counter * 100 / self.total + self.counter += 1 + if pct >= self.trigger_at: + self.trigger_at += self.step + return pct + + def show(self, current=None): + pct = self.progress(current) + if pct is not None: + return self.output(pct) + + def output(self, percent): + print(self.msg % percent, file=self.file, end='\r' if self.same_line else '\n', flush=True) + + def finish(self): + if self.same_line: + print(" " * len(self.msg % 100.0), file=self.file, end='\r') + + +class ProgressIndicatorEndless: + def __init__(self, step=10, file=sys.stderr): + """ + Progress indicator (long row of dots) + + :param step: every Nth call, call the func + :param file: output file, default: sys.stderr + """ + self.counter = 0 # call counter + self.triggered = 0 # increases 1 per trigger event + self.step = step # trigger every calls + self.file = file + + def progress(self): + self.counter += 1 + trigger = self.counter % self.step == 0 + if trigger: + self.triggered += 1 + return trigger + + def show(self): + trigger = self.progress() + if trigger: + return self.output(self.triggered) + + def output(self, triggered): + print('.', end='', file=self.file, flush=True) + + def finish(self): + print(file=self.file) + + +def sysinfo(): + info = [] + info.append('Platform: %s' % (' '.join(platform.uname()), )) + if sys.platform.startswith('linux'): + info.append('Linux: %s %s %s LibC: %s %s' % (platform.linux_distribution() + platform.libc_ver())) + info.append('Borg: %s Python: %s %s' % (borg_version, platform.python_implementation(), platform.python_version())) + info.append('') + return 
'\n'.join(info) + + +def log_multi(*msgs, level=logging.INFO): + """ + log multiple lines of text, each line by a separate logging call for cosmetic reasons + + each positional argument may be a single or multiple lines (separated by \n) of text. + """ + lines = [] + for msg in msgs: + lines.extend(msg.splitlines()) + for line in lines: + logger.log(level, line) diff --git a/borg/key.py b/borg/key.py index a9ceef41..6e56de19 100644 --- a/borg/key.py +++ b/borg/key.py @@ -2,42 +2,32 @@ from binascii import hexlify, a2b_base64, b2a_base64 import configparser import getpass import os +import sys import textwrap -import hmac -from hashlib import sha256 +from hmac import HMAC, compare_digest +from hashlib import sha256, pbkdf2_hmac -from .helpers import IntegrityError, get_keys_dir, Error, have_cython +from .helpers import IntegrityError, get_keys_dir, Error from .logger import create_logger logger = create_logger() -if have_cython(): - from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks - from .compress import Compressor, COMPR_BUFFER - import msgpack +from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +from .compress import Compressor, COMPR_BUFFER +import msgpack PREFIX = b'\0' * 8 class UnsupportedPayloadError(Error): - """Unsupported payload type {}. A newer version is required to access this repository. - """ + """Unsupported payload type {}. A newer version is required to access this repository.""" class KeyfileNotFoundError(Error): - """No key file for repository {} found in {}. - """ + """No key file for repository {} found in {}.""" class RepoKeyNotFoundError(Error): - """No key entry found in the config of repository {}. - """ - - -class HMAC(hmac.HMAC): - """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews - """ - def update(self, msg): - self.inner.update(msg) + """No key entry found in the config of repository {}.""" def key_creator(repository, args): @@ -45,8 +35,6 @@ def key_creator(repository, args): return KeyfileKey.create(repository, args) elif args.encryption == 'repokey': return RepoKey.create(repository, args) - elif args.encryption == 'passphrase': # deprecated, kill in 1.x - return PassphraseKey.create(repository, args) else: return PlaintextKey.create(repository, args) @@ -57,8 +45,10 @@ def key_factory(repository, manifest_data): return KeyfileKey.detect(repository, manifest_data) elif key_type == RepoKey.TYPE: return RepoKey.detect(repository, manifest_data) - elif key_type == PassphraseKey.TYPE: # deprecated, kill in 1.x - return PassphraseKey.detect(repository, manifest_data) + elif key_type == PassphraseKey.TYPE: + # we just dispatch to repokey mode and assume the passphrase was migrated to a repokey. + # see also comment in PassphraseKey class. 
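        # For this to work, RepoKey's envelope handling must also accept the
        # legacy type byte -- see the relaxed checks in AESKeyBase.decrypt()
        # and extract_nonce() below, which boil down to (a sketch):
        #
        #     valid = (data[0] == self.TYPE or
        #              data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey))
        #     if not valid:
        #         raise IntegrityError('Invalid encryption envelope')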
+ return RepoKey.detect(repository, manifest_data) elif key_type == PlaintextKey.TYPE: return PlaintextKey.detect(repository, manifest_data) else: @@ -92,7 +82,7 @@ class PlaintextKey(KeyBase): @classmethod def create(cls, repository, args): - logger.info('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile|passphrase" to enable encryption.') + logger.info('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile" to enable encryption.') return cls(repository) @classmethod @@ -142,19 +132,25 @@ class AESKeyBase(KeyBase): return b''.join((self.TYPE_STR, hmac, data)) def decrypt(self, id, data): - if data[0] != self.TYPE: + if not (data[0] == self.TYPE or + data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): raise IntegrityError('Invalid encryption envelope') - hmac = memoryview(data)[1:33] - if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: + hmac_given = memoryview(data)[1:33] + hmac_computed = memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) + if not compare_digest(hmac_computed, hmac_given): raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=PREFIX + data[33:41]) data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:])) - if id and HMAC(self.id_key, data, sha256).digest() != id: - raise IntegrityError('Chunk id verification failed') + if id: + hmac_given = id + hmac_computed = HMAC(self.id_key, data, sha256).digest() + if not compare_digest(hmac_computed, hmac_given): + raise IntegrityError('Chunk id verification failed') return data def extract_nonce(self, payload): - if payload[0] != self.TYPE: + if not (payload[0] == self.TYPE or + payload[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): raise IntegrityError('Invalid encryption envelope') nonce = bytes_to_long(payload[33:41]) return nonce @@ -205,22 +201,25 @@ class Passphrase(str): return '' def kdf(self, salt, iterations, length): - return pbkdf2_sha256(self.encode('utf-8'), salt, iterations, length) + return pbkdf2_hmac('sha256', self.encode('utf-8'), salt, iterations, length) class PassphraseKey(AESKeyBase): - # This mode is DEPRECATED and will be killed at 1.0 release. - # With this mode: + # This mode was killed in borg 1.0, see: https://github.com/borgbackup/borg/issues/97 + # Reasons: # - you can never ever change your passphrase for existing repos. # - you can never ever use a different iterations count for existing repos. + # "Killed" means: + # - there is no automatic dispatch to this class via type byte + # - --encryption=passphrase is an invalid argument now + # This class is kept for a while to support migration from passphrase to repokey mode. TYPE = 0x01 iterations = 100000 # must not be changed ever! 
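    # Derivation goes through Passphrase.kdf() above, i.e. the stdlib
    # primitive -- a minimal sketch (`salt` is a placeholder; the iteration
    # count is pinned because a different count would derive a different key
    # and lock existing repositories out):
    #
    #     from hashlib import pbkdf2_hmac
    #     salt = b'\0' * 32  # illustrative only
    #     key = pbkdf2_hmac('sha256', 'secret'.encode('utf-8'), salt, 100000, 32)
    #     assert len(key) == 32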
@classmethod def create(cls, repository, args): key = cls(repository) - logger.warning('WARNING: "passphrase" mode is deprecated and will be removed in 1.0.') - logger.warning('If you want something similar (but with less issues), use "repokey" mode.') + logger.warning('WARNING: "passphrase" mode is unsupported since borg 1.0.') passphrase = Passphrase.new(allow_empty=False) key.init(repository, passphrase) return key @@ -273,7 +272,7 @@ class KeyfileKeyBase(AESKeyBase): raise NotImplementedError def _load(self, key_data, passphrase): - cdata = a2b_base64(key_data.encode('ascii')) # .encode needed for Python 3.[0-2] + cdata = a2b_base64(key_data) data = self.decrypt_key_file(cdata, passphrase) if data: key = msgpack.unpackb(data) @@ -297,7 +296,7 @@ class KeyfileKeyBase(AESKeyBase): return data def encrypt_key_file(self, data, passphrase): - salt = get_random_bytes(32) + salt = os.urandom(32) iterations = 100000 key = passphrase.kdf(salt, iterations, 32) hash = HMAC(key, data, sha256).digest() @@ -335,7 +334,7 @@ class KeyfileKeyBase(AESKeyBase): passphrase = Passphrase.new(allow_empty=True) key = cls(repository) key.repository_id = repository.id - key.init_from_random_data(get_random_bytes(100)) + key.init_from_random_data(os.urandom(100)) key.init_ciphers() target = key.get_new_target(args) key.save(target, passphrase) @@ -361,12 +360,12 @@ class KeyfileKey(KeyfileKeyBase): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() - if line.startswith(self.FILE_ID) and line[len(self.FILE_ID)+1:] == id: + if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == id: return filename raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) def get_new_target(self, args): - filename = args.repository.to_key_filename() + filename = args.location.to_key_filename() path = filename i = 1 while os.path.exists(path): @@ -400,7 +399,7 @@ class RepoKey(KeyfileKeyBase): self.repository.load_key() return loc except configparser.NoOptionError: - raise RepoKeyNotFoundError(loc) + raise RepoKeyNotFoundError(loc) from None def get_new_target(self, args): return self.repository diff --git a/borg/locking.py b/borg/locking.py index aff3c5fc..c0388578 100644 --- a/borg/locking.py +++ b/borg/locking.py @@ -1,10 +1,9 @@ -import errno import json import os import socket import time -from borg.helpers import Error +from borg.helpers import Error, ErrorWithTraceback ADD, REMOVE = 'add', 'remove' SHARED, EXCLUSIVE = 'shared', 'exclusive' @@ -74,32 +73,45 @@ class TimeoutTimer: return False +class LockError(Error): + """Failed to acquire the lock {}.""" + + +class LockErrorT(ErrorWithTraceback): + """Failed to acquire the lock {}.""" + + +class LockTimeout(LockError): + """Failed to create/acquire the lock {} (timeout).""" + + +class LockFailed(LockErrorT): + """Failed to create/acquire the lock {} ({}).""" + + +class NotLocked(LockErrorT): + """Failed to release the lock {} (was not locked).""" + + +class NotMyLock(LockErrorT): + """Failed to release the lock {} (was/is locked, but not by me).""" + + class ExclusiveLock: - """An exclusive Lock based on mkdir fs operation being atomic""" - class LockError(Error): - """Failed to acquire the lock {}.""" - - class LockTimeout(LockError): - """Failed to create/acquire the lock {} (timeout).""" - - class LockFailed(LockError): - """Failed to create/acquire the lock {} ({}).""" - - class UnlockError(Error): - """Failed to release the lock {}.""" - - class NotLocked(UnlockError): - 
"""Failed to release the lock {} (was not locked).""" - - class NotMyLock(UnlockError): - """Failed to release the lock {} (was/is locked, but not by me).""" + """An exclusive Lock based on mkdir fs operation being atomic. + If possible, try to use the contextmanager here like: + with ExclusiveLock(...) as lock: + ... + This makes sure the lock is released again if the block is left, no + matter how (e.g. if an exception occurred). + """ def __init__(self, path, timeout=None, sleep=None, id=None): self.timeout = timeout self.sleep = sleep self.path = os.path.abspath(path) self.id = id or get_id() - self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id) + self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id) def __enter__(self): return self.acquire() @@ -119,14 +131,13 @@ class ExclusiveLock: while True: try: os.mkdir(self.path) + except FileExistsError: # already locked + if self.by_me(): + return self + if timer.timed_out_or_sleep(): + raise LockTimeout(self.path) except OSError as err: - if err.errno == errno.EEXIST: # already locked - if self.by_me(): - return self - if timer.timed_out_or_sleep(): - raise self.LockTimeout(self.path) - else: - raise self.LockFailed(self.path, str(err)) + raise LockFailed(self.path, str(err)) from None else: with open(self.unique_name, "wb"): pass @@ -134,9 +145,9 @@ class ExclusiveLock: def release(self): if not self.is_locked(): - raise self.NotLocked(self.path) + raise NotLocked(self.path) if not self.by_me(): - raise self.NotMyLock(self.path) + raise NotMyLock(self.path) os.unlink(self.unique_name) os.rmdir(self.path) @@ -168,12 +179,8 @@ class LockRoster: try: with open(self.path) as f: data = json.load(f) - except IOError as err: - if err.errno != errno.ENOENT: - raise - data = {} - except ValueError: - # corrupt/empty roster file? + except (FileNotFoundError, ValueError): + # no or corrupt/empty roster file? data = {} return data @@ -184,9 +191,8 @@ class LockRoster: def remove(self): try: os.unlink(self.path) - except OSError as e: - if e.errno != errno.ENOENT: - raise + except FileNotFoundError: + pass def get(self, key): roster = self.load() @@ -214,24 +220,25 @@ class UpgradableLock: Typically, write access to a resource needs an exclusive lock (1 writer, noone is allowed reading) and read access to a resource needs a shared lock (multiple readers are allowed). + + If possible, try to use the contextmanager here like: + with UpgradableLock(...) as lock: + ... + This makes sure the lock is released again if the block is left, no + matter how (e.g. if an exception occurred). 
""" - class SharedLockFailed(Error): - """Failed to acquire shared lock [{}]""" - - class ExclusiveLockFailed(Error): - """Failed to acquire write lock [{}]""" - - def __init__(self, path, exclusive=False, sleep=None, id=None): + def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None): self.path = path self.is_exclusive = exclusive self.sleep = sleep + self.timeout = timeout self.id = id or get_id() # globally keeping track of shared and exclusive lockers: self._roster = LockRoster(path + '.roster', id=id) # an exclusive lock, used for: # - holding while doing roster queries / updates # - holding while the UpgradableLock itself is exclusive - self._lock = ExclusiveLock(path + '.exclusive', id=id) + self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout) def __enter__(self): return self.acquire() @@ -246,34 +253,37 @@ class UpgradableLock: if exclusive is None: exclusive = self.is_exclusive sleep = sleep or self.sleep or 0.2 - try: - if exclusive: - self._wait_for_readers_finishing(remove, sleep) - self._roster.modify(EXCLUSIVE, ADD) - else: - with self._lock: - if remove is not None: - self._roster.modify(remove, REMOVE) - self._roster.modify(SHARED, ADD) - self.is_exclusive = exclusive - return self - except ExclusiveLock.LockError as err: - msg = str(err) - if exclusive: - raise self.ExclusiveLockFailed(msg) - else: - raise self.SharedLockFailed(msg) + if exclusive: + self._wait_for_readers_finishing(remove, sleep) + self._roster.modify(EXCLUSIVE, ADD) + else: + with self._lock: + if remove is not None: + self._roster.modify(remove, REMOVE) + self._roster.modify(SHARED, ADD) + self.is_exclusive = exclusive + return self def _wait_for_readers_finishing(self, remove, sleep): + timer = TimeoutTimer(self.timeout, sleep).start() while True: self._lock.acquire() - if remove is not None: - self._roster.modify(remove, REMOVE) - remove = None - if len(self._roster.get(SHARED)) == 0: - return # we are the only one and we keep the lock! - self._lock.release() - time.sleep(sleep) + try: + if remove is not None: + self._roster.modify(remove, REMOVE) + if len(self._roster.get(SHARED)) == 0: + return # we are the only one and we keep the lock! + # restore the roster state as before (undo the roster change): + if remove is not None: + self._roster.modify(remove, ADD) + except: + # avoid orphan lock when an exception happens here, e.g. Ctrl-C! + self._lock.release() + raise + else: + self._lock.release() + if timer.timed_out_or_sleep(): + raise LockTimeout(self.path) def release(self): if self.is_exclusive: diff --git a/borg/logger.py b/borg/logger.py index 69f2a3c2..f2350f8d 100644 --- a/borg/logger.py +++ b/borg/logger.py @@ -32,25 +32,75 @@ The way to use this is as follows: import inspect import logging -import sys +import logging.config +import logging.handlers # needed for handlers defined there being configurable in logging.conf file +import os +import warnings + +configured = False + +# use something like this to ignore warnings: +# warnings.filterwarnings('ignore', r'... 
regex for warning message to ignore ...') -def setup_logging(stream=None): +def _log_warning(message, category, filename, lineno, file=None, line=None): + # for warnings, we just want to use the logging system, not stderr or other files + msg = "{0}:{1}: {2}: {3}".format(filename, lineno, category.__name__, message) + logger = create_logger(__name__) + # Note: the warning will look like coming from here, + # but msg contains info about where it really comes from + logger.warning(msg) + + +def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', level='info', is_serve=False): """setup logging module according to the arguments provided - this sets up a stream handler logger on stderr (by default, if no + if conf_fname is given (or the config file name can be determined via + the env_var, if given): load this logging configuration. + + otherwise, set up a stream handler logger on stderr (by default, if no stream is provided). + + if is_serve == True, we configure a special log format as expected by + the borg client log message interceptor. """ - logging.raiseExceptions = False - l = logging.getLogger('') - sh = logging.StreamHandler(stream) - # other formatters will probably want this, but let's remove - # clutter on stderr - # example: - # sh.setFormatter(logging.Formatter('%(name)s: %(message)s')) - l.addHandler(sh) - l.setLevel(logging.INFO) - return sh + global configured + err_msg = None + if env_var: + conf_fname = os.environ.get(env_var, conf_fname) + if conf_fname: + try: + conf_fname = os.path.abspath(conf_fname) + # we open the conf file here to be able to give a reasonable + # error message in case of failure (if we give the filename to + # fileConfig(), it silently ignores unreadable files and gives + # unhelpful error msgs like "No section: 'formatters'"): + with open(conf_fname) as f: + logging.config.fileConfig(f) + configured = True + logger = logging.getLogger(__name__) + logger.debug('using logging configuration read from "{0}"'.format(conf_fname)) + warnings.showwarning = _log_warning + return None + except Exception as err: # XXX be more precise + err_msg = str(err) + # if we did not / not successfully load a logging configuration, fallback to this: + logger = logging.getLogger('') + handler = logging.StreamHandler(stream) + if is_serve: + fmt = '$LOG %(levelname)s Remote: %(message)s' + else: + fmt = '%(message)s' + handler.setFormatter(logging.Formatter(fmt)) + logger.addHandler(handler) + logger.setLevel(level.upper()) + configured = True + logger = logging.getLogger(__name__) + if err_msg: + logger.warning('setup_logging for "{0}" failed with "{1}".'.format(conf_fname, err_msg)) + logger.debug('using builtin fallback logging configuration') + warnings.showwarning = _log_warning + return handler def find_parent_module(): @@ -73,7 +123,7 @@ def find_parent_module(): def create_logger(name=None): - """create a Logger object with the proper path, which is returned by + """lazily create a Logger object with the proper path, which is returned by find_parent_module() by default, or is provided via the commandline this is really a shortcut for: @@ -81,5 +131,48 @@ def create_logger(name=None): logger = logging.getLogger(__name__) we use it to avoid errors and provide a more standard API. + + We must create the logger lazily, because this is usually called from + module level (and thus executed at import time - BEFORE setup_logging() + was called). 
By doing it lazily we can do the setup first, we just have to + be careful not to call any logger methods before the setup_logging() call. + If you try, you'll get an exception. """ - return logging.getLogger(name or find_parent_module()) + class LazyLogger: + def __init__(self, name=None): + self.__name = name or find_parent_module() + self.__real_logger = None + + @property + def __logger(self): + if self.__real_logger is None: + if not configured: + raise Exception("tried to call a logger before setup_logging() was called") + self.__real_logger = logging.getLogger(self.__name) + return self.__real_logger + + def setLevel(self, *args, **kw): + return self.__logger.setLevel(*args, **kw) + + def log(self, *args, **kw): + return self.__logger.log(*args, **kw) + + def exception(self, *args, **kw): + return self.__logger.exception(*args, **kw) + + def debug(self, *args, **kw): + return self.__logger.debug(*args, **kw) + + def info(self, *args, **kw): + return self.__logger.info(*args, **kw) + + def warning(self, *args, **kw): + return self.__logger.warning(*args, **kw) + + def error(self, *args, **kw): + return self.__logger.error(*args, **kw) + + def critical(self, *args, **kw): + return self.__logger.critical(*args, **kw) + + return LazyLogger(name) diff --git a/borg/platform.py b/borg/platform.py index caa3b4ed..1bc8ee5e 100644 --- a/borg/platform.py +++ b/borg/platform.py @@ -1,12 +1,12 @@ import sys -if sys.platform.startswith('linux'): +if sys.platform.startswith('linux'): # pragma: linux only from .platform_linux import acl_get, acl_set, API_VERSION -elif sys.platform.startswith('freebsd'): +elif sys.platform.startswith('freebsd'): # pragma: freebsd only from .platform_freebsd import acl_get, acl_set, API_VERSION -elif sys.platform == 'darwin': +elif sys.platform == 'darwin': # pragma: darwin only from .platform_darwin import acl_get, acl_set, API_VERSION -else: +else: # pragma: unknown platform only API_VERSION = 2 def acl_get(path, item, st, numeric_owner=False): diff --git a/borg/remote.py b/borg/remote.py index 5d8c71a8..b91a4f95 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -1,20 +1,21 @@ import errno import fcntl +import logging import os import select import shlex from subprocess import Popen, PIPE import sys import tempfile -import traceback from . import __version__ -from .helpers import Error, IntegrityError, have_cython +from .helpers import Error, IntegrityError, sysinfo from .repository import Repository -if have_cython(): - import msgpack +import msgpack + +RPC_PROTOCOL_VERSION = 2 BUFSIZE = 10 * 1024 * 1024 @@ -23,12 +24,16 @@ class ConnectionClosed(Error): """Connection closed by remote host""" +class ConnectionClosedWithHint(ConnectionClosed): + """Connection closed by remote host. 
{}""" + + class PathNotAllowed(Error): """Repository path not allowed""" class InvalidRPCMethod(Error): - """RPC method is not valid""" + """RPC method {} is not valid""" class RepositoryServer: # pragma: no cover @@ -43,10 +48,10 @@ class RepositoryServer: # pragma: no cover 'negotiate', 'open', 'put', - 'repair', 'rollback', 'save_key', 'load_key', + 'break_lock', ) def __init__(self, restrict_to_paths): @@ -56,12 +61,16 @@ class RepositoryServer: # pragma: no cover def serve(self): stdin_fd = sys.stdin.fileno() stdout_fd = sys.stdout.fileno() + stderr_fd = sys.stdout.fileno() # Make stdin non-blocking fl = fcntl.fcntl(stdin_fd, fcntl.F_GETFL) fcntl.fcntl(stdin_fd, fcntl.F_SETFL, fl | os.O_NONBLOCK) # Make stdout blocking fl = fcntl.fcntl(stdout_fd, fcntl.F_GETFL) fcntl.fcntl(stdout_fd, fcntl.F_SETFL, fl & ~os.O_NONBLOCK) + # Make stderr blocking + fl = fcntl.fcntl(stderr_fd, fcntl.F_GETFL) + fcntl.fcntl(stderr_fd, fcntl.F_SETFL, fl & ~os.O_NONBLOCK) unpacker = msgpack.Unpacker(use_list=False) while True: r, w, es = select.select([stdin_fd], [], [], 10) @@ -84,7 +93,9 @@ class RepositoryServer: # pragma: no cover f = getattr(self.repository, method) res = f(*args) except BaseException as e: - exc = "Remote Traceback by Borg %s%s%s" % (__version__, os.linesep, traceback.format_exc()) + logging.exception('Borg %s: exception in RPC call:', __version__) + logging.error(sysinfo()) + exc = "Remote Exception (see remote log for the traceback)" os.write(stdout_fd, msgpack.packb((1, msgid, e.__class__.__name__, exc))) else: os.write(stdout_fd, msgpack.packb((1, msgid, None, res))) @@ -92,9 +103,9 @@ class RepositoryServer: # pragma: no cover return def negotiate(self, versions): - return 1 + return RPC_PROTOCOL_VERSION - def open(self, path, create=False): + def open(self, path, create=False, lock_wait=None, lock=True): path = os.fsdecode(path) if path.startswith('/~'): path = path[1:] @@ -105,22 +116,19 @@ class RepositoryServer: # pragma: no cover break else: raise PathNotAllowed(path) - self.repository = Repository(path, create) + self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock) return self.repository.id class RemoteRepository: extra_test_args = [] - remote_path = 'borg' - # default umask, overriden by --umask, defaults to read/write only for owner - umask = 0o077 class RPCError(Exception): def __init__(self, name): self.name = name - def __init__(self, location, create=False): - self.location = location + def __init__(self, location, create=False, lock_wait=None, lock=True, args=None): + self.location = self._location = location self.preload_ids = [] self.msgid = 0 self.to_send = b'' @@ -129,29 +137,31 @@ class RemoteRepository: self.responses = {} self.unpacker = msgpack.Unpacker(use_list=False) self.p = None - # XXX: ideally, the testsuite would subclass Repository and - # override ssh_cmd() instead of this crude hack, although - # __testsuite__ is not a valid domain name so this is pretty - # safe. - if location.host == '__testsuite__': - args = [sys.executable, '-m', 'borg.archiver', 'serve' ] + self.extra_test_args - else: # pragma: no cover - args = self.ssh_cmd(location) - self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) + testing = location.host == '__testsuite__' + borg_cmd = self.borg_cmd(args, testing) + env = dict(os.environ) + if not testing: + borg_cmd = self.ssh_cmd(location) + borg_cmd + # pyinstaller binary adds LD_LIBRARY_PATH=/tmp/_ME... 
but we do not want + # that the system's ssh binary picks up (non-matching) libraries from there + env.pop('LD_LIBRARY_PATH', None) + self.p = Popen(borg_cmd, bufsize=0, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() + self.stderr_fd = self.p.stderr.fileno() fcntl.fcntl(self.stdin_fd, fcntl.F_SETFL, fcntl.fcntl(self.stdin_fd, fcntl.F_GETFL) | os.O_NONBLOCK) fcntl.fcntl(self.stdout_fd, fcntl.F_SETFL, fcntl.fcntl(self.stdout_fd, fcntl.F_GETFL) | os.O_NONBLOCK) - self.r_fds = [self.stdout_fd] - self.x_fds = [self.stdin_fd, self.stdout_fd] + fcntl.fcntl(self.stderr_fd, fcntl.F_SETFL, fcntl.fcntl(self.stderr_fd, fcntl.F_GETFL) | os.O_NONBLOCK) + self.r_fds = [self.stdout_fd, self.stderr_fd] + self.x_fds = [self.stdin_fd, self.stdout_fd, self.stderr_fd] try: - version = self.call('negotiate', 1) + version = self.call('negotiate', RPC_PROTOCOL_VERSION) except ConnectionClosed: - raise Exception('Server immediately closed connection - is Borg installed and working on the server?') - if version != 1: + raise ConnectionClosedWithHint('Is borg working on the server?') from None + if version != RPC_PROTOCOL_VERSION: raise Exception('Server insisted on using unsupported protocol version %d' % version) - self.id = self.call('open', location.path, create) + self.id = self.call('open', location.path, create, lock_wait, lock) def __del__(self): self.close() @@ -159,10 +169,28 @@ class RemoteRepository: def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path()) - def umask_flag(self): - return ['--umask', '%03o' % self.umask] + def borg_cmd(self, args, testing): + """return a borg serve command line""" + # give some args/options to "borg serve" process as they were given to us + opts = [] + if args is not None: + opts.append('--umask=%03o' % args.umask) + root_logger = logging.getLogger() + if root_logger.isEnabledFor(logging.DEBUG): + opts.append('--debug') + elif root_logger.isEnabledFor(logging.INFO): + opts.append('--info') + elif root_logger.isEnabledFor(logging.WARNING): + pass # warning is default + else: + raise ValueError('log level missing, fix this code') + if testing: + return [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args + else: # pragma: no cover + return [args.remote_path, 'serve'] + opts def ssh_cmd(self, location): + """return a ssh command line that can be prefixed to a borg command line""" args = shlex.split(os.environ.get('BORG_RSH', 'ssh')) if location.port: args += ['-p', str(location.port)] @@ -170,8 +198,6 @@ class RemoteRepository: args.append('%s@%s' % (location.user, location.host)) else: args.append('%s' % location.host) - # use local umask also for the remote process - args += [self.remote_path, 'serve'] + self.umask_flag() return args def call(self, cmd, *args, **kw): @@ -222,19 +248,32 @@ class RemoteRepository: r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1) if x: raise Exception('FD exception occurred') - if r: - data = os.read(self.stdout_fd, BUFSIZE) - if not data: - raise ConnectionClosed() - self.unpacker.feed(data) - for unpacked in self.unpacker: - if not (isinstance(unpacked, tuple) and len(unpacked) == 4): - raise Exception("Unexpected RPC data format.") - type, msgid, error, res = unpacked - if msgid in self.ignore_responses: - self.ignore_responses.remove(msgid) - else: - self.responses[msgid] = error, res + for fd in r: + if fd is self.stdout_fd: + data = os.read(fd, BUFSIZE) + if not data: + raise 
ConnectionClosed() + self.unpacker.feed(data) + for unpacked in self.unpacker: + if not (isinstance(unpacked, tuple) and len(unpacked) == 4): + raise Exception("Unexpected RPC data format.") + type, msgid, error, res = unpacked + if msgid in self.ignore_responses: + self.ignore_responses.remove(msgid) + else: + self.responses[msgid] = error, res + elif fd is self.stderr_fd: + data = os.read(fd, 32768) + if not data: + raise ConnectionClosed() + data = data.decode('utf-8') + for line in data.splitlines(keepends=True): + if line.startswith('$LOG '): + _, level, msg = line.split(' ', 2) + level = getattr(logging, level, logging.CRITICAL) # str -> int + logging.log(level, msg.rstrip()) + else: + sys.stderr.write("Remote: " + line) if w: while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < 100: if calls: @@ -267,11 +306,11 @@ class RemoteRepository: w_fds = [] self.ignore_responses |= set(waiting_for) - def check(self, repair=False): - return self.call('check', repair) + def check(self, repair=False, save_space=False): + return self.call('check', repair, save_space) - def commit(self, *args): - return self.call('commit') + def commit(self, save_space=False): + return self.call('commit', save_space) def rollback(self, *args): return self.call('rollback') @@ -305,6 +344,9 @@ class RemoteRepository: def load_key(self): return self.call('load_key') + def break_lock(self): + return self.call('break_lock') + def close(self): if self.p: self.p.stdin.close() @@ -316,21 +358,45 @@ class RemoteRepository: self.preload_ids += ids -class RepositoryCache: +class RepositoryNoCache: + """A not caching Repository wrapper, passes through to repository. + + Just to have same API (including the context manager) as RepositoryCache. + """ + def __init__(self, repository): + self.repository = repository + + def close(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def get(self, key): + return next(self.get_many([key])) + + def get_many(self, keys): + for data in self.repository.get_many(keys): + yield data + + +class RepositoryCache(RepositoryNoCache): """A caching Repository wrapper Caches Repository GET operations using a local temporary Repository. 
""" def __init__(self, repository): - self.repository = repository + super().__init__(repository) tmppath = tempfile.mkdtemp(prefix='borg-tmp') self.caching_repo = Repository(tmppath, create=True, exclusive=True) - def __del__(self): - self.caching_repo.destroy() - - def get(self, key): - return next(self.get_many([key])) + def close(self): + if self.caching_repo is not None: + self.caching_repo.destroy() + self.caching_repo = None def get_many(self, keys): unknown_keys = [key for key in keys if key not in self.caching_repo] @@ -352,4 +418,5 @@ class RepositoryCache: def cache_if_remote(repository): if isinstance(repository, RemoteRepository): return RepositoryCache(repository) - return repository + else: + return RepositoryNoCache(repository) diff --git a/borg/repository.py b/borg/repository.py index 69ced28d..334065bc 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -1,5 +1,5 @@ -from configparser import RawConfigParser -from binascii import hexlify +from configparser import ConfigParser +from binascii import hexlify, unhexlify from itertools import islice import errno import logging @@ -8,13 +8,12 @@ logger = logging.getLogger(__name__) import os import shutil import struct -import sys from zlib import crc32 -from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify, have_cython -if have_cython(): - from .hashindex import NSIndex -from .locking import UpgradableLock +import msgpack +from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent +from .hashindex import NSIndex +from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache MAX_OBJECT_SIZE = 20 * 1024 * 1024 @@ -45,23 +44,24 @@ class Repository: """Repository {} already exists.""" class InvalidRepository(Error): - """{} is not a valid repository.""" + """{} is not a valid repository. Check repo config.""" - class CheckNeeded(Error): + class CheckNeeded(ErrorWithTraceback): """Inconsistency detected. 
Please run "borg check {}".""" - class ObjectNotFound(Error): + class ObjectNotFound(ErrorWithTraceback): """Object with key {} not found in repository {}.""" - def __init__(self, path, create=False, exclusive=False): + def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True): self.path = os.path.abspath(path) + self._location = Location('file://%s' % self.path) self.io = None self.lock = None self.index = None self._active_txn = False if create: self.create(self.path) - self.open(self.path, exclusive) + self.open(self.path, exclusive, lock_wait=lock_wait, lock=lock) def __del__(self): self.close() @@ -79,11 +79,11 @@ class Repository: with open(os.path.join(path, 'README'), 'w') as fd: fd.write('This is a Borg repository\n') os.mkdir(os.path.join(path, 'data')) - config = RawConfigParser() + config = ConfigParser(interpolation=None) config.add_section('repository') config.set('repository', 'version', '1') - config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR) - config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE) + config.set('repository', 'segments_per_dir', str(self.DEFAULT_SEGMENTS_PER_DIR)) + config.set('repository', 'max_segment_size', str(self.DEFAULT_MAX_SEGMENT_SIZE)) config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii')) self.save_config(path, config) @@ -131,12 +131,18 @@ class Repository: self.replay_segments(replay_from, segments_transaction_id) return self.get_index_transaction_id() - def open(self, path, exclusive): + def break_lock(self): + UpgradableLock(os.path.join(self.path, 'lock')).break_lock() + + def open(self, path, exclusive, lock_wait=None, lock=True): self.path = path if not os.path.isdir(path): raise self.DoesNotExist(path) - self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive).acquire() - self.config = RawConfigParser() + if lock: + self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait).acquire() + else: + self.lock = None + self.config = ConfigParser(interpolation=None) self.config.read(os.path.join(self.path, 'config')) if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: raise self.InvalidRepository(path) @@ -153,11 +159,11 @@ class Repository: self.lock.release() self.lock = None - def commit(self): + def commit(self, save_space=False): """Commit transaction """ self.io.write_commit() - self.compact_segments() + self.compact_segments(save_space=save_space) self.write_index() self.rollback() @@ -170,21 +176,22 @@ class Repository: self._active_txn = True try: self.lock.upgrade() - except UpgradableLock.ExclusiveLockFailed: + except (LockError, LockErrorT): # if upgrading the lock to exclusive fails, we do not have an # active transaction. this is important for "serve" mode, where # the repository instance lives on - even if exceptions happened. 
            self._active_txn = False
             raise
-        if not self.index:
+        if not self.index or transaction_id is None:
             self.index = self.open_index(transaction_id)
         if transaction_id is None:
-            self.segments = {}
-            self.compact = set()
+            self.segments = {}  # XXX bad name: usage_count_of_segment_x = self.segments[x]
+            self.compact = set()  # XXX bad name: segments_needing_compaction = self.compact
         else:
             if do_cleanup:
                 self.io.cleanup(transaction_id)
-            hints = read_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id))
+            with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd:
+                hints = msgpack.unpack(fd)
             if hints[b'version'] != 1:
                 raise ValueError('Unknown hints file version: %d' % hints[b'version'])
             self.segments = hints[b'segments']
@@ -195,7 +202,12 @@
             b'segments': self.segments,
             b'compact': list(self.compact)}
         transaction_id = self.io.get_segments_transaction_id()
-        write_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id), hints)
+        hints_file = os.path.join(self.path, 'hints.%d' % transaction_id)
+        with open(hints_file + '.tmp', 'wb') as fd:
+            msgpack.pack(hints, fd)
+            fd.flush()
+            os.fsync(fd.fileno())
+        os.rename(hints_file + '.tmp', hints_file)
         self.index.write(os.path.join(self.path, 'index.tmp'))
         os.rename(os.path.join(self.path, 'index.tmp'),
                   os.path.join(self.path, 'index.%d' % transaction_id))
@@ -209,69 +221,103 @@
             os.unlink(os.path.join(self.path, name))
         self.index = None
 
-    def compact_segments(self):
+    def compact_segments(self, save_space=False):
         """Compact sparse segments by copying data into new segments
         """
         if not self.compact:
             return
         index_transaction_id = self.get_index_transaction_id()
         segments = self.segments
+        unused = []  # list of segments that are not used anymore
+
+        def complete_xfer():
+            # complete the transfer (usually exactly when some target segment
+            # is full, or at the very end when everything is processed)
+            nonlocal unused
+            # commit the new, compact, used segments
+            self.io.write_commit()
+            # get rid of the old, sparse, unused segments. free space.
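+            # note: deleting only after the write_commit() above keeps this
+            # crash-safe: if we die between commit and delete, the old segments
+            # are just leftover garbage, not lost data.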
+ for segment in unused: + assert self.segments.pop(segment) == 0 + self.io.delete_segment(segment) + unused = [] + for segment in sorted(self.compact): if self.io.segment_exists(segment): for tag, key, offset, data in self.io.iter_objects(segment, include_data=True): if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset): - new_segment, offset = self.io.write_put(key, data) + try: + new_segment, offset = self.io.write_put(key, data, raise_full=save_space) + except LoggedIO.SegmentFull: + complete_xfer() + new_segment, offset = self.io.write_put(key, data) self.index[key] = new_segment, offset segments.setdefault(new_segment, 0) segments[new_segment] += 1 segments[segment] -= 1 elif tag == TAG_DELETE: if index_transaction_id is None or segment > index_transaction_id: - self.io.write_delete(key) + try: + self.io.write_delete(key, raise_full=save_space) + except LoggedIO.SegmentFull: + complete_xfer() + self.io.write_delete(key) assert segments[segment] == 0 - - self.io.write_commit() - for segment in sorted(self.compact): - assert self.segments.pop(segment) == 0 - self.io.delete_segment(segment) + unused.append(segment) + complete_xfer() self.compact = set() def replay_segments(self, index_transaction_id, segments_transaction_id): self.prepare_txn(index_transaction_id, do_cleanup=False) - for segment, filename in self.io.segment_iterator(): - if index_transaction_id is not None and segment <= index_transaction_id: - continue - if segment > segments_transaction_id: - break - self.segments[segment] = 0 - for tag, key, offset in self.io.iter_objects(segment): - if tag == TAG_PUT: - try: - s, _ = self.index[key] - self.compact.add(s) - self.segments[s] -= 1 - except KeyError: - pass - self.index[key] = segment, offset - self.segments[segment] += 1 - elif tag == TAG_DELETE: - try: - s, _ = self.index.pop(key) - self.segments[s] -= 1 - self.compact.add(s) - except KeyError: - pass - self.compact.add(segment) - elif tag == TAG_COMMIT: + try: + segment_count = sum(1 for _ in self.io.segment_iterator()) + pi = ProgressIndicatorPercent(total=segment_count, msg="Replaying segments %3.0f%%", same_line=True) + for i, (segment, filename) in enumerate(self.io.segment_iterator()): + pi.show(i) + if index_transaction_id is not None and segment <= index_transaction_id: continue - else: - raise self.CheckNeeded(self.path) - if self.segments[segment] == 0: - self.compact.add(segment) - self.write_index() - self.rollback() + if segment > segments_transaction_id: + break + objects = self.io.iter_objects(segment) + self._update_index(segment, objects) + pi.finish() + self.write_index() + finally: + self.rollback() - def check(self, repair=False): + def _update_index(self, segment, objects, report=None): + """some code shared between replay_segments and check""" + self.segments[segment] = 0 + for tag, key, offset in objects: + if tag == TAG_PUT: + try: + s, _ = self.index[key] + self.compact.add(s) + self.segments[s] -= 1 + except KeyError: + pass + self.index[key] = segment, offset + self.segments[segment] += 1 + elif tag == TAG_DELETE: + try: + s, _ = self.index.pop(key) + self.segments[s] -= 1 + self.compact.add(s) + except KeyError: + pass + self.compact.add(segment) + elif tag == TAG_COMMIT: + continue + else: + msg = 'Unexpected tag {} in segment {}'.format(tag, segment) + if report is None: + raise self.CheckNeeded(msg) + else: + report(msg) + if self.segments[segment] == 0: + self.compact.add(segment) + + def check(self, repair=False, save_space=False): """Check repository consistency 
This method verifies all segment checksums and makes sure
@@ -284,6 +330,7 @@
             error_found = True
             logger.error(msg)
 
+        logger.info('Starting repository check')
         assert not self._active_txn
         try:
             transaction_id = self.get_transaction_id()
@@ -298,8 +345,11 @@
         if repair:
             self.io.cleanup(transaction_id)
         segments_transaction_id = self.io.get_segments_transaction_id()
-        self.prepare_txn(None)
-        for segment, filename in self.io.segment_iterator():
+        self.prepare_txn(None)  # self.index, self.compact, self.segments all empty now!
+        segment_count = sum(1 for _ in self.io.segment_iterator())
+        pi = ProgressIndicatorPercent(total=segment_count, msg="Checking segments %3.1f%%", step=0.1, same_line=True)
+        for i, (segment, filename) in enumerate(self.io.segment_iterator()):
+            pi.show(i)
             if segment > transaction_id:
                 continue
             try:
@@ -310,35 +360,17 @@
                 if repair:
                     self.io.recover_segment(segment, filename)
                     objects = list(self.io.iter_objects(segment))
-            self.segments[segment] = 0
-            for tag, key, offset in objects:
-                if tag == TAG_PUT:
-                    try:
-                        s, _ = self.index[key]
-                        self.compact.add(s)
-                        self.segments[s] -= 1
-                    except KeyError:
-                        pass
-                    self.index[key] = segment, offset
-                    self.segments[segment] += 1
-                elif tag == TAG_DELETE:
-                    try:
-                        s, _ = self.index.pop(key)
-                        self.segments[s] -= 1
-                        self.compact.add(s)
-                    except KeyError:
-                        pass
-                    self.compact.add(segment)
-                elif tag == TAG_COMMIT:
-                    continue
-                else:
-                    report_error('Unexpected tag {} in segment {}'.format(tag, segment))
+            self._update_index(segment, objects, report_error)
+        pi.finish()
+        # self.index, self.segments, self.compact now reflect the state of the segment files up to <transaction_id>
         # We might need to add a commit tag if no committed segment is found
         if repair and segments_transaction_id is None:
             report_error('Adding commit tag to segment {}'.format(transaction_id))
             self.io.segment = transaction_id + 1
             self.io.write_commit()
         if current_index and not repair:
+            # current_index = "as found on disk"
+            # self.index = "as rebuilt in-memory from segments"
             if len(current_index) != len(self.index):
                 report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)))
             elif current_index:
@@ -346,9 +378,16 @@
                 if current_index.get(key, (-1, -1)) != value:
                     report_error('Index mismatch for key {}. {} != {}'.format(key, value, current_index.get(key, (-1, -1))))
         if repair:
-            self.compact_segments()
+            self.compact_segments(save_space=save_space)
             self.write_index()
         self.rollback()
+        if error_found:
+            if repair:
+                logger.info('Completed repository check, errors found and repaired.')
+            else:
+                logger.error('Completed repository check, errors found.')
+        else:
+            logger.info('Completed repository check, no problems found.')
         return not error_found or repair
 
     def rollback(self):
@@ -379,7 +418,7 @@
             segment, offset = self.index[id_]
             return self.io.read(segment, offset, id_)
         except KeyError:
-            raise self.ObjectNotFound(id_, self.path)
+            raise self.ObjectNotFound(id_, self.path) from None
 
     def get_many(self, ids, is_preloaded=False):
         for id_ in ids:
@@ -408,7 +447,7 @@
         try:
             segment, offset = self.index.pop(id)
         except KeyError:
-            raise self.ObjectNotFound(id, self.path)
+            raise self.ObjectNotFound(id, self.path) from None
         self.segments[segment] -= 1
         self.compact.add(segment)
         segment = self.io.write_delete(id)
@@ -422,6 +461,9 @@
 
 class LoggedIO:
 
+    class SegmentFull(Exception):
+        """raised when a segment is full, before opening next"""
+
     header_fmt = struct.Struct('<IIB')
     assert header_fmt.size == 9
     put_header_fmt = struct.Struct('<IIB32s')
@@ ... @@
-    def get_write_fd(self, no_new=False):
+    def get_write_fd(self, no_new=False, raise_full=False):
         if not no_new and self.offset and self.offset > self.limit:
+            if raise_full:
+                raise self.SegmentFull
             self.close_segment()
         if not self._write_fd:
             if self.segment % self.segments_per_dir == 0:
@@ -524,7 +568,7 @@
             del self.fds[segment]
         try:
             os.unlink(self.segment_filename(segment))
-        except OSError:
+        except FileNotFoundError:
             pass
 
     def segment_exists(self, segment):
@@ -585,7 +629,7 @@
             hdr_tuple = fmt.unpack(header)
         except struct.error as err:
             raise IntegrityError('Invalid segment entry header [segment {}, offset {}]: {}'.format(
-                segment, offset, err))
+                segment, offset, err)) from None
         if fmt is self.put_header_fmt:
             crc, size, tag, key = hdr_tuple
         elif fmt is self.header_fmt:
@@ -611,9 +655,9 @@
             key, data = data[:32], data[32:]
         return size, tag, key, data
 
-    def write_put(self, id, data):
+    def write_put(self, id, data, raise_full=False):
+        fd = self.get_write_fd(raise_full=raise_full)
         size = len(data) + self.put_header_fmt.size
-        fd = self.get_write_fd()
         offset = self.offset
         header = self.header_no_crc_fmt.pack(size, TAG_PUT)
         crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
@@ -621,8 +665,8 @@
         self.offset += size
         return self.segment, offset
 
-    def write_delete(self, id):
-        fd = self.get_write_fd()
+    def write_delete(self, id, raise_full=False):
+        fd = self.get_write_fd(raise_full=raise_full)
         header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
         crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
         fd.write(b''.join((crc, header, id)))
@@ -642,7 +686,7 @@
             self.offset = 0
             self._write_fd.flush()
             os.fsync(self._write_fd.fileno())
-            if hasattr(os, 'posix_fadvise'):  # python >= 3.3, only on UNIX
+            if hasattr(os, 'posix_fadvise'):  # only on UNIX
                 # tell the OS that it does not need to cache what we just wrote,
                 # avoids spoiling the cache for the OS and other processes.
                 os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
diff --git a/borg/shellpattern.py b/borg/shellpattern.py
new file mode 100644
index 00000000..7cb8f211
--- /dev/null
+++ b/borg/shellpattern.py
@@ -0,0 +1,62 @@
+import re
+import os
+
+
+def translate(pat):
+    """Translate a shell-style pattern to a regular expression. 
+
+    The pattern may include "**<sep>" (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
+    matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception of
+    any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character
+    "?").
+
+    This function is derived from the "fnmatch" module distributed with the Python standard library.
+
+    Copyright (C) 2001-2016 Python Software Foundation. All rights reserved.
+
+    TODO: support {alt1,alt2} shell-style alternatives
+
+    """
+    sep = os.path.sep
+    n = len(pat)
+    i = 0
+    res = ""
+
+    while i < n:
+        c = pat[i]
+        i += 1
+
+        if c == "*":
+            if i + 1 < n and pat[i] == "*" and pat[i + 1] == sep:
+                # **/ == wildcard for 0+ full (relative) directory names with trailing slashes; the forward slash stands
+                # for the platform-specific path separator
+                res += r"(?:[^\%s]*\%s)*" % (sep, sep)
+                i += 2
+            else:
+                # * == wildcard for name parts (does not cross path separator)
+                res += r"[^\%s]*" % sep
+        elif c == "?":
+            # ? == any single character excluding path separator
+            res += r"[^\%s]" % sep
+        elif c == "[":
+            j = i
+            if j < n and pat[j] == "!":
+                j += 1
+            if j < n and pat[j] == "]":
+                j += 1
+            while j < n and pat[j] != "]":
+                j += 1
+            if j >= n:
+                res += "\\["
+            else:
+                stuff = pat[i:j].replace("\\", "\\\\")
+                i = j + 1
+                if stuff[0] == "!":
+                    stuff = "^" + stuff[1:]
+                elif stuff[0] == "^":
+                    stuff = "\\" + stuff
+                res += "[%s]" % stuff
+        else:
+            res += re.escape(c)
+
+    return res + r"\Z(?ms)"
diff --git a/borg/support/__init__.py b/borg/support/__init__.py
deleted file mode 100644
index 449fcebf..00000000
--- a/borg/support/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-"""
-3rd party stuff that needed fixing
-
-Note: linux package maintainers feel free to remove any of these hacks
-IF your python version is not affected.
-
-argparse is broken with default args (double conversion):
-affects: 3.2.0 <= python < 3.2.4
-affects: 3.3.0 <= python < 3.3.1
-
-as we still support 3.2 and 3.3 there is no other way than to bundle
-a fixed version (I just took argparse.py from 3.2.6) and import it from
-here (see import in archiver.py).
-DEPRECATED - remove support.argparse after requiring python 3.4.
-"""
-
diff --git a/borg/support/argparse.py b/borg/support/argparse.py
deleted file mode 100644
index da73bc5f..00000000
--- a/borg/support/argparse.py
+++ /dev/null
@@ -1,2383 +0,0 @@
-# Author: Steven J. Bethard <steven.bethard@gmail.com>.
-
-"""Command-line parsing library
-
-This module is an optparse-inspired command-line parsing library that:
-
-    - handles both optional and positional arguments
-    - produces highly informative usage messages
-    - supports parsers that dispatch to sub-parsers
-
-The following is a simple usage example that sums integers from the
-command-line and writes the result to a file::
-
-    parser = argparse.ArgumentParser(
-        description='sum the integers at the command line')
-    parser.add_argument(
-        'integers', metavar='int', nargs='+', type=int,
-        help='an integer to be summed')
-    parser.add_argument(
-        '--log', default=sys.stdout, type=argparse.FileType('w'),
-        help='the file where the sum should be written')
-    args = parser.parse_args()
-    args.log.write('%s' % sum(args.integers))
-    args.log.close()
-
-The module contains the following public classes:
-
-    - ArgumentParser -- The main entry point for command-line parsing. 
As the - example above shows, the add_argument() method is used to populate - the parser with actions for optional and positional arguments. Then - the parse_args() method is invoked to convert the args at the - command-line into an object with attributes. - - - ArgumentError -- The exception raised by ArgumentParser objects when - there are errors with the parser's actions. Errors raised while - parsing the command-line are caught by ArgumentParser and emitted - as command-line messages. - - - FileType -- A factory for defining types of files to be created. As the - example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. - - - Action -- The base class for parser actions. Typically actions are - selected by passing strings like 'store_true' or 'append_const' to - the action= argument of add_argument(). However, for greater - customization of ArgumentParser actions, subclasses of Action may - be defined and passed as the action= argument. - - - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter, - ArgumentDefaultsHelpFormatter -- Formatter classes which - may be passed as the formatter_class= argument to the - ArgumentParser constructor. HelpFormatter is the default, - RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser - not to change the formatting for help text, and - ArgumentDefaultsHelpFormatter adds information about argument defaults - to the help. - -All other classes in this module are considered implementation details. -(Also note that HelpFormatter and RawDescriptionHelpFormatter are only -considered public as object names -- the API of the formatter objects is -still considered an implementation detail.) -""" - -__version__ = '1.1' -__all__ = [ - 'ArgumentParser', - 'ArgumentError', - 'ArgumentTypeError', - 'FileType', - 'HelpFormatter', - 'ArgumentDefaultsHelpFormatter', - 'RawDescriptionHelpFormatter', - 'RawTextHelpFormatter', - 'Namespace', - 'Action', - 'ONE_OR_MORE', - 'OPTIONAL', - 'PARSER', - 'REMAINDER', - 'SUPPRESS', - 'ZERO_OR_MORE', -] - - -import collections as _collections -import copy as _copy -import os as _os -import re as _re -import sys as _sys -import textwrap as _textwrap - -try: - from gettext import gettext, ngettext -except ImportError: - def gettext(message): - return message - def ngettext(msg1, msg2, n): - return msg1 if n == 1 else msg2 -_ = gettext - - -SUPPRESS = '==SUPPRESS==' - -OPTIONAL = '?' -ZERO_OR_MORE = '*' -ONE_OR_MORE = '+' -PARSER = 'A...' -REMAINDER = '...' -_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' - -# ============================= -# Utility functions and classes -# ============================= - -class _AttributeHolder(object): - """Abstract base class that provides __repr__. - - The __repr__ method returns a string in the format:: - ClassName(attr=name, attr=name, ...) - The attributes are determined either by a class-level attribute, - '_kwarg_names', or by inspecting the instance __dict__. 
- """ - - def __repr__(self): - type_name = type(self).__name__ - arg_strings = [] - for arg in self._get_args(): - arg_strings.append(repr(arg)) - for name, value in self._get_kwargs(): - arg_strings.append('%s=%r' % (name, value)) - return '%s(%s)' % (type_name, ', '.join(arg_strings)) - - def _get_kwargs(self): - return sorted(self.__dict__.items()) - - def _get_args(self): - return [] - - -def _ensure_value(namespace, name, value): - if getattr(namespace, name, None) is None: - setattr(namespace, name, value) - return getattr(namespace, name) - - -# =============== -# Formatting Help -# =============== - -class HelpFormatter(object): - """Formatter for generating usage messages and argument help strings. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - - # default setting for width - if width is None: - try: - width = int(_os.environ['COLUMNS']) - except (KeyError, ValueError): - width = 80 - width -= 2 - - self._prog = prog - self._indent_increment = indent_increment - self._max_help_position = max_help_position - self._width = width - - self._current_indent = 0 - self._level = 0 - self._action_max_length = 0 - - self._root_section = self._Section(self, None) - self._current_section = self._root_section - - self._whitespace_matcher = _re.compile(r'\s+') - self._long_break_matcher = _re.compile(r'\n\n\n+') - - # =============================== - # Section and indentation methods - # =============================== - def _indent(self): - self._current_indent += self._indent_increment - self._level += 1 - - def _dedent(self): - self._current_indent -= self._indent_increment - assert self._current_indent >= 0, 'Indent decreased below 0.' 
-        self._level -= 1
-
-    class _Section(object):
-
-        def __init__(self, formatter, parent, heading=None):
-            self.formatter = formatter
-            self.parent = parent
-            self.heading = heading
-            self.items = []
-
-        def format_help(self):
-            # format the indented section
-            if self.parent is not None:
-                self.formatter._indent()
-            join = self.formatter._join_parts
-            item_help = join([func(*args) for func, args in self.items])
-            if self.parent is not None:
-                self.formatter._dedent()
-
-            # return nothing if the section was empty
-            if not item_help:
-                return ''
-
-            # add the heading if the section was non-empty
-            if self.heading is not SUPPRESS and self.heading is not None:
-                current_indent = self.formatter._current_indent
-                heading = '%*s%s:\n' % (current_indent, '', self.heading)
-            else:
-                heading = ''
-
-            # join the section-initial newline, the heading and the help
-            return join(['\n', heading, item_help, '\n'])
-
-    def _add_item(self, func, args):
-        self._current_section.items.append((func, args))
-
-    # ========================
-    # Message building methods
-    # ========================
-    def start_section(self, heading):
-        self._indent()
-        section = self._Section(self, self._current_section, heading)
-        self._add_item(section.format_help, [])
-        self._current_section = section
-
-    def end_section(self):
-        self._current_section = self._current_section.parent
-        self._dedent()
-
-    def add_text(self, text):
-        if text is not SUPPRESS and text is not None:
-            self._add_item(self._format_text, [text])
-
-    def add_usage(self, usage, actions, groups, prefix=None):
-        if usage is not SUPPRESS:
-            args = usage, actions, groups, prefix
-            self._add_item(self._format_usage, args)
-
-    def add_argument(self, action):
-        if action.help is not SUPPRESS:
-
-            # find all invocations
-            get_invocation = self._format_action_invocation
-            invocations = [get_invocation(action)]
-            for subaction in self._iter_indented_subactions(action):
-                invocations.append(get_invocation(subaction))
-
-            # update the maximum item length
-            invocation_length = max([len(s) for s in invocations])
-            action_length = invocation_length + self._current_indent
-            self._action_max_length = max(self._action_max_length,
-                                          action_length)
-
-            # add the item to the list
-            self._add_item(self._format_action, [action])
-
-    def add_arguments(self, actions):
-        for action in actions:
-            self.add_argument(action)
-
-    # =======================
-    # Help-formatting methods
-    # =======================
-    def format_help(self):
-        help = self._root_section.format_help()
-        if help:
-            help = self._long_break_matcher.sub('\n\n', help)
-            help = help.strip('\n') + '\n'
-        return help
-
-    def _join_parts(self, part_strings):
-        return ''.join([part
-                        for part in part_strings
-                        if part and part is not SUPPRESS])
-
-    def _format_usage(self, usage, actions, groups, prefix):
-        if prefix is None:
-            prefix = _('usage: ')
-
-        # if usage is specified, use that
-        if usage is not None:
-            usage = usage % dict(prog=self._prog)
-
-        # if no optionals or positionals are available, usage is just prog
-        elif usage is None and not actions:
-            usage = '%(prog)s' % dict(prog=self._prog)
-
-        # if optionals and positionals are available, calculate usage
-        elif usage is None:
-            prog = '%(prog)s' % dict(prog=self._prog)
-
-            # split optionals from positionals
-            optionals = []
-            positionals = []
-            for action in actions:
-                if action.option_strings:
-                    optionals.append(action)
-                else:
-                    positionals.append(action)
-
-            # build full usage string
-            format = 
self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) - - # wrap the usage parts if it's too long - text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: - - # break usage into wrappable parts - part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage - - # helper for wrapping lines - def get_lines(parts, indent, prefix=None): - lines = [] - line = [] - if prefix is not None: - line_len = len(prefix) - 1 - else: - line_len = len(indent) - 1 - for part in parts: - if line_len + 1 + len(part) > text_width: - lines.append(indent + ' '.join(line)) - line = [] - line_len = len(indent) - 1 - line.append(part) - line_len += len(part) + 1 - if line: - lines.append(indent + ' '.join(line)) - if prefix is not None: - lines[0] = lines[0][len(indent):] - return lines - - # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) - if opt_parts: - lines = get_lines([prog] + opt_parts, indent, prefix) - lines.extend(get_lines(pos_parts, indent)) - elif pos_parts: - lines = get_lines([prog] + pos_parts, indent, prefix) - else: - lines = [prog] - - # if prog is long, put it on its own line - else: - indent = ' ' * len(prefix) - parts = opt_parts + pos_parts - lines = get_lines(parts, indent) - if len(lines) > 1: - lines = [] - lines.extend(get_lines(opt_parts, indent)) - lines.extend(get_lines(pos_parts, indent)) - lines = [prog] + lines - - # join lines into usage - usage = '\n'.join(lines) - - # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) - - def _format_actions_usage(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - try: - start = actions.index(group._group_actions[0]) - except ValueError: - continue - else: - end = start + len(group._group_actions) - if actions[start:end] == group._group_actions: - for action in group._group_actions: - group_actions.add(action) - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - inserts[end] = ']' - else: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' - - # collect all actions format strings - parts = [] - for i, action in enumerate(actions): - - # suppressed arguments are marked with None - # remove | separators for suppressed arguments - if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) - - # produce all arg strings - elif not action.option_strings: - part = self._format_args(action, action.dest) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # add the action string to the list - parts.append(part) - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = '%s' % option_string - - # if the 
Optional takes a value, format is:
-                #    -s ARGS or --long ARGS
-                else:
-                    default = action.dest.upper()
-                    args_string = self._format_args(action, default)
-                    part = '%s %s' % (option_string, args_string)
-
-                # make it look optional if it's not required or in a group
-                if not action.required and action not in group_actions:
-                    part = '[%s]' % part
-
-                # add the action string to the list
-                parts.append(part)
-
-        # insert things at the necessary indices
-        for i in sorted(inserts, reverse=True):
-            parts[i:i] = [inserts[i]]
-
-        # join all the action items with spaces
-        text = ' '.join([item for item in parts if item is not None])
-
-        # clean up separators for mutually exclusive groups
-        open = r'[\[(]'
-        close = r'[\])]'
-        text = _re.sub(r'(%s) ' % open, r'\1', text)
-        text = _re.sub(r' (%s)' % close, r'\1', text)
-        text = _re.sub(r'%s *%s' % (open, close), r'', text)
-        text = _re.sub(r'\(([^|]*)\)', r'\1', text)
-        text = text.strip()
-
-        # return the text
-        return text
-
-    def _format_text(self, text):
-        if '%(prog)' in text:
-            text = text % dict(prog=self._prog)
-        text_width = self._width - self._current_indent
-        indent = ' ' * self._current_indent
-        return self._fill_text(text, text_width, indent) + '\n\n'
-
-    def _format_action(self, action):
-        # determine the required width and the entry label
-        help_position = min(self._action_max_length + 2,
-                            self._max_help_position)
-        help_width = self._width - help_position
-        action_width = help_position - self._current_indent - 2
-        action_header = self._format_action_invocation(action)
-
-        # no help; start on same line and add a final newline
-        if not action.help:
-            tup = self._current_indent, '', action_header
-            action_header = '%*s%s\n' % tup
-
-        # short action name; start on the same line and pad two spaces
-        elif len(action_header) <= action_width:
-            tup = self._current_indent, '', action_width, action_header
-            action_header = '%*s%-*s  ' % tup
-            indent_first = 0
-
-        # long action name; start on the next line
-        else:
-            tup = self._current_indent, '', action_header
-            action_header = '%*s%s\n' % tup
-            indent_first = help_position
-
-        # collect the pieces of the action help
-        parts = [action_header]
-
-        # if there was help for the action, add lines of help text
-        if action.help:
-            help_text = self._expand_help(action)
-            help_lines = self._split_lines(help_text, help_width)
-            parts.append('%*s%s\n' % (indent_first, '', help_lines[0]))
-            for line in help_lines[1:]:
-                parts.append('%*s%s\n' % (help_position, '', line))
-
-        # or add a newline if the description doesn't end with one
-        elif not action_header.endswith('\n'):
-            parts.append('\n')
-
-        # if there are any sub-actions, add their help as well
-        for subaction in self._iter_indented_subactions(action):
-            parts.append(self._format_action(subaction))
-
-        # return a single string
-        return self._join_parts(parts)
-
-    def _format_action_invocation(self, action):
-        if not action.option_strings:
-            metavar, = self._metavar_formatter(action, action.dest)(1)
-            return metavar
-
-        else:
-            parts = []
-
-            # if the Optional doesn't take a value, format is:
-            #    -s, --long
-            if action.nargs == 0:
-                parts.extend(action.option_strings)
-
-            # if the Optional takes a value, format is:
-            #    -s ARGS, --long ARGS
-            else:
-                default = action.dest.upper()
-                args_string = self._format_args(action, default)
-                for option_string in action.option_strings:
-                    parts.append('%s %s' % (option_string, args_string))
-
-            return ', '.join(parts)
-
-    def _metavar_formatter(self, action, default_metavar):
-        if action.metavar is not None:
-            
result = action.metavar - elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) - else: - result = default_metavar - - def format(tuple_size): - if isinstance(result, tuple): - return result - else: - return (result, ) * tuple_size - return format - - def _format_args(self, action, default_metavar): - get_metavar = self._metavar_formatter(action, default_metavar) - if action.nargs is None: - result = '%s' % get_metavar(1) - elif action.nargs == OPTIONAL: - result = '[%s]' % get_metavar(1) - elif action.nargs == ZERO_OR_MORE: - result = '[%s [%s ...]]' % get_metavar(2) - elif action.nargs == ONE_OR_MORE: - result = '%s [%s ...]' % get_metavar(2) - elif action.nargs == REMAINDER: - result = '...' - elif action.nargs == PARSER: - result = '%s ...' % get_metavar(1) - else: - formats = ['%s' for _ in range(action.nargs)] - result = ' '.join(formats) % get_metavar(action.nargs) - return result - - def _expand_help(self, action): - params = dict(vars(action), prog=self._prog) - for name in list(params): - if params[name] is SUPPRESS: - del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ - if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str - return self._get_help_string(action) % params - - def _iter_indented_subactions(self, action): - try: - get_subactions = action._get_subactions - except AttributeError: - pass - else: - self._indent() - for subaction in get_subactions(): - yield subaction - self._dedent() - - def _split_lines(self, text, width): - text = self._whitespace_matcher.sub(' ', text).strip() - return _textwrap.wrap(text, width) - - def _fill_text(self, text, width, indent): - text = self._whitespace_matcher.sub(' ', text).strip() - return _textwrap.fill(text, width, initial_indent=indent, - subsequent_indent=indent) - - def _get_help_string(self, action): - return action.help - - -class RawDescriptionHelpFormatter(HelpFormatter): - """Help message formatter which retains any formatting in descriptions. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _fill_text(self, text, width, indent): - return ''.join([indent + line for line in text.splitlines(True)]) - - -class RawTextHelpFormatter(RawDescriptionHelpFormatter): - """Help message formatter which retains formatting of all help text. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _split_lines(self, text, width): - return text.splitlines() - - -class ArgumentDefaultsHelpFormatter(HelpFormatter): - """Help message formatter which adds default values to argument help. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. 
- """ - - def _get_help_string(self, action): - help = action.help - if '%(default)' not in action.help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' - return help - - -# ===================== -# Options and Arguments -# ===================== - -def _get_action_name(argument): - if argument is None: - return None - elif argument.option_strings: - return '/'.join(argument.option_strings) - elif argument.metavar not in (None, SUPPRESS): - return argument.metavar - elif argument.dest not in (None, SUPPRESS): - return argument.dest - else: - return None - - -class ArgumentError(Exception): - """An error from creating or using an argument (optional or positional). - - The string value of this exception is the message, augmented with - information about the argument that caused it. - """ - - def __init__(self, argument, message): - self.argument_name = _get_action_name(argument) - self.message = message - - def __str__(self): - if self.argument_name is None: - format = '%(message)s' - else: - format = 'argument %(argument_name)s: %(message)s' - return format % dict(message=self.message, - argument_name=self.argument_name) - - -class ArgumentTypeError(Exception): - """An error from trying to convert a command line string to a type.""" - pass - - -# ============== -# Action classes -# ============== - -class Action(_AttributeHolder): - """Information about how to convert command line strings to Python objects. - - Action objects are used by an ArgumentParser to represent the information - needed to parse a single argument from one or more strings from the - command line. The keyword arguments to the Action constructor are also - all attributes of Action instances. - - Keyword Arguments: - - - option_strings -- A list of command-line option strings which - should be associated with this action. - - - dest -- The name of the attribute to hold the created object(s) - - - nargs -- The number of command-line arguments that should be - consumed. By default, one argument will be consumed and a single - value will be produced. Other values include: - - N (an integer) consumes N arguments (and produces a list) - - '?' consumes zero or one arguments - - '*' consumes zero or more arguments (and produces a list) - - '+' consumes one or more arguments (and produces a list) - Note that the difference between the default and nargs=1 is that - with the default, a single value will be produced, while with - nargs=1, a list containing a single value will be produced. - - - const -- The value to be produced if the option is specified and the - option uses an action that takes no values. - - - default -- The value to be produced if the option is not specified. - - - type -- A callable that accepts a single string argument, and - returns the converted value. The standard Python types str, int, - float, and complex are useful examples of such callables. If None, - str is used. - - - choices -- A container of values that should be allowed. If not None, - after a command-line argument has been converted to the appropriate - type, an exception will be raised if it is not a member of this - collection. - - - required -- True if the action must always be specified at the - command line. This is only meaningful for optional command-line - arguments. - - - help -- The help string describing the argument. - - - metavar -- The name to be used for the option's argument with the - help string. 
If None, the 'dest' value will be used as the name. - """ - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - self.option_strings = option_strings - self.dest = dest - self.nargs = nargs - self.const = const - self.default = default - self.type = type - self.choices = choices - self.required = required - self.help = help - self.metavar = metavar - - def _get_kwargs(self): - names = [ - 'option_strings', - 'dest', - 'nargs', - 'const', - 'default', - 'type', - 'choices', - 'help', - 'metavar', - ] - return [(name, getattr(self, name)) for name in names] - - def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) - - -class _StoreAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for store actions must be > 0; if you ' - 'have nothing to store, actions such as store ' - 'true or store const may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_StoreAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, values) - - -class _StoreConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): - super(_StoreConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self.const) - - -class _StoreTrueAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=False, - required=False, - help=None): - super(_StoreTrueAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=True, - default=default, - required=required, - help=help) - - -class _StoreFalseAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=True, - required=False, - help=None): - super(_StoreFalseAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=False, - default=default, - required=required, - help=help) - - -class _AppendAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for append actions must be > 0; if arg ' - 'strings are not supplying the value to append, ' - 'the append const action may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_AppendAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = _copy.copy(_ensure_value(namespace, self.dest, [])) - 
items.append(values) - setattr(namespace, self.dest, items) - - -class _AppendConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): - super(_AppendConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = _copy.copy(_ensure_value(namespace, self.dest, [])) - items.append(self.const) - setattr(namespace, self.dest, items) - - -class _CountAction(Action): - - def __init__(self, - option_strings, - dest, - default=None, - required=False, - help=None): - super(_CountAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - new_count = _ensure_value(namespace, self.dest, 0) + 1 - setattr(namespace, self.dest, new_count) - - -class _HelpAction(Action): - - def __init__(self, - option_strings, - dest=SUPPRESS, - default=SUPPRESS, - help=None): - super(_HelpAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - parser.print_help() - parser.exit() - - -class _VersionAction(Action): - - def __init__(self, - option_strings, - version=None, - dest=SUPPRESS, - default=SUPPRESS, - help="show program's version number and exit"): - super(_VersionAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - self.version = version - - def __call__(self, parser, namespace, values, option_string=None): - version = self.version - if version is None: - version = parser.version - formatter = parser._get_formatter() - formatter.add_text(version) - parser.exit(message=formatter.format_help()) - - -class _SubParsersAction(Action): - - class _ChoicesPseudoAction(Action): - - def __init__(self, name, aliases, help): - metavar = dest = name - if aliases: - metavar += ' (%s)' % ', '.join(aliases) - sup = super(_SubParsersAction._ChoicesPseudoAction, self) - sup.__init__(option_strings=[], dest=dest, help=help, - metavar=metavar) - - def __init__(self, - option_strings, - prog, - parser_class, - dest=SUPPRESS, - help=None, - metavar=None): - - self._prog_prefix = prog - self._parser_class = parser_class - self._name_parser_map = _collections.OrderedDict() - self._choices_actions = [] - - super(_SubParsersAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=PARSER, - choices=self._name_parser_map, - help=help, - metavar=metavar) - - def add_parser(self, name, **kwargs): - # set prog from the existing prefix - if kwargs.get('prog') is None: - kwargs['prog'] = '%s %s' % (self._prog_prefix, name) - - aliases = kwargs.pop('aliases', ()) - - # create a pseudo-action to hold the choice help - if 'help' in kwargs: - help = kwargs.pop('help') - choice_action = self._ChoicesPseudoAction(name, aliases, help) - self._choices_actions.append(choice_action) - - # create the parser and add it to the map - parser = self._parser_class(**kwargs) - self._name_parser_map[name] = parser - - # make parser available under aliases also - for alias in aliases: - self._name_parser_map[alias] = parser - - return parser - - def _get_subactions(self): - return self._choices_actions - - def __call__(self, parser, 
namespace, values, option_string=None): - parser_name = values[0] - arg_strings = values[1:] - - # set the parser name if requested - if self.dest is not SUPPRESS: - setattr(namespace, self.dest, parser_name) - - # select the parser - try: - parser = self._name_parser_map[parser_name] - except KeyError: - args = {'parser_name': parser_name, - 'choices': ', '.join(self._name_parser_map)} - msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args - raise ArgumentError(self, msg) - - # parse all the remaining options into the namespace - # store any unrecognized options on the object, so that the top - # level parser can decide what to do with them - namespace, arg_strings = parser.parse_known_args(arg_strings, namespace) - if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) - getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) - - -# ============== -# Type classes -# ============== - -class FileType(object): - """Factory for creating file object types - - Instances of FileType are typically passed as type= arguments to the - ArgumentParser add_argument() method. - - Keyword Arguments: - - mode -- A string indicating how the file is to be opened. Accepts the - same values as the builtin open() function. - - bufsize -- The file's desired buffer size. Accepts the same values as - the builtin open() function. - """ - - def __init__(self, mode='r', bufsize=-1): - self._mode = mode - self._bufsize = bufsize - - def __call__(self, string): - # the special argument "-" means sys.std{in,out} - if string == '-': - if 'r' in self._mode: - return _sys.stdin - elif 'w' in self._mode: - return _sys.stdout - else: - msg = _('argument "-" with mode %r') % self._mode - raise ValueError(msg) - - # all other arguments are used as file names - try: - return open(string, self._mode, self._bufsize) - except IOError as e: - message = _("can't open '%s': %s") - raise ArgumentTypeError(message % (string, e)) - - def __repr__(self): - args = self._mode, self._bufsize - args_str = ', '.join(repr(arg) for arg in args if arg != -1) - return '%s(%s)' % (type(self).__name__, args_str) - -# =========================== -# Optional and Positional Parsing -# =========================== - -class Namespace(_AttributeHolder): - """Simple object for storing attributes. - - Implements equality by attribute names and values, and provides a simple - string representation. 
- """ - - def __init__(self, **kwargs): - for name in kwargs: - setattr(self, name, kwargs[name]) - - def __eq__(self, other): - return vars(self) == vars(other) - - def __ne__(self, other): - return not (self == other) - - def __contains__(self, key): - return key in self.__dict__ - - -class _ActionsContainer(object): - - def __init__(self, - description, - prefix_chars, - argument_default, - conflict_handler): - super(_ActionsContainer, self).__init__() - - self.description = description - self.argument_default = argument_default - self.prefix_chars = prefix_chars - self.conflict_handler = conflict_handler - - # set up registries - self._registries = {} - - # register actions - self.register('action', None, _StoreAction) - self.register('action', 'store', _StoreAction) - self.register('action', 'store_const', _StoreConstAction) - self.register('action', 'store_true', _StoreTrueAction) - self.register('action', 'store_false', _StoreFalseAction) - self.register('action', 'append', _AppendAction) - self.register('action', 'append_const', _AppendConstAction) - self.register('action', 'count', _CountAction) - self.register('action', 'help', _HelpAction) - self.register('action', 'version', _VersionAction) - self.register('action', 'parsers', _SubParsersAction) - - # raise an exception if the conflict handler is invalid - self._get_handler() - - # action storage - self._actions = [] - self._option_string_actions = {} - - # groups - self._action_groups = [] - self._mutually_exclusive_groups = [] - - # defaults storage - self._defaults = {} - - # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') - - # whether or not there are any optionals that look like negative - # numbers -- uses a list so it can be shared and edited - self._has_negative_number_optionals = [] - - # ==================== - # Registration methods - # ==================== - def register(self, registry_name, value, object): - registry = self._registries.setdefault(registry_name, {}) - registry[value] = object - - def _registry_get(self, registry_name, value, default=None): - return self._registries[registry_name].get(value, default) - - # ================================== - # Namespace default accessor methods - # ================================== - def set_defaults(self, **kwargs): - self._defaults.update(kwargs) - - # if these defaults match any existing arguments, replace - # the previous default on the object with the new one - for action in self._actions: - if action.dest in kwargs: - action.default = kwargs[action.dest] - - def get_default(self, dest): - for action in self._actions: - if action.dest == dest and action.default is not None: - return action.default - return self._defaults.get(dest, None) - - - # ======================= - # Adding argument actions - # ======================= - def add_argument(self, *args, **kwargs): - """ - add_argument(dest, ..., name=value, ...) - add_argument(option_string, option_string, ..., name=value, ...) 
- """ - - # if no positional args are supplied or only one is supplied and - # it doesn't look like an option string, parse a positional - # argument - chars = self.prefix_chars - if not args or len(args) == 1 and args[0][0] not in chars: - if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') - kwargs = self._get_positional_kwargs(*args, **kwargs) - - # otherwise, we're adding an optional argument - else: - kwargs = self._get_optional_kwargs(*args, **kwargs) - - # if no default was supplied, use the parser-level default - if 'default' not in kwargs: - dest = kwargs['dest'] - if dest in self._defaults: - kwargs['default'] = self._defaults[dest] - elif self.argument_default is not None: - kwargs['default'] = self.argument_default - - # create the action object, and add it to the parser - action_class = self._pop_action_class(kwargs) - if not callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) - action = action_class(**kwargs) - - # raise an error if the action type is not callable - type_func = self._registry_get('type', action.type, action.type) - if not callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) - - # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - try: - self._get_formatter()._format_args(action, None) - except TypeError: - raise ValueError("length of metavar tuple does not match nargs") - - return self._add_action(action) - - def add_argument_group(self, *args, **kwargs): - group = _ArgumentGroup(self, *args, **kwargs) - self._action_groups.append(group) - return group - - def add_mutually_exclusive_group(self, **kwargs): - group = _MutuallyExclusiveGroup(self, **kwargs) - self._mutually_exclusive_groups.append(group) - return group - - def _add_action(self, action): - # resolve any conflicts - self._check_conflict(action) - - # add to actions list - self._actions.append(action) - action.container = self - - # index the action by any option strings it has - for option_string in action.option_strings: - self._option_string_actions[option_string] = action - - # set the flag if any option strings look like negative numbers - for option_string in action.option_strings: - if self._negative_number_matcher.match(option_string): - if not self._has_negative_number_optionals: - self._has_negative_number_optionals.append(True) - - # return the created action - return action - - def _remove_action(self, action): - self._actions.remove(action) - - def _add_container_actions(self, container): - # collect groups by titles - title_group_map = {} - for group in self._action_groups: - if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) - title_group_map[group.title] = group - - # map each action to its group - group_map = {} - for group in container._action_groups: - - # if a group with the title exists, use that, otherwise - # create a new group matching the container's group - if group.title not in title_group_map: - title_group_map[group.title] = self.add_argument_group( - title=group.title, - description=group.description, - conflict_handler=group.conflict_handler) - - # map the actions to their new group - for action in group._group_actions: - group_map[action] = title_group_map[group.title] - - # add container's mutually exclusive groups - # NOTE: if add_mutually_exclusive_group ever gains title= and - # description= then this code will need to be expanded as above 
- for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( - required=group.required) - - # map the actions to their new mutex group - for action in group._group_actions: - group_map[action] = mutex_group - - # add all actions to this container or their group - for action in container._actions: - group_map.get(action, self)._add_action(action) - - def _get_positional_kwargs(self, dest, **kwargs): - # make sure required is not specified - if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") - raise TypeError(msg) - - # mark positional arguments as required if at least one is - # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: - kwargs['required'] = True - - # return the keyword arguments with no option strings - return dict(kwargs, dest=dest, option_strings=[]) - - def _get_optional_kwargs(self, *args, **kwargs): - # determine short and long option strings - option_strings = [] - long_option_strings = [] - for option_string in args: - # error on strings that don't start with an appropriate prefix - if not option_string[0] in self.prefix_chars: - args = {'option': option_string, - 'prefix_chars': self.prefix_chars} - msg = _('invalid option string %(option)r: ' - 'must start with a character %(prefix_chars)r') - raise ValueError(msg % args) - - # strings starting with two prefix characters are long options - option_strings.append(option_string) - if option_string[0] in self.prefix_chars: - if len(option_string) > 1: - if option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) - - # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' - dest = kwargs.pop('dest', None) - if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) - if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) - dest = dest.replace('-', '_') - - # return the updated keyword arguments - return dict(kwargs, dest=dest, option_strings=option_strings) - - def _pop_action_class(self, kwargs, default=None): - action = kwargs.pop('action', default) - return self._registry_get('action', action, action) - - def _get_handler(self): - # determine function from conflict handler string - handler_func_name = '_handle_conflict_%s' % self.conflict_handler - try: - return getattr(self, handler_func_name) - except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) - - def _check_conflict(self, action): - - # find all options that conflict with this option - confl_optionals = [] - for option_string in action.option_strings: - if option_string in self._option_string_actions: - confl_optional = self._option_string_actions[option_string] - confl_optionals.append((option_string, confl_optional)) - - # resolve any conflicts - if confl_optionals: - conflict_handler = self._get_handler() - conflict_handler(action, confl_optionals) - - def _handle_conflict_error(self, action, conflicting_actions): - message = ngettext('conflicting option string: %s', - 'conflicting option strings: %s', - len(conflicting_actions)) - conflict_string = ', '.join([option_string - for option_string, action - in conflicting_actions]) - raise ArgumentError(action, message % conflict_string) - - 
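For readers tracking this removal: the pair of handlers deleted here ('error' just above, 'resolve' just below) is exactly what the stdlib argparse on Python 3.4+ provides out of the box, so dropping the vendored copy loses nothing. A minimal sketch of the two behaviors against the stdlib module (the option names are made up for illustration):

    import argparse

    # conflict_handler='error' (the default): re-using an option string
    # raises argparse.ArgumentError at definition time.
    p1 = argparse.ArgumentParser(conflict_handler='error')
    p1.add_argument('--verbose', action='store_true')
    try:
        p1.add_argument('--verbose', action='count')
    except argparse.ArgumentError as exc:
        print('error handler:', exc)

    # conflict_handler='resolve': the newest definition wins; the older
    # action loses the conflicting option string and is removed entirely
    # once it has no option strings left.
    p2 = argparse.ArgumentParser(conflict_handler='resolve')
    p2.add_argument('--verbose', action='store_true')
    p2.add_argument('--verbose', action='count')
    print(p2.parse_args(['--verbose', '--verbose']))  # Namespace(verbose=2)
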
def _handle_conflict_resolve(self, action, conflicting_actions): - - # remove all conflicting options - for option_string, action in conflicting_actions: - - # remove the conflicting option - action.option_strings.remove(option_string) - self._option_string_actions.pop(option_string, None) - - # if the option now has no option string, remove it from the - # container holding it - if not action.option_strings: - action.container._remove_action(action) - - -class _ArgumentGroup(_ActionsContainer): - - def __init__(self, container, title=None, description=None, **kwargs): - # add any missing keyword arguments by checking the container - update = kwargs.setdefault - update('conflict_handler', container.conflict_handler) - update('prefix_chars', container.prefix_chars) - update('argument_default', container.argument_default) - super_init = super(_ArgumentGroup, self).__init__ - super_init(description=description, **kwargs) - - # group attributes - self.title = title - self._group_actions = [] - - # share most attributes with the container - self._registries = container._registries - self._actions = container._actions - self._option_string_actions = container._option_string_actions - self._defaults = container._defaults - self._has_negative_number_optionals = \ - container._has_negative_number_optionals - self._mutually_exclusive_groups = container._mutually_exclusive_groups - - def _add_action(self, action): - action = super(_ArgumentGroup, self)._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - super(_ArgumentGroup, self)._remove_action(action) - self._group_actions.remove(action) - - -class _MutuallyExclusiveGroup(_ArgumentGroup): - - def __init__(self, container, required=False): - super(_MutuallyExclusiveGroup, self).__init__(container) - self.required = required - self._container = container - - def _add_action(self, action): - if action.required: - msg = _('mutually exclusive arguments must be optional') - raise ValueError(msg) - action = self._container._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - self._container._remove_action(action) - self._group_actions.remove(action) - - -class ArgumentParser(_AttributeHolder, _ActionsContainer): - """Object for parsing command line strings into Python objects. - - Keyword Arguments: - - prog -- The name of the program (default: sys.argv[0]) - - usage -- A usage message (default: auto-generated from arguments) - - description -- A description of what the program does - - epilog -- Text following the argument descriptions - - parents -- Parsers whose arguments should be copied into this one - - formatter_class -- HelpFormatter class for printing help messages - - prefix_chars -- Characters that prefix optional arguments - - fromfile_prefix_chars -- Characters that prefix files containing - additional arguments - - argument_default -- The default value for all arguments - - conflict_handler -- String indicating how to handle conflicts - - add_help -- Add a -h/-help option - """ - - def __init__(self, - prog=None, - usage=None, - description=None, - epilog=None, - version=None, - parents=[], - formatter_class=HelpFormatter, - prefix_chars='-', - fromfile_prefix_chars=None, - argument_default=None, - conflict_handler='error', - add_help=True): - - if version is not None: - import warnings - warnings.warn( - """The "version" argument to ArgumentParser is deprecated. 
""" - """Please use """ - """"add_argument(..., action='version', version="N", ...)" """ - """instead""", DeprecationWarning) - - superinit = super(ArgumentParser, self).__init__ - superinit(description=description, - prefix_chars=prefix_chars, - argument_default=argument_default, - conflict_handler=conflict_handler) - - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog - self.usage = usage - self.epilog = epilog - self.version = version - self.formatter_class = formatter_class - self.fromfile_prefix_chars = fromfile_prefix_chars - self.add_help = add_help - - add_group = self.add_argument_group - self._positionals = add_group(_('positional arguments')) - self._optionals = add_group(_('optional arguments')) - self._subparsers = None - - # register types - def identity(string): - return string - self.register('type', None, identity) - - # add help and version arguments if necessary - # (using explicit default to override global argument_default) - default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] - if self.add_help: - self.add_argument( - default_prefix+'h', default_prefix*2+'help', - action='help', default=SUPPRESS, - help=_('show this help message and exit')) - if self.version: - self.add_argument( - default_prefix+'v', default_prefix*2+'version', - action='version', default=SUPPRESS, - version=self.version, - help=_("show program's version number and exit")) - - # add parent arguments and defaults - for parent in parents: - self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) - - # ======================= - # Pretty __repr__ methods - # ======================= - def _get_kwargs(self): - names = [ - 'prog', - 'usage', - 'description', - 'version', - 'formatter_class', - 'conflict_handler', - 'add_help', - ] - return [(name, getattr(self, name)) for name in names] - - # ================================== - # Optional/Positional adding methods - # ================================== - def add_subparsers(self, **kwargs): - if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) - - # add the parser class to the arguments if it's not present - kwargs.setdefault('parser_class', type(self)) - - if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) - self._subparsers = self.add_argument_group(title, description) - else: - self._subparsers = self._positionals - - # prog defaults to the usage message of this parser, skipping - # optional arguments and with no "usage:" prefix - if kwargs.get('prog') is None: - formatter = self._get_formatter() - positionals = self._get_positional_actions() - groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') - kwargs['prog'] = formatter.format_help().strip() - - # create the parsers action and add it to the positionals list - parsers_class = self._pop_action_class(kwargs, 'parsers') - action = parsers_class(option_strings=[], **kwargs) - self._subparsers._add_action(action) - - # return the created parsers action - return action - - def _add_action(self, action): - if action.option_strings: - self._optionals._add_action(action) - else: - self._positionals._add_action(action) - return action - - def _get_optional_actions(self): - return [action - for action in self._actions - if action.option_strings] - - def 
_get_positional_actions(self): - return [action - for action in self._actions - if not action.option_strings] - - # ===================================== - # Command line argument parsing methods - # ===================================== - def parse_args(self, args=None, namespace=None): - args, argv = self.parse_known_args(args, namespace) - if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) - return args - - def parse_known_args(self, args=None, namespace=None): - if args is None: - # args default to the system args - args = _sys.argv[1:] - else: - # make sure that args are mutable - args = list(args) - - # default Namespace built from parser defaults - if namespace is None: - namespace = Namespace() - - # add any action defaults that aren't present - for action in self._actions: - if action.dest is not SUPPRESS: - if not hasattr(namespace, action.dest): - if action.default is not SUPPRESS: - setattr(namespace, action.dest, action.default) - - # add any parser defaults that aren't present - for dest in self._defaults: - if not hasattr(namespace, dest): - setattr(namespace, dest, self._defaults[dest]) - - # parse the arguments and exit if there are any errors - try: - namespace, args = self._parse_known_args(args, namespace) - if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): - args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) - delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) - return namespace, args - except ArgumentError: - err = _sys.exc_info()[1] - self.error(str(err)) - - def _parse_known_args(self, arg_strings, namespace): - # replace arg strings that are file references - if self.fromfile_prefix_chars is not None: - arg_strings = self._read_args_from_files(arg_strings) - - # map all mutually exclusive arguments to the other arguments - # they can't occur with - action_conflicts = {} - for mutex_group in self._mutually_exclusive_groups: - group_actions = mutex_group._group_actions - for i, mutex_action in enumerate(mutex_group._group_actions): - conflicts = action_conflicts.setdefault(mutex_action, []) - conflicts.extend(group_actions[:i]) - conflicts.extend(group_actions[i + 1:]) - - # find all option indices, and determine the arg_string_pattern - # which has an 'O' if there is an option at an index, - # an 'A' if there is an argument, or a '-' if there is a '--' - option_string_indices = {} - arg_string_pattern_parts = [] - arg_strings_iter = iter(arg_strings) - for i, arg_string in enumerate(arg_strings_iter): - - # all args after -- are non-options - if arg_string == '--': - arg_string_pattern_parts.append('-') - for arg_string in arg_strings_iter: - arg_string_pattern_parts.append('A') - - # otherwise, add the arg to the arg strings - # and note the index if it was an option - else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: - pattern = 'A' - else: - option_string_indices[i] = option_tuple - pattern = 'O' - arg_string_pattern_parts.append(pattern) - - # join the pieces together to form the pattern - arg_strings_pattern = ''.join(arg_string_pattern_parts) - - # converts arg strings to the appropriate and then takes the action - seen_actions = set() - seen_non_default_actions = set() - - def take_action(action, argument_strings, option_string=None): - seen_actions.add(action) - argument_values = self._get_values(action, argument_strings) - - # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if 
argument_values is not action.default: - seen_non_default_actions.add(action) - for conflict_action in action_conflicts.get(action, []): - if conflict_action in seen_non_default_actions: - msg = _('not allowed with argument %s') - action_name = _get_action_name(conflict_action) - raise ArgumentError(action, msg % action_name) - - # take the action if we didn't receive a SUPPRESS value - # (e.g. from a default) - if argument_values is not SUPPRESS: - action(self, namespace, argument_values, option_string) - - # function to convert arg_strings into an optional action - def consume_optional(start_index): - - # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple - - # identify additional optionals in the same arg string - # (e.g. -xyz is the same as -x -y -z if no args are required) - match_argument = self._match_argument - action_tuples = [] - while True: - - # if we found no optional action, skip it - if action is None: - extras.append(arg_strings[start_index]) - return start_index + 1 - - # if there is an explicit argument, try to match the - # optional's string arguments to only this - if explicit_arg is not None: - arg_count = match_argument(action, 'A') - - # if the action is a single-dash option and takes no - # arguments, try to parse more single-dash options out - # of the tail of the option string - chars = self.prefix_chars - if arg_count == 0 and option_string[1] not in chars: - action_tuples.append((action, [], option_string)) - char = option_string[0] - option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None - optionals_map = self._option_string_actions - if option_string in optionals_map: - action = optionals_map[option_string] - explicit_arg = new_explicit_arg - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if the action expect exactly one argument, we've - # successfully matched the option; exit the loop - elif arg_count == 1: - stop = start_index + 1 - args = [explicit_arg] - action_tuples.append((action, args, option_string)) - break - - # error if a double-dash option did not use the - # explicit argument - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if there is no explicit argument, try to match the - # optional's string arguments with the following strings - # if successful, exit the loop - else: - start = start_index + 1 - selected_patterns = arg_strings_pattern[start:] - arg_count = match_argument(action, selected_patterns) - stop = start + arg_count - args = arg_strings[start:stop] - action_tuples.append((action, args, option_string)) - break - - # add the Optional to the list and return the index at which - # the Optional's string args stopped - assert action_tuples - for action, args, option_string in action_tuples: - take_action(action, args, option_string) - return stop - - # the list of Positionals left to be parsed; this is modified - # by consume_positionals() - positionals = self._get_positional_actions() - - # function to convert arg_strings into positional actions - def consume_positionals(start_index): - # match as many Positionals as possible - match_partial = self._match_arguments_partial - selected_pattern = arg_strings_pattern[start_index:] - arg_counts = match_partial(positionals, selected_pattern) - - # slice off the appropriate arg strings for each Positional - # and add the Positional and its args to the list - 
for action, arg_count in zip(positionals, arg_counts): - args = arg_strings[start_index: start_index + arg_count] - start_index += arg_count - take_action(action, args) - - # slice off the Positionals that we just parsed and return the - # index at which the Positionals' string args stopped - positionals[:] = positionals[len(arg_counts):] - return start_index - - # consume Positionals and Optionals alternately, until we have - # passed the last option string - extras = [] - start_index = 0 - if option_string_indices: - max_option_string_index = max(option_string_indices) - else: - max_option_string_index = -1 - while start_index <= max_option_string_index: - - # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: - positionals_end_index = consume_positionals(start_index) - - # only try to parse the next optional if we didn't consume - # the option string during the positionals parsing - if positionals_end_index > start_index: - start_index = positionals_end_index - continue - else: - start_index = positionals_end_index - - # if we consumed all the positionals we could and we're not - # at the index of an option string, there were extra arguments - if start_index not in option_string_indices: - strings = arg_strings[start_index:next_option_string_index] - extras.extend(strings) - start_index = next_option_string_index - - # consume the next optional and any arguments for it - start_index = consume_optional(start_index) - - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) - - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) - - # if we didn't use all the Positional objects, there were too few - # arg strings supplied. - if positionals: - self.error(_('too few arguments')) - - # make sure all required actions were present, and convert defaults. 
- for action in self._actions: - if action not in seen_actions: - if action.required: - name = _get_action_name(action) - self.error(_('argument %s is required') % name) - else: - # Convert action default now instead of doing it before - # parsing arguments to avoid calling convert functions - # twice (which may fail) if the argument was given, but - # only if it was defined already in the namespace - if (action.default is not None and - isinstance(action.default, str) and - hasattr(namespace, action.dest) and - action.default is getattr(namespace, action.dest)): - setattr(namespace, action.dest, - self._get_value(action, action.default)) - - # make sure all required groups had one option present - for group in self._mutually_exclusive_groups: - if group.required: - for action in group._group_actions: - if action in seen_non_default_actions: - break - - # if no actions were used, report the error - else: - names = [_get_action_name(action) - for action in group._group_actions - if action.help is not SUPPRESS] - msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) - - # return the updated namespace and the extra arguments - return namespace, extras - - def _read_args_from_files(self, arg_strings): - # expand arguments referencing files - new_arg_strings = [] - for arg_string in arg_strings: - - # for regular arguments, just add them back into the list - if not arg_string or arg_string[0] not in self.fromfile_prefix_chars: - new_arg_strings.append(arg_string) - - # replace arguments referencing files with the file content - else: - try: - args_file = open(arg_string[1:]) - try: - arg_strings = [] - for arg_line in args_file.read().splitlines(): - for arg in self.convert_arg_line_to_args(arg_line): - arg_strings.append(arg) - arg_strings = self._read_args_from_files(arg_strings) - new_arg_strings.extend(arg_strings) - finally: - args_file.close() - except IOError: - err = _sys.exc_info()[1] - self.error(str(err)) - - # return the modified argument list - return new_arg_strings - - def convert_arg_line_to_args(self, arg_line): - return [arg_line] - - def _match_argument(self, action, arg_strings_pattern): - # match the pattern for this action to the arg strings - nargs_pattern = self._get_nargs_pattern(action) - match = _re.match(nargs_pattern, arg_strings_pattern) - - # raise an exception if we weren't able to find a match - if match is None: - nargs_errors = { - None: _('expected one argument'), - OPTIONAL: _('expected at most one argument'), - ONE_OR_MORE: _('expected at least one argument'), - } - default = ngettext('expected %s argument', - 'expected %s arguments', - action.nargs) % action.nargs - msg = nargs_errors.get(action.nargs, default) - raise ArgumentError(action, msg) - - # return the number of arguments matched - return len(match.group(1)) - - def _match_arguments_partial(self, actions, arg_strings_pattern): - # progressively shorten the actions list by slicing off the - # final actions until we find a match - result = [] - for i in range(len(actions), 0, -1): - actions_slice = actions[:i] - pattern = ''.join([self._get_nargs_pattern(action) - for action in actions_slice]) - match = _re.match(pattern, arg_strings_pattern) - if match is not None: - result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result - - def _parse_optional(self, arg_string): - # if it's an empty string, it was meant to be a positional - if not arg_string: - return None - - # if it doesn't start with a prefix, it 
was meant to be positional - if not arg_string[0] in self.prefix_chars: - return None - - # if the option string is present in the parser, return the action - if arg_string in self._option_string_actions: - action = self._option_string_actions[arg_string] - return action, arg_string, None - - # if it's just a single character, it was meant to be positional - if len(arg_string) == 1: - return None - - # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg - - # search through all possible prefixes of the option string - # and all actions in the parser for possible interpretations - option_tuples = self._get_option_tuples(arg_string) - - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} - msg = _('ambiguous option: %(option)s could match %(matches)s') - self.error(msg % args) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple - - # if it was not found as an option, but it looks like a negative - # number, it was meant to be positional - # unless there are negative-number-like options - if self._negative_number_matcher.match(arg_string): - if not self._has_negative_number_optionals: - return None - - # if it contains a space, it was meant to be a positional - if ' ' in arg_string: - return None - - # it was meant to be an optional but there is no such option - # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None - - def _get_option_tuples(self, option_string): - result = [] - - # option strings starting with two prefix characters are only - # split at the '=' - chars = self.prefix_chars - if option_string[0] in chars and option_string[1] in chars: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None - for option_string in self._option_string_actions: - if option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # single character options can be concatenated with their arguments - # but multiple character options always have to have their argument - # separate - elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None - short_option_prefix = option_string[:2] - short_explicit_arg = option_string[2:] - - for option_string in self._option_string_actions: - if option_string == short_option_prefix: - action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg - result.append(tup) - elif option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # shouldn't ever get here - else: - self.error(_('unexpected option string: %s') % option_string) - - # return the collected option tuples - return result - - def _get_nargs_pattern(self, action): - # in all examples below, we have to allow for '--' 
args - # which are represented as '-' in the pattern - nargs = action.nargs - - # the default (None) is assumed to be a single argument - if nargs is None: - nargs_pattern = '(-*A-*)' - - # allow zero or one arguments - elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' - - # allow zero or more arguments - elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' - - # allow one or more arguments - elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' - - # allow any number of options or arguments - elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' - - # allow one argument followed by any number of options or arguments - elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' - - # all others should be integers - else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') - - # return the pattern - return nargs_pattern - - # ======================== - # Value conversion methods - # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER, REMAINDER args, strip out first '--' - if action.nargs not in [PARSER, REMAINDER]: - try: - arg_strings.remove('--') - except ValueError: - pass - - # optional argument produces a default when not present - if not arg_strings and action.nargs == OPTIONAL: - if action.option_strings: - value = action.const - else: - value = action.default - if isinstance(value, str): - value = self._get_value(action, value) - self._check_value(action, value) - - # when nargs='*' on a positional, if there were no command-line - # args, use the default if it is anything other than None - elif (not arg_strings and action.nargs == ZERO_OR_MORE and - not action.option_strings): - if action.default is not None: - value = action.default - else: - value = arg_strings - self._check_value(action, value) - - # single argument or optional argument produces a single value - elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: - arg_string, = arg_strings - value = self._get_value(action, arg_string) - self._check_value(action, value) - - # REMAINDER arguments convert all values, checking none - elif action.nargs == REMAINDER: - value = [self._get_value(action, v) for v in arg_strings] - - # PARSER arguments convert all values, but check only the first - elif action.nargs == PARSER: - value = [self._get_value(action, v) for v in arg_strings] - self._check_value(action, value[0]) - - # all other types of nargs produce a list - else: - value = [self._get_value(action, v) for v in arg_strings] - for v in value: - self._check_value(action, v) - - # return the converted value - return value - - def _get_value(self, action, arg_string): - type_func = self._registry_get('type', action.type, action.type) - if not callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) - - # convert the value to the appropriate type - try: - result = type_func(arg_string) - - # ArgumentTypeErrors indicate errors - except ArgumentTypeError: - name = getattr(action.type, '__name__', repr(action.type)) - msg = str(_sys.exc_info()[1]) - raise ArgumentError(action, msg) - - # TypeErrors or ValueErrors also indicate errors - except (TypeError, ValueError): - name = getattr(action.type, '__name__', repr(action.type)) - args = {'type': name, 'value': arg_string} - msg = _('invalid %(type)s value: %(value)r') - raise 
ArgumentError(action, msg % args) - - # return the converted value - return result - - def _check_value(self, action, value): - # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} - msg = _('invalid choice: %(value)r (choose from %(choices)s)') - raise ArgumentError(action, msg % args) - - # ======================= - # Help-formatting methods - # ======================= - def format_usage(self): - formatter = self._get_formatter() - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - return formatter.format_help() - - def format_help(self): - formatter = self._get_formatter() - - # usage - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - - # description - formatter.add_text(self.description) - - # positionals, optionals and user-defined groups - for action_group in self._action_groups: - formatter.start_section(action_group.title) - formatter.add_text(action_group.description) - formatter.add_arguments(action_group._group_actions) - formatter.end_section() - - # epilog - formatter.add_text(self.epilog) - - # determine help from format above - return formatter.format_help() - - def format_version(self): - import warnings - warnings.warn( - 'The format_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) - formatter = self._get_formatter() - formatter.add_text(self.version) - return formatter.format_help() - - def _get_formatter(self): - return self.formatter_class(prog=self.prog) - - # ===================== - # Help-printing methods - # ===================== - def print_usage(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_usage(), file) - - def print_help(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_help(), file) - - def print_version(self, file=None): - import warnings - warnings.warn( - 'The print_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) - self._print_message(self.format_version(), file) - - def _print_message(self, message, file=None): - if message: - if file is None: - file = _sys.stderr - file.write(message) - - # =============== - # Exiting methods - # =============== - def exit(self, status=0, message=None): - if message: - self._print_message(message, _sys.stderr) - _sys.exit(status) - - def error(self, message): - """error(message: string) - - Prints a usage message incorporating the message to stderr and - exits. - - If you override this in a subclass, it should not return -- it - should either exit or raise an exception. - """ - self.print_usage(_sys.stderr) - args = {'prog': self.prog, 'message': message} - self.exit(2, _('%(prog)s: error: %(message)s\n') % args) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 2d2ee904..1d09be50 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -7,7 +7,6 @@ import sys import sysconfig import time import unittest -from ..helpers import st_mtime_ns from ..xattr import get_all try: @@ -31,9 +30,6 @@ else: if sys.platform.startswith('netbsd'): st_mtime_ns_round = -4 # only >1 microsecond resolution here? 
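Context for the deletions just below: once Python 3.2 support is gone, the sys.version check and the st_mtime_ns()/st_atime_ns() helpers from borg.helpers become dead weight, because os.stat() results have carried nanosecond timestamps natively since Python 3.3. A small sketch of the pattern the testsuite switches to (the path is a hypothetical example):

    import os

    st = os.stat('some_file')      # hypothetical example path
    mtime_ns = st.st_mtime_ns      # int with nanosecond resolution (3.3+)
    # round away digits the platform cannot actually store, e.g. a
    # rounding constant of -4 keeps 10-microsecond granularity:
    comparable = round(mtime_ns, -4)
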
-has_mtime_ns = sys.version >= '3.3' -utime_supports_fd = os.utime in getattr(os, 'supports_fd', {}) - class BaseTestCase(unittest.TestCase): """ @@ -80,14 +76,13 @@ class BaseTestCase(unittest.TestCase): d1[4] = None if not stat.S_ISCHR(d2[1]) and not stat.S_ISBLK(d2[1]): d2[4] = None - if not os.path.islink(path1) or utime_supports_fd: - # Older versions of llfuse do not support ns precision properly - if fuse and not have_fuse_mtime_ns: - d1.append(round(st_mtime_ns(s1), -4)) - d2.append(round(st_mtime_ns(s2), -4)) - else: - d1.append(round(st_mtime_ns(s1), st_mtime_ns_round)) - d2.append(round(st_mtime_ns(s2), st_mtime_ns_round)) + # Older versions of llfuse do not support ns precision properly + if fuse and not have_fuse_mtime_ns: + d1.append(round(s1.st_mtime_ns, -4)) + d2.append(round(s2.st_mtime_ns, -4)) + else: + d1.append(round(s1.st_mtime_ns, st_mtime_ns_round)) + d2.append(round(s2.st_mtime_ns, st_mtime_ns_round)) d1.append(get_all(path1, follow_symlinks=False)) d2.append(get_all(path2, follow_symlinks=False)) self.assert_equal(d1, d2) @@ -103,3 +98,50 @@ class BaseTestCase(unittest.TestCase): return time.sleep(.1) raise Exception('wait_for_mount(%s) timeout' % path) + + +class changedir: + def __init__(self, dir): + self.dir = dir + + def __enter__(self): + self.old = os.getcwd() + os.chdir(self.dir) + + def __exit__(self, *args, **kw): + os.chdir(self.old) + + +class environment_variable: + def __init__(self, **values): + self.values = values + self.old_values = {} + + def __enter__(self): + for k, v in self.values.items(): + self.old_values[k] = os.environ.get(k) + if v is None: + os.environ.pop(k, None) + else: + os.environ[k] = v + + def __exit__(self, *args, **kw): + for k, v in self.old_values.items(): + if v is None: + os.environ.pop(k, None) + else: + os.environ[k] = v + + +class FakeInputs: + """Simulate multiple user inputs, can be used as input() replacement""" + def __init__(self, inputs): + self.inputs = inputs + + def __call__(self, prompt=None): + if prompt is not None: + print(prompt, end='') + try: + return self.inputs.pop(0) + except IndexError: + raise EOFError from None diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 5c88f075..c190e054 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -1,7 +1,7 @@ from datetime import datetime, timezone +from unittest.mock import Mock import msgpack -from mock import Mock from ..archive import Archive, CacheChunkBuffer, RobustUnpacker from ..key import PlaintextKey diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 35563afd..01cc6699 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1,8 +1,9 @@ from binascii import hexlify -from configparser import RawConfigParser +from configparser import ConfigParser import errno import os from io import StringIO +import random import stat import subprocess import sys @@ -10,9 +11,9 @@ import shutil import tempfile import time import unittest +from unittest.mock import patch from hashlib import sha256 -from mock import patch import pytest from .. 
import xattr @@ -20,10 +21,10 @@ from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP from ..archiver import Archiver from ..cache import Cache from ..crypto import bytes_to_long, num_aes_blocks -from ..helpers import Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, st_atime_ns, st_mtime_ns +from ..helpers import Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR from ..remote import RemoteRepository, PathNotAllowed from ..repository import Repository -from . import BaseTestCase +from . import BaseTestCase, changedir, environment_variable try: import llfuse @@ -33,43 +34,8 @@ except ImportError: has_lchflags = hasattr(os, 'lchflags') -src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..') +src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -# Python <= 3.2 raises OSError instead of PermissionError (See #164) -try: - PermissionError = PermissionError -except NameError: - PermissionError = OSError - - -class changedir: - def __init__(self, dir): - self.dir = dir - - def __enter__(self): - self.old = os.getcwd() - os.chdir(self.dir) - - def __exit__(self, *args, **kw): - os.chdir(self.old) - - -class environment_variable: - def __init__(self, **values): - self.values = values - self.old_values = {} - - def __enter__(self): - for k, v in self.values.items(): - self.old_values[k] = os.environ.get(k) - os.environ[k] = v - - def __exit__(self, *args, **kw): - for k, v in self.old_values.items(): - if v is None: - del os.environ[k] - else: - os.environ[k] = v def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw): if fork: @@ -93,7 +59,8 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw): sys.stdout = sys.stderr = output = StringIO() if archiver is None: archiver = Archiver() - ret = archiver.run(list(args)) + args = archiver.parse_args(list(args)) + ret = archiver.run(args) return ret, output.getvalue() finally: sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr @@ -103,9 +70,7 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw): try: exec_cmd('help', exe='borg.exe', fork=True) BORG_EXES = ['python', 'binary', ] -except (IOError, OSError) as err: - if err.errno != errno.ENOENT: - raise +except FileNotFoundError: BORG_EXES = ['python', ] @@ -117,6 +82,7 @@ def cmd(request): exe = 'borg.exe' else: raise ValueError("param must be 'python' or 'binary'") + def exec_fn(*args, **kw): return exec_cmd(*args, exe=exe, fork=True, **kw) return exec_fn @@ -128,7 +94,7 @@ def test_return_codes(cmd, tmpdir): input = tmpdir.mkdir('input') output = tmpdir.mkdir('output') input.join('test_file').write('content') - rc, out = cmd('init', '%s' % str(repo)) + rc, out = cmd('init', '--encryption=none', '%s' % str(repo)) assert rc == EXIT_SUCCESS rc, out = cmd('create', '%s::archive' % repo, str(input)) assert rc == EXIT_SUCCESS @@ -141,13 +107,95 @@ def test_return_codes(cmd, tmpdir): assert rc == EXIT_ERROR # duplicate archive name +""" +test_disk_full is very slow and not recommended to be included in daily testing. +for this test, an empty, writable 16MB filesystem mounted on DF_MOUNT is required. +for speed and other reasons, it is recommended that the underlying block device is +in RAM, not a magnetic or flash disk. + +assuming /tmp is a tmpfs (in memory filesystem), one can use this: +dd if=/dev/zero of=/tmp/borg-disk bs=16M count=1 +mkfs.ext4 /tmp/borg-disk +mkdir /tmp/borg-mount +sudo mount /tmp/borg-disk /tmp/borg-mount + +if the directory does not exist, the test will be skipped. 
+""" +DF_MOUNT = '/tmp/borg-mount' + + +@pytest.mark.skipif(not os.path.exists(DF_MOUNT), reason="needs a 16MB fs mounted on %s" % DF_MOUNT) +def test_disk_full(cmd): + def make_files(dir, count, size, rnd=True): + shutil.rmtree(dir, ignore_errors=True) + os.mkdir(dir) + if rnd: + count = random.randint(1, count) + if size > 1: + size = random.randint(1, size) + for i in range(count): + fn = os.path.join(dir, "file%03d" % i) + with open(fn, 'wb') as f: + data = os.urandom(size) + f.write(data) + + with environment_variable(BORG_CHECK_I_KNOW_WHAT_I_AM_DOING='YES'): + mount = DF_MOUNT + assert os.path.exists(mount) + repo = os.path.join(mount, 'repo') + input = os.path.join(mount, 'input') + reserve = os.path.join(mount, 'reserve') + for j in range(100): + shutil.rmtree(repo, ignore_errors=True) + shutil.rmtree(input, ignore_errors=True) + # keep some space and some inodes in reserve that we can free up later: + make_files(reserve, 80, 100000, rnd=False) + rc, out = cmd('init', repo) + if rc != EXIT_SUCCESS: + print('init', rc, out) + assert rc == EXIT_SUCCESS + try: + success, i = True, 0 + while success: + i += 1 + try: + make_files(input, 20, 200000) + except OSError as err: + if err.errno == errno.ENOSPC: + # already out of space + break + raise + try: + rc, out = cmd('create', '%s::test%03d' % (repo, i), input) + success = rc == EXIT_SUCCESS + if not success: + print('create', rc, out) + finally: + # make sure repo is not locked + shutil.rmtree(os.path.join(repo, 'lock.exclusive'), ignore_errors=True) + os.remove(os.path.join(repo, 'lock.roster')) + finally: + # now some error happened, likely we are out of disk space. + # free some space so we can expect borg to be able to work normally: + shutil.rmtree(reserve, ignore_errors=True) + rc, out = cmd('list', repo) + if rc != EXIT_SUCCESS: + print('list', rc, out) + rc, out = cmd('check', '--repair', repo) + if rc != EXIT_SUCCESS: + print('check', rc, out) + assert rc == EXIT_SUCCESS + + class ArchiverTestCaseBase(BaseTestCase): EXE = None # python source based FORK_DEFAULT = False prefix = '' def setUp(self): - os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1' + os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = 'YES' + os.environ['BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'] = 'YES' + os.environ['BORG_PASSPHRASE'] = 'waytooeasyonlyfortests' self.archiver = not self.FORK_DEFAULT and Archiver() or None self.tmpdir = tempfile.mkdtemp() self.repository_path = os.path.join(self.tmpdir, 'repository') @@ -253,7 +301,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') - self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) + list_output = self.cmd('list', '--short', self.repository_location) + self.assert_in('test', list_output) + self.assert_in('test.2', list_output) expected = set([ 'input', 'input/bdev', @@ -273,15 +323,17 @@ class ArchiverTestCase(ArchiverTestCaseBase): expected.remove('input/cdev') if has_lchflags: # remove the file we did not backup, so input and output become equal - expected.remove('input/flagfile') # this file is UF_NODUMP + expected.remove('input/flagfile') # this file is UF_NODUMP os.remove(os.path.join('input', 'flagfile')) - self.assert_equal(set(self.cmd('list', '--short', self.repository_location + '::test').splitlines()), expected) + list_output = self.cmd('list', '--short', self.repository_location + '::test') + for name in 
expected: + self.assert_in(name, list_output) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP self.assert_in('Number of files: %d' % item_count, info_output) shutil.rmtree(self.cache_path) - with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): + with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='yes'): info_output2 = self.cmd('info', self.repository_location + '::test') def filter(output): @@ -299,7 +351,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(filter(info_output), filter(info_output2)) def test_atime(self): - have_root = self.create_test_files() + self.create_test_files() atime, mtime = 123456780, 234567890 os.utime('input/file1', (atime, mtime)) self.cmd('init', self.repository_location) @@ -308,18 +360,18 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', self.repository_location + '::test') sti = os.stat('input/file1') sto = os.stat('output/input/file1') - assert st_mtime_ns(sti) == st_mtime_ns(sto) == mtime * 1e9 + assert sti.st_mtime_ns == sto.st_mtime_ns == mtime * 1e9 if hasattr(os, 'O_NOATIME'): - assert st_atime_ns(sti) == st_atime_ns(sto) == atime * 1e9 + assert sti.st_atime_ns == sto.st_atime_ns == atime * 1e9 else: # it touched the input file's atime while backing it up - assert st_atime_ns(sto) == atime * 1e9 + assert sto.st_atime_ns == atime * 1e9 def _extract_repository_id(self, path): return Repository(self.repository_path).id def _set_repository_id(self, path, id): - config = RawConfigParser() + config = ConfigParser(interpolation=None) config.read(os.path.join(path, 'config')) config.set('repository', 'id', hexlify(id).decode('ascii')) with open(os.path.join(path, 'config'), 'w') as fd: @@ -365,7 +417,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): filenames = ['normal', 'with some blanks', '(with_parens)', ] for filename in filenames: filename = os.path.join(self.input_path, filename) - with open(filename, 'wb') as fd: + with open(filename, 'wb'): pass self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') @@ -377,7 +429,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_repository_swap_detection(self): self.create_test_files() os.environ['BORG_PASSPHRASE'] = 'passphrase' - self.cmd('init', '--encryption=passphrase', self.repository_location) + self.cmd('init', '--encryption=repokey', self.repository_location) repository_id = self._extract_repository_id(self.repository_path) self.cmd('create', self.repository_location + '::test', 'input') shutil.rmtree(self.repository_path) @@ -393,7 +445,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_test_files() self.cmd('init', '--encryption=none', self.repository_location + '_unencrypted') os.environ['BORG_PASSPHRASE'] = 'passphrase' - self.cmd('init', '--encryption=passphrase', self.repository_location + '_encrypted') + self.cmd('init', '--encryption=repokey', self.repository_location + '_encrypted') self.cmd('create', self.repository_location + '_encrypted::test', 'input') shutil.rmtree(self.repository_path + '_encrypted') os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted') @@ -433,6 +485,112 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file1', 
'file3']) + def test_extract_include_exclude_regex(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('file3', size=1024 * 80) + self.create_regular_file('file4', size=1024 * 80) + self.create_regular_file('file333', size=1024 * 80) + + # Create with regular expression exclusion for file4 + self.cmd('create', '--exclude=re:input/file4$', self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2', 'file3', 'file333']) + shutil.rmtree('output/input') + + # Extract with regular expression exclusion + with changedir('output'): + self.cmd('extract', '--exclude=re:file3+', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2']) + shutil.rmtree('output/input') + + # Combine --exclude with fnmatch and regular expression + with changedir('output'): + self.cmd('extract', '--exclude=input/file2', '--exclude=re:file[01]', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file3', 'file333']) + shutil.rmtree('output/input') + + # Combine --exclude-from and regular expression exclusion + with changedir('output'): + self.cmd('extract', '--exclude-from=' + self.exclude_file_path, '--exclude=re:file1', + '--exclude=re:file(\\d)\\1\\1$', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file3']) + + def test_extract_include_exclude_regex_from_file(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('file3', size=1024 * 80) + self.create_regular_file('file4', size=1024 * 80) + self.create_regular_file('file333', size=1024 * 80) + self.create_regular_file('aa:something', size=1024 * 80) + + # Create while excluding using mixed pattern styles + with open(self.exclude_file_path, 'wb') as fd: + fd.write(b're:input/file4$\n') + fd.write(b'fm:*aa:*thing\n') + + self.cmd('create', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2', 'file3', 'file333']) + shutil.rmtree('output/input') + + # Exclude using regular expression + with open(self.exclude_file_path, 'wb') as fd: + fd.write(b're:file3+\n') + + with changedir('output'): + self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2']) + shutil.rmtree('output/input') + + # Mixed exclude pattern styles + with open(self.exclude_file_path, 'wb') as fd: + fd.write(b're:file(\\d)\\1\\1$\n') + fd.write(b'fm:nothingwillmatchthis\n') + fd.write(b'*/file1\n') + fd.write(b're:file2$\n') + + with changedir('output'): + self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file3']) + + def test_extract_with_pattern(self): + self.cmd("init", self.repository_location) + self.create_regular_file("file1", size=1024 * 80) + self.create_regular_file("file2", size=1024 * 80) + self.create_regular_file("file3", size=1024 * 
80) + self.create_regular_file("file4", size=1024 * 80) + self.create_regular_file("file333", size=1024 * 80) + + self.cmd("create", self.repository_location + "::test", "input") + + # Extract everything with regular expression + with changedir("output"): + self.cmd("extract", self.repository_location + "::test", "re:.*") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file3", "file333", "file4"]) + shutil.rmtree("output/input") + + # Extract with pattern while also excluding files + with changedir("output"): + self.cmd("extract", "--exclude=re:file[34]$", self.repository_location + "::test", r"re:file\d$") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2"]) + shutil.rmtree("output/input") + + # Combine --exclude with pattern for extraction + with changedir("output"): + self.cmd("extract", "--exclude=input/file1", self.repository_location + "::test", "re:file[12]$") + self.assert_equal(sorted(os.listdir("output/input")), ["file2"]) + shutil.rmtree("output/input") + + # Multiple pattern + with changedir("output"): + self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2") + self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"]) + def test_exclude_caches(self): self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) @@ -444,6 +602,41 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1']) self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG']) + def test_exclude_tagged(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('tagged1/.NOBACKUP') + self.create_regular_file('tagged2/00-NOBACKUP') + self.create_regular_file('tagged3/.NOBACKUP/file2') + self.cmd('create', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3']) + + def test_exclude_keep_tagged(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file0', size=1024) + self.create_regular_file('tagged1/.NOBACKUP1') + self.create_regular_file('tagged1/file1', size=1024) + self.create_regular_file('tagged2/.NOBACKUP2') + self.create_regular_file('tagged2/file2', size=1024) + self.create_regular_file('tagged3/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') + self.create_regular_file('tagged3/file3', size=1024) + self.create_regular_file('taggedall/.NOBACKUP1') + self.create_regular_file('taggedall/.NOBACKUP2') + self.create_regular_file('taggedall/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') + self.create_regular_file('taggedall/file4', size=1024) + self.cmd('create', '--exclude-if-present', '.NOBACKUP1', '--exclude-if-present', '.NOBACKUP2', + '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file0', 'tagged1', 'tagged2', 'tagged3', 'taggedall']) + self.assert_equal(os.listdir('output/input/tagged1'), ['.NOBACKUP1']) + self.assert_equal(os.listdir('output/input/tagged2'), ['.NOBACKUP2']) + 
self.assert_equal(os.listdir('output/input/tagged3'), ['CACHEDIR.TAG'])
+        self.assert_equal(sorted(os.listdir('output/input/taggedall')),
+                          ['.NOBACKUP1', '.NOBACKUP2', 'CACHEDIR.TAG', ])
+
     def test_path_normalization(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('dir1/dir2/file', size=1024 * 80)
@@ -579,14 +772,65 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         manifest, key = Manifest.load(repository)
         self.assert_equal(len(manifest.archives), 0)
 
-    def test_cmdline_compatibility(self):
+    def test_progress(self):
         self.create_regular_file('file1', size=1024 * 80)
         self.cmd('init', self.repository_location)
-        self.cmd('create', self.repository_location + '::test', 'input')
-        output = self.cmd('verify', '-v', self.repository_location + '::test')
-        self.assert_in('"borg verify" has been deprecated', output)
-        output = self.cmd('prune', self.repository_location, '--hourly=1')
-        self.assert_in('"--hourly" has been deprecated. Use "--keep-hourly" instead', output)
+        # progress forced on
+        output = self.cmd('create', '--progress', self.repository_location + '::test4', 'input')
+        self.assert_in("\r", output)
+        # progress forced off
+        output = self.cmd('create', self.repository_location + '::test5', 'input')
+        self.assert_not_in("\r", output)
+
+    def test_file_status(self):
+        """test that various file statuses show the expected results
+
+        clearly incomplete: only tests for the weird "unchanged" status for now"""
+        now = time.time()
+        self.create_regular_file('file1', size=1024 * 80)
+        os.utime('input/file1', (now - 5, now - 5))  # 5 seconds ago
+        self.create_regular_file('file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        output = self.cmd('create', '-v', '--list', self.repository_location + '::test', 'input')
+        self.assert_in("A input/file1", output)
+        self.assert_in("A input/file2", output)
+        # should find first file as unmodified
+        output = self.cmd('create', '-v', '--list', self.repository_location + '::test1', 'input')
+        self.assert_in("U input/file1", output)
+        # this is expected, although surprising; for the reason, see:
+        # https://borgbackup.readthedocs.org/en/latest/faq.html#i-am-seeing-a-added-status-for-a-unchanged-file
+        self.assert_in("A input/file2", output)
+
+    def test_create_topical(self):
+        now = time.time()
+        self.create_regular_file('file1', size=1024 * 80)
+        os.utime('input/file1', (now - 5, now - 5))
+        self.create_regular_file('file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        # no listing by default
+        output = self.cmd('create', self.repository_location + '::test', 'input')
+        self.assert_not_in('file1', output)
+        # shouldn't be listed even if unchanged
+        output = self.cmd('create', self.repository_location + '::test0', 'input')
+        self.assert_not_in('file1', output)
+        # should list the file as unchanged
+        output = self.cmd('create', '-v', '--list', '--filter=U', self.repository_location + '::test1', 'input')
+        self.assert_in('file1', output)
+        # should *not* list the file as changed
+        output = self.cmd('create', '-v', '--filter=AM', self.repository_location + '::test2', 'input')
+        self.assert_not_in('file1', output)
+        # change the file
+        self.create_regular_file('file1', size=1024 * 100)
+        # should list the file as changed
+        output = self.cmd('create', '-v', '--list', '--filter=AM', self.repository_location + '::test3', 'input')
+        self.assert_in('file1', output)
+
+    # def test_cmdline_compatibility(self):
+    #     self.create_regular_file('file1', size=1024 * 80)
+    #     self.cmd('init', self.repository_location)
+    # 
self.cmd('create', self.repository_location + '::test', 'input') + # output = self.cmd('foo', self.repository_location, '--old') + # self.assert_in('"--old" has been deprecated. Use "--new" instead', output) def test_prune_repository(self): self.cmd('init', self.repository_location) @@ -603,6 +847,21 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_not_in('test1', output) self.assert_in('test2', output) + def test_prune_repository_save_space(self): + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test1', src_dir) + self.cmd('create', self.repository_location + '::test2', src_dir) + output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2') + self.assert_in('Keeping archive: test2', output) + self.assert_in('Would prune: test1', output) + output = self.cmd('list', self.repository_location) + self.assert_in('test1', output) + self.assert_in('test2', output) + self.cmd('prune', '--save-space', self.repository_location, '--keep-daily=2') + output = self.cmd('list', self.repository_location) + self.assert_not_in('test1', output) + self.assert_in('test2', output) + def test_prune_repository_prefix(self): self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir) @@ -624,6 +883,20 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('bar-2015-08-12-10:00', output) self.assert_in('bar-2015-08-12-20:00', output) + def test_list_prefix(self): + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test-1', src_dir) + self.cmd('create', self.repository_location + '::something-else-than-test-1', src_dir) + self.cmd('create', self.repository_location + '::test-2', src_dir) + output = self.cmd('list', '--prefix=test-', self.repository_location) + self.assert_in('test-1', output) + self.assert_in('test-2', output) + self.assert_not_in('something-else', output) + + def test_break_lock(self): + self.cmd('init', self.repository_location) + self.cmd('break-lock', self.repository_location) + def test_usage(self): if self.FORK_DEFAULT: self.cmd(exit_code=0) @@ -714,7 +987,36 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.verify_aes_counter_uniqueness('keyfile') def test_aes_counter_uniqueness_passphrase(self): - self.verify_aes_counter_uniqueness('passphrase') + self.verify_aes_counter_uniqueness('repokey') + + def test_debug_dump_archive_items(self): + self.create_test_files() + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + with changedir('output'): + output = self.cmd('debug-dump-archive-items', self.repository_location + '::test') + output_dir = sorted(os.listdir('output')) + assert len(output_dir) > 0 and output_dir[0].startswith('000000_') + assert 'Done.' 
in output + + def test_debug_put_get_delete_obj(self): + self.cmd('init', self.repository_location) + data = b'some data' + hexkey = sha256(data).hexdigest() + self.create_regular_file('file', contents=data) + output = self.cmd('debug-put-obj', self.repository_location, 'input/file') + assert hexkey in output + output = self.cmd('debug-get-obj', self.repository_location, hexkey, 'output/file') + assert hexkey in output + with open('output/file', 'rb') as f: + data_read = f.read() + assert data == data_read + output = self.cmd('debug-delete-obj', self.repository_location, hexkey) + assert "deleted" in output + output = self.cmd('debug-delete-obj', self.repository_location, hexkey) + assert "not found" in output + output = self.cmd('debug-delete-obj', self.repository_location, 'invalid') + assert "is invalid" in output @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available') @@ -739,15 +1041,17 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): return archive, repository def test_check_usage(self): - output = self.cmd('check', self.repository_location, exit_code=0) + output = self.cmd('check', '-v', self.repository_location, exit_code=0) self.assert_in('Starting repository check', output) self.assert_in('Starting archive consistency check', output) - output = self.cmd('check', '--repository-only', self.repository_location, exit_code=0) + output = self.cmd('check', '-v', '--repository-only', self.repository_location, exit_code=0) self.assert_in('Starting repository check', output) self.assert_not_in('Starting archive consistency check', output) - output = self.cmd('check', '--archives-only', self.repository_location, exit_code=0) + output = self.cmd('check', '-v', '--archives-only', self.repository_location, exit_code=0) self.assert_not_in('Starting repository check', output) self.assert_in('Starting archive consistency check', output) + output = self.cmd('check', '-v', '--archives-only', '--prefix=archive2', self.repository_location, exit_code=0) + self.assert_not_in('archive1', output) def test_missing_file_chunk(self): archive, repository = self.open_archive('archive1') @@ -781,7 +1085,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): repository.delete(Manifest.MANIFEST_ID) repository.commit() self.cmd('check', self.repository_location, exit_code=1) - output = self.cmd('check', '--repair', self.repository_location, exit_code=0) + output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0) self.assert_in('archive1', output) self.assert_in('archive2', output) self.cmd('check', self.repository_location, exit_code=0) @@ -825,3 +1129,28 @@ if 0: @unittest.skip('deadlock issues') def test_fuse_mount_archive(self): pass + + @unittest.skip('only works locally') + def test_debug_put_get_delete_obj(self): + pass + + +def test_get_args(): + archiver = Archiver() + # everything normal: + # first param is argv as produced by ssh forced command, + # second param is like from SSH_ORIGINAL_COMMAND env variable + args = archiver.get_args(['borg', 'serve', '--restrict-to-path=/p1', '--restrict-to-path=/p2', ], + 'borg serve --info --umask=0027') + assert args.func == archiver.do_serve + assert args.restrict_to_paths == ['/p1', '/p2'] + assert args.umask == 0o027 + assert args.log_level == 'info' + # trying to cheat - break out of path restriction + args = archiver.get_args(['borg', 'serve', '--restrict-to-path=/p1', '--restrict-to-path=/p2', ], + 'borg serve --restrict-to-path=/') + assert args.restrict_to_paths == ['/p1', '/p2'] + # trying to cheat - try to 
execute different subcommand + args = archiver.get_args(['borg', 'serve', '--restrict-to-path=/p1', '--restrict-to-path=/p2', ], + 'borg init /') + assert args.func == archiver.do_serve diff --git a/borg/testsuite/benchmark.py b/borg/testsuite/benchmark.py index 88f86b53..1e620c62 100644 --- a/borg/testsuite/benchmark.py +++ b/borg/testsuite/benchmark.py @@ -16,15 +16,16 @@ from .archiver import changedir, cmd @pytest.yield_fixture def repo_url(request, tmpdir): os.environ['BORG_PASSPHRASE'] = '123456' - os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1' - os.environ['BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'] = '1' + os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = 'YES' + os.environ['BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'] = 'YES' + os.environ['BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'] = 'yes' os.environ['BORG_KEYS_DIR'] = str(tmpdir.join('keys')) os.environ['BORG_CACHE_DIR'] = str(tmpdir.join('cache')) yield str(tmpdir.join('repository')) tmpdir.remove(rec=1) -@pytest.fixture(params=["none", "passphrase"]) +@pytest.fixture(params=["none", "repokey"]) def repo(request, cmd, repo_url): cmd('init', '--encryption', request.param, repo_url) return repo_url @@ -37,15 +38,14 @@ def testdata(request, tmpdir_factory): data_type = request.param if data_type == 'zeros': # do not use a binary zero (\0) to avoid sparse detection - data = lambda: b'0' * size + def data(size): + return b'0' * size if data_type == 'random': - rnd = open('/dev/urandom', 'rb') - data = lambda: rnd.read(size) + def data(size): + return os.urandom(size) for i in range(count): with open(str(p.join(str(i))), "wb") as f: - f.write(data()) - if data_type == 'random': - rnd.close() + f.write(data(size)) yield str(p) p.remove(rec=1) @@ -98,4 +98,3 @@ def test_check(benchmark, cmd, archive): def test_help(benchmark, cmd): result, out = benchmark(cmd, 'help') assert result == 0 - diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py index ce46c9d3..1a435358 100644 --- a/borg/testsuite/compress.py +++ b/borg/testsuite/compress.py @@ -98,5 +98,3 @@ def test_compressor(): for params in params_list: c = Compressor(**params) assert data == c.decompress(c.compress(data)) - - diff --git a/borg/testsuite/crypto.py b/borg/testsuite/crypto.py index e438eb85..2d74493d 100644 --- a/borg/testsuite/crypto.py +++ b/borg/testsuite/crypto.py @@ -1,6 +1,6 @@ from binascii import hexlify -from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes +from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes from . 
import BaseTestCase @@ -13,21 +13,6 @@ class CryptoTestCase(BaseTestCase): self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1) self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1') - def test_pbkdf2_sha256(self): - self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)), - b'120fb6cffcf8b32c43e7225256c4f837a86548c92ccc35480805987cb70be17b') - self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 2, 32)), - b'ae4d0c95af6b46d32d0adff928f06dd02a303f8ef3c251dfd6e2d85a95474c43') - self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 4096, 32)), - b'c5e478d59288c841aa530db6845c4c8d962893a001ce4e11a4963873aa98134a') - - def test_get_random_bytes(self): - bytes = get_random_bytes(10) - bytes2 = get_random_bytes(10) - self.assert_equal(len(bytes), 10) - self.assert_equal(len(bytes2), 10) - self.assert_not_equal(bytes, bytes2) - def test_aes(self): key = b'X' * 32 data = b'foo' * 10 diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index bbefeb05..0421ed8c 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -51,11 +51,11 @@ class HashIndexTestCase(BaseTestCase): def test_nsindex(self): self._generic_test(NSIndex, lambda x: (x, x), - '861d6d60069ea45e39d36bed2bdc1d0c07981e0641955f897ac6848be429abac') + '80fba5b40f8cf12f1486f1ba33c9d852fb2b41a5b5961d3b9d1228cf2aa9c4c9') def test_chunkindex(self): self._generic_test(ChunkIndex, lambda x: (x, x, x), - '69464bd0ebbc5866b9f95d838bc48617d21bfe3dcf294682a5c21a2ef6b9dc0b') + '1d71865e72e3c3af18d3c7216b6fa7b014695eaa3ed7f14cf9cd02fba75d1c95') def test_resize(self): n = 2000 # Must be >= MIN_BUCKETS diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index a27b2271..a1b5440a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -7,11 +7,15 @@ import os import pytest import sys import msgpack +import msgpack.fallback -from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \ - prune_within, prune_split, get_cache_dir, Statistics, \ - StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams -from . import BaseTestCase +from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \ + prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \ + yes, TRUISH, FALSISH, DEFAULTISH, \ + StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ + ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ + PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern +from . import BaseTestCase, environment_variable, FakeInputs class BigIntTestCase(BaseTestCase): @@ -158,113 +162,302 @@ class FormatTimedeltaTestCase(BaseTestCase): ) -class PatternTestCase(BaseTestCase): +def check_patterns(files, pattern, expected): + """Utility for testing patterns. + """ + assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths" + matched = [f for f in files if pattern.match(f)] + + assert matched == (files if expected is None else expected) + + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. 
all match the given pattern + ("/", None), + ("/./", None), + ("", []), + ("/home/u", []), + ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc", ["/etc/server/config", "/etc/server/hosts"]), + ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv", ["/srv/messages", "/srv/dmesg"]), + ]) +def test_patterns_prefix(pattern, expected): files = [ - '/etc/passwd', '/etc/hosts', '/home', - '/home/user/.profile', '/home/user/.bashrc', - '/home/user2/.profile', '/home/user2/public_html/index.html', - '/var/log/messages', '/var/log/dmesg', + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", ] - def evaluate(self, paths, excludes): - patterns = adjust_patterns(paths, [ExcludePattern(p) for p in excludes]) - return [path for path in self.files if not exclude_path(path, patterns)] - - def test(self): - self.assert_equal(self.evaluate(['/'], []), self.files) - self.assert_equal(self.evaluate([], []), self.files) - self.assert_equal(self.evaluate(['/'], ['/h']), self.files) - self.assert_equal(self.evaluate(['/'], ['/home']), - ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) - self.assert_equal(self.evaluate(['/'], ['/home/']), - ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg']) - self.assert_equal(self.evaluate(['/home/u'], []), []) - self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), []) - self.assert_equal(self.evaluate(['/home/'], ['/home/user2']), - ['/home', '/home/user/.profile', '/home/user/.bashrc']) - self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']), - ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html']) - self.assert_equal(self.evaluate(['/'], ['/home/*/public_html', '*.profile', '*/log/*']), - ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc']) - self.assert_equal(self.evaluate(['/etc/', '/var'], ['dmesg']), - ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) + check_patterns(files, PathPrefixPattern(pattern), expected) -@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test') -class PatternNonAsciiTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. 
all match the given pattern + ("", []), + ("foo", []), + ("relative", ["relative/path1", "relative/two"]), + ("more", ["more/relative"]), + ]) +def test_patterns_prefix_relative(pattern, expected): + files = ["relative/path1", "relative/two", "more/relative"] - assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - - assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - assert not e.match("ba/foo") - assert e.match(str(b"ba\x80/foo", 'latin1')) + check_patterns(files, PathPrefixPattern(pattern), expected) -@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') -class OSXPatternNormalizationTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. all match the given pattern + ("/*", None), + ("/./*", None), + ("*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*", ["/srv/messages", "/srv/dmesg"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ]) +def test_patterns_fnmatch(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] - assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + check_patterns(files, FnmatchPattern(pattern), expected) - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. 
all match the given pattern + ("*", None), + ("**/*", None), + ("/**/*", None), + ("/./*", None), + ("*/*", None), + ("*///*", None), + ("/home/u", []), + ("/home/*", + ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html", + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]), + ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]), + ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]), + ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]), + ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]), + ("*/.pr????e", []), + ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]), + ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]), + ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]), + ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]), + ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]), + ("/srv/*", ["/srv/messages", "/srv/dmesg"]), + ("/srv2/**", ["/srv2", "/srv2/blafasel"]), + ("/srv2/**/", ["/srv2/blafasel"]), + ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]), + ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]), + ]) +def test_patterns_shell(pattern, expected): + files = [ + "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc", + "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg", + "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - e = ExcludePattern(pattern) + check_patterns(files, ShellPattern(pattern), expected) - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - assert not e.match("ba/foo") - assert e.match(str(b"ba\x80/foo", 'latin1')) + +@pytest.mark.parametrize("pattern, expected", [ + # "None" means all files, i.e. 
all match the given pattern + ("", None), + (".*", None), + ("^/", None), + ("^abc$", []), + ("^[^/]", []), + ("^(?!/srv|/foo|/opt)", + ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", + "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]), + ]) +def test_patterns_regex(pattern, expected): + files = [ + '/srv/data', '/foo/bar', '/home', + '/home/user/.profile', '/home/user/.bashrc', + '/home/user2/.profile', '/home/user2/public_html/index.html', + '/opt/log/messages.txt', '/opt/log/dmesg.txt', + "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", + ] + + obj = RegexPattern(pattern) + assert str(obj) == pattern + assert obj.pattern == pattern + + check_patterns(files, obj, expected) + + +def test_regex_pattern(): + # The forward slash must match the platform-specific path separator + assert RegexPattern("^/$").match("/") + assert RegexPattern("^/$").match(os.path.sep) + assert not RegexPattern(r"^\\$").match("/") + + +def use_normalized_unicode(): + return sys.platform in ("darwin",) + + +def _make_test_patterns(pattern): + return [PathPrefixPattern(pattern), + FnmatchPattern(pattern), + RegexPattern("^{}/foo$".format(pattern)), + ShellPattern(pattern), + ] + + +@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}")) +def test_composed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode() + + +@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}")) +def test_decomposed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode() + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + +@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1"))) +def test_invalid_unicode_pattern(pattern): + assert not pattern.match("ba/foo") + assert pattern.match(str(b"ba\x80/foo", "latin1")) + + +@pytest.mark.parametrize("lines, expected", [ + # "None" means all files, i.e. 
none excluded + ([], None), + (["# Comment only"], None), + (["*"], []), + (["# Comment", + "*/something00.txt", + " *whitespace* ", + # Whitespace before comment + " #/ws*", + # Empty line + "", + "# EOF"], + ["/more/data", "/home", " #/wsfoobar"]), + (["re:.*"], []), + (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]), + ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]), + (["", "", "", + "# This is a test with mixed pattern styles", + # Case-insensitive pattern + "re:(?i)BAR|ME$", + "", + "*whitespace*", + "fm:*/something00*"], + ["/more/data"]), + ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]), + ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]), + (["pp:./"], None), + (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), + (["pp:aaabbb"], None), + (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), + ]) +def test_patterns_from_file(tmpdir, lines, expected): + files = [ + '/data/something00.txt', '/more/data', '/home', + ' #/wsfoobar', + '\tstart/whitespace', + '/whitespace/end\t', + ] + + def evaluate(filename): + matcher = PatternMatcher(fallback=True) + matcher.add(load_excludes(open(filename, "rt")), False) + return [path for path in files if matcher.match(path)] + + exclfile = tmpdir.join("exclude.txt") + + with exclfile.open("wt") as fh: + fh.write("\n".join(lines)) + + assert evaluate(str(exclfile)) == (files if expected is None else expected) + + +@pytest.mark.parametrize("pattern, cls", [ + ("", FnmatchPattern), + + # Default style + ("*", FnmatchPattern), + ("/data/*", FnmatchPattern), + + # fnmatch style + ("fm:", FnmatchPattern), + ("fm:*", FnmatchPattern), + ("fm:/data/*", FnmatchPattern), + ("fm:fm:/data/*", FnmatchPattern), + + # Regular expression + ("re:", RegexPattern), + ("re:.*", RegexPattern), + ("re:^/something/", RegexPattern), + ("re:re:^/something/", RegexPattern), + + # Path prefix + ("pp:", PathPrefixPattern), + ("pp:/", PathPrefixPattern), + ("pp:/data/", PathPrefixPattern), + ("pp:pp:/data/", PathPrefixPattern), + + # Shell-pattern style + ("sh:", ShellPattern), + ("sh:*", ShellPattern), + ("sh:/data/*", ShellPattern), + ("sh:sh:/data/*", ShellPattern), + ]) +def test_parse_pattern(pattern, cls): + assert isinstance(parse_pattern(pattern), cls) + + +@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"]) +def test_parse_pattern_error(pattern): + with pytest.raises(ValueError): + parse_pattern(pattern) + + +def test_pattern_matcher(): + pm = PatternMatcher() + + assert pm.fallback is None + + for i in ["", "foo", "bar"]: + assert pm.match(i) is None + + pm.add([RegexPattern("^a")], "A") + pm.add([RegexPattern("^b"), RegexPattern("^z")], "B") + pm.add([RegexPattern("^$")], "Empty") + pm.fallback = "FileNotFound" + + assert pm.match("") == "Empty" + assert pm.match("aaa") == "A" + assert pm.match("bbb") == "B" + assert pm.match("ccc") == "FileNotFound" + assert pm.match("xyz") == "FileNotFound" + assert pm.match("z") == "B" + + assert PatternMatcher(fallback="hey!").fallback == "hey!" 
def test_compression_specs(): with pytest.raises(ValueError): CompressionSpec('') - assert CompressionSpec('0') == dict(name='zlib', level=0) - assert CompressionSpec('1') == dict(name='zlib', level=1) - assert CompressionSpec('9') == dict(name='zlib', level=9) - with pytest.raises(ValueError): - CompressionSpec('10') assert CompressionSpec('none') == dict(name='none') assert CompressionSpec('lz4') == dict(name='lz4') assert CompressionSpec('zlib') == dict(name='zlib', level=6) @@ -386,7 +579,7 @@ class TestParseTimestamp(BaseTestCase): def test_get_cache_dir(): - """test that get_cache_dir respects environement""" + """test that get_cache_dir respects environment""" # reset BORG_CACHE_DIR in order to test default old_env = None if os.environ.get('BORG_CACHE_DIR'): @@ -402,6 +595,23 @@ def test_get_cache_dir(): os.environ['BORG_CACHE_DIR'] = old_env +def test_get_keys_dir(): + """test that get_keys_dir respects environment""" + # reset BORG_KEYS_DIR in order to test default + old_env = None + if os.environ.get('BORG_KEYS_DIR'): + old_env = os.environ['BORG_KEYS_DIR'] + del(os.environ['BORG_KEYS_DIR']) + assert get_keys_dir() == os.path.join(os.path.expanduser('~'), '.config', 'borg', 'keys') + os.environ['XDG_CONFIG_HOME'] = '/var/tmp/.config' + assert get_keys_dir() == os.path.join('/var/tmp/.config', 'borg', 'keys') + os.environ['BORG_KEYS_DIR'] = '/var/tmp' + assert get_keys_dir() == '/var/tmp' + # reset old env + if old_env is not None: + os.environ['BORG_KEYS_DIR'] = old_env + + @pytest.fixture() def stats(): stats = Statistics() @@ -480,3 +690,179 @@ def test_file_size_precision(): assert format_file_size(1234, precision=1) == '1.2 kB' # rounded down assert format_file_size(1254, precision=1) == '1.3 kB' # rounded up assert format_file_size(999990000, precision=1) == '1.0 GB' # and not 999.9 MB or 1000.0 MB + + +def test_is_slow_msgpack(): + saved_packer = msgpack.Packer + try: + msgpack.Packer = msgpack.fallback.Packer + assert is_slow_msgpack() + finally: + msgpack.Packer = saved_packer + # this assumes that we have fast msgpack on test platform: + assert not is_slow_msgpack() + + +def test_yes_input(): + inputs = list(TRUISH) + input = FakeInputs(inputs) + for i in inputs: + assert yes(input=input) + inputs = list(FALSISH) + input = FakeInputs(inputs) + for i in inputs: + assert not yes(input=input) + + +def test_yes_input_defaults(): + inputs = list(DEFAULTISH) + input = FakeInputs(inputs) + for i in inputs: + assert yes(default=True, input=input) + input = FakeInputs(inputs) + for i in inputs: + assert not yes(default=False, input=input) + + +def test_yes_input_custom(): + input = FakeInputs(['YES', 'SURE', 'NOPE', ]) + assert yes(truish=('YES', ), input=input) + assert yes(truish=('SURE', ), input=input) + assert not yes(falsish=('NOPE', ), input=input) + + +def test_yes_env(): + for value in TRUISH: + with environment_variable(OVERRIDE_THIS=value): + assert yes(env_var_override='OVERRIDE_THIS') + for value in FALSISH: + with environment_variable(OVERRIDE_THIS=value): + assert not yes(env_var_override='OVERRIDE_THIS') + + +def test_yes_env_default(): + for value in DEFAULTISH: + with environment_variable(OVERRIDE_THIS=value): + assert yes(env_var_override='OVERRIDE_THIS', default=True) + with environment_variable(OVERRIDE_THIS=value): + assert not yes(env_var_override='OVERRIDE_THIS', default=False) + + +def test_yes_defaults(): + input = FakeInputs(['invalid', '', ' ']) + assert not yes(input=input) # default=False + assert not yes(input=input) + assert not yes(input=input) + 
input = FakeInputs(['invalid', '', ' ']) + assert yes(default=True, input=input) + assert yes(default=True, input=input) + assert yes(default=True, input=input) + input = FakeInputs([]) + assert yes(default=True, input=input) + assert not yes(default=False, input=input) + with pytest.raises(ValueError): + yes(default=None) + + +def test_yes_retry(): + input = FakeInputs(['foo', 'bar', TRUISH[0], ]) + assert yes(retry_msg='Retry: ', input=input) + input = FakeInputs(['foo', 'bar', FALSISH[0], ]) + assert not yes(retry_msg='Retry: ', input=input) + + +def test_yes_no_retry(): + input = FakeInputs(['foo', 'bar', TRUISH[0], ]) + assert not yes(retry=False, default=False, input=input) + input = FakeInputs(['foo', 'bar', FALSISH[0], ]) + assert yes(retry=False, default=True, input=input) + + +def test_yes_output(capfd): + input = FakeInputs(['invalid', 'y', 'n']) + assert yes(msg='intro-msg', false_msg='false-msg', true_msg='true-msg', retry_msg='retry-msg', input=input) + out, err = capfd.readouterr() + assert out == '' + assert 'intro-msg' in err + assert 'retry-msg' in err + assert 'true-msg' in err + assert not yes(msg='intro-msg', false_msg='false-msg', true_msg='true-msg', retry_msg='retry-msg', input=input) + out, err = capfd.readouterr() + assert out == '' + assert 'intro-msg' in err + assert 'retry-msg' not in err + assert 'false-msg' in err + + +def test_progress_percentage_multiline(capfd): + pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%", file=sys.stderr) + pi.show(0) + out, err = capfd.readouterr() + assert err == ' 0%\n' + pi.show(420) + out, err = capfd.readouterr() + assert err == ' 42%\n' + pi.show(1000) + out, err = capfd.readouterr() + assert err == '100%\n' + pi.finish() + out, err = capfd.readouterr() + assert err == '' + + +def test_progress_percentage_sameline(capfd): + pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=True, msg="%3.0f%%", file=sys.stderr) + pi.show(0) + out, err = capfd.readouterr() + assert err == ' 0%\r' + pi.show(420) + out, err = capfd.readouterr() + assert err == ' 42%\r' + pi.show(1000) + out, err = capfd.readouterr() + assert err == '100%\r' + pi.finish() + out, err = capfd.readouterr() + assert err == ' ' * 4 + '\r' + + +def test_progress_percentage_step(capfd): + pi = ProgressIndicatorPercent(100, step=2, start=0, same_line=False, msg="%3.0f%%", file=sys.stderr) + pi.show() + out, err = capfd.readouterr() + assert err == ' 0%\n' + pi.show() + out, err = capfd.readouterr() + assert err == '' # no output at 1% as we have step == 2 + pi.show() + out, err = capfd.readouterr() + assert err == ' 2%\n' + + +def test_progress_endless(capfd): + pi = ProgressIndicatorEndless(step=1, file=sys.stderr) + pi.show() + out, err = capfd.readouterr() + assert err == '.' + pi.show() + out, err = capfd.readouterr() + assert err == '.' + pi.finish() + out, err = capfd.readouterr() + assert err == '\n' + + +def test_progress_endless_step(capfd): + pi = ProgressIndicatorEndless(step=2, file=sys.stderr) + pi.show() + out, err = capfd.readouterr() + assert err == '' # no output here as we have step == 2 + pi.show() + out, err = capfd.readouterr() + assert err == '.' + pi.show() + out, err = capfd.readouterr() + assert err == '' # no output here as we have step == 2 + pi.show() + out, err = capfd.readouterr() + assert err == '.' 
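
The progress tests above fully specify the indicators' observable behaviour: output goes to the given file, a percentage step suppresses intermediate values, same_line switches the line ending between "\r" (overwrite in place) and "\n", and finish() blanks a same-line display. A minimal sketch consistent with those assertions (an assumption for illustration, not the code from borg.helpers):

import sys


class ProgressIndicatorPercent:
    def __init__(self, total, step=5, start=0, same_line=False, msg="%3.0f%%", file=sys.stderr):
        self.counter = 0          # current item, auto-incremented by show()
        self.total = total
        self.trigger_at = start   # print again when at least this percentage is reached
        self.step = step
        self.same_line = same_line
        self.msg = msg
        self.file = file

    def show(self, current=None):
        if current is not None:
            self.counter = current
        pct = self.counter * 100 / self.total
        self.counter += 1
        if pct >= self.trigger_at:
            self.trigger_at += self.step
            # "\r" overwrites the previous value in place, "\n" emits one line per value
            print(self.msg % pct, file=self.file, end='\r' if self.same_line else '\n')

    def finish(self):
        if self.same_line:
            # blank out whatever progress display is left on the line
            print(' ' * len(self.msg % 100), file=self.file, end='\r')

ProgressIndicatorEndless would follow the same step logic, emitting one dot per triggered show() and a final newline from finish(), which is what test_progress_endless and test_progress_endless_step assert.
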
diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 2f234dd8..4c57d1f0 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -2,18 +2,18 @@ import os import re import shutil import tempfile -from binascii import hexlify +from binascii import hexlify, unhexlify from ..crypto import bytes_to_long, num_aes_blocks from ..key import PlaintextKey, PassphraseKey, KeyfileKey -from ..helpers import Location, unhexlify +from ..helpers import Location from . import BaseTestCase class KeyTestCase(BaseTestCase): class MockArgs: - repository = Location(tempfile.mkstemp()[1]) + location = Location(tempfile.mkstemp()[1]) keyfile2_key_file = """ BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/borg/testsuite/locking.py b/borg/testsuite/locking.py index 4b36e0ca..bc62650d 100644 --- a/borg/testsuite/locking.py +++ b/borg/testsuite/locking.py @@ -2,12 +2,14 @@ import time import pytest -from ..locking import get_id, TimeoutTimer, ExclusiveLock , UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE +from ..locking import get_id, TimeoutTimer, ExclusiveLock, UpgradableLock, LockRoster, \ + ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout ID1 = "foo", 1, 1 ID2 = "bar", 2, 2 + def test_id(): hostname, pid, tid = get_id() assert isinstance(hostname, str) @@ -52,7 +54,7 @@ class TestExclusiveLock: def test_timeout(self, lockpath): with ExclusiveLock(lockpath, id=ID1): - with pytest.raises(ExclusiveLock.LockTimeout): + with pytest.raises(LockTimeout): ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire() @@ -92,6 +94,17 @@ class TestUpgradableLock: with UpgradableLock(lockpath, exclusive=True, id=ID2): pass + def test_timeout(self, lockpath): + with UpgradableLock(lockpath, exclusive=False, id=ID1): + with pytest.raises(LockTimeout): + UpgradableLock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire() + with UpgradableLock(lockpath, exclusive=True, id=ID1): + with pytest.raises(LockTimeout): + UpgradableLock(lockpath, exclusive=False, id=ID2, timeout=0.1).acquire() + with UpgradableLock(lockpath, exclusive=True, id=ID1): + with pytest.raises(LockTimeout): + UpgradableLock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire() + @pytest.fixture() def rosterpath(tmpdir): diff --git a/borg/testsuite/logger.py b/borg/testsuite/logger.py index 1db72bf2..b6dc2965 100644 --- a/borg/testsuite/logger.py +++ b/borg/testsuite/logger.py @@ -1,7 +1,6 @@ import logging from io import StringIO -from mock import Mock import pytest from ..logger import find_parent_module, create_logger, setup_logging @@ -11,7 +10,7 @@ logger = create_logger() @pytest.fixture() def io_logger(): io = StringIO() - handler = setup_logging(io) + handler = setup_logging(stream=io, env_var=None) handler.setFormatter(logging.Formatter('%(name)s: %(message)s')) logger.setLevel(logging.DEBUG) return io @@ -38,3 +37,18 @@ def test_multiple_loggers(io_logger): def test_parent_module(): assert find_parent_module() == __name__ + + +def test_lazy_logger(): + # just calling all the methods of the proxy + logger.setLevel(logging.DEBUG) + logger.debug("debug") + logger.info("info") + logger.warning("warning") + logger.error("error") + logger.critical("critical") + logger.log(logging.INFO, "info") + try: + raise Exception + except Exception: + logger.exception("exception") diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 2b99b83d..0606280e 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -1,12 +1,12 @@ import os import shutil +import 
sys import tempfile - -from mock import patch +from unittest.mock import patch from ..hashindex import NSIndex from ..helpers import Location, IntegrityError -from ..locking import UpgradableLock +from ..locking import UpgradableLock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod from ..repository import Repository from . import BaseTestCase @@ -158,9 +158,9 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): for name in os.listdir(self.repository.path): if name.startswith('index.'): os.unlink(os.path.join(self.repository.path, name)) - with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.ExclusiveLockFailed) as upgrade: + with patch.object(UpgradableLock, 'upgrade', side_effect=LockFailed) as upgrade: self.reopen() - self.assert_raises(UpgradableLock.ExclusiveLockFailed, lambda: len(self.repository)) + self.assert_raises(LockFailed, lambda: len(self.repository)) upgrade.assert_called_once_with() def test_crash_before_write_index(self): @@ -311,7 +311,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): # Simulate a crash before compact with patch.object(Repository, 'compact_segments') as compact: self.repository.commit() - compact.assert_called_once_with() + compact.assert_called_once_with(save_space=False) self.reopen() self.check(repair=True) self.assert_equal(self.repository.get(bytes(32)), b'data2') @@ -326,13 +326,24 @@ class RemoteRepositoryTestCase(RepositoryTestCase): self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None)) def test_ssh_cmd(self): - assert self.repository.umask is not None - assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() - assert self.repository.ssh_cmd(Location('ssh://example.com/foo')) == ['ssh', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() - assert self.repository.ssh_cmd(Location('ssh://user@example.com/foo')) == ['ssh', 'user@example.com', 'borg', 'serve'] + self.repository.umask_flag() - assert self.repository.ssh_cmd(Location('ssh://user@example.com:1234/foo')) == ['ssh', '-p', '1234', 'user@example.com', 'borg', 'serve'] + self.repository.umask_flag() + assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', 'example.com'] + assert self.repository.ssh_cmd(Location('ssh://example.com/foo')) == ['ssh', 'example.com'] + assert self.repository.ssh_cmd(Location('ssh://user@example.com/foo')) == ['ssh', 'user@example.com'] + assert self.repository.ssh_cmd(Location('ssh://user@example.com:1234/foo')) == ['ssh', '-p', '1234', 'user@example.com'] os.environ['BORG_RSH'] = 'ssh --foo' - assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', '--foo', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() + assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', '--foo', 'example.com'] + + def test_borg_cmd(self): + class MockArgs: + remote_path = 'borg' + umask = 0o077 + + assert self.repository.borg_cmd(None, testing=True) == [sys.executable, '-m', 'borg.archiver', 'serve'] + args = MockArgs() + # note: test logger is on info log level, so --info gets added automagically + assert self.repository.borg_cmd(args, testing=False) == ['borg', 'serve', '--umask=077', '--info'] + args.remote_path = 'borg-0.28.2' + assert self.repository.borg_cmd(args, testing=False) == ['borg-0.28.2', 'serve', '--umask=077', '--info'] class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): diff --git a/borg/testsuite/shellpattern.py 
b/borg/testsuite/shellpattern.py new file mode 100644 index 00000000..fae8c75d --- /dev/null +++ b/borg/testsuite/shellpattern.py @@ -0,0 +1,113 @@ +import re + +import pytest + +from .. import shellpattern + + +def check(path, pattern): + compiled = re.compile(shellpattern.translate(pattern)) + + return bool(compiled.match(path)) + + +@pytest.mark.parametrize("path, patterns", [ + # Literal string + ("foo/bar", ["foo/bar"]), + ("foo\\bar", ["foo\\bar"]), + + # Non-ASCII + ("foo/c/\u0152/e/bar", ["foo/*/\u0152/*/bar", "*/*/\u0152/*/*", "**/\u0152/*/*"]), + ("\u00e4\u00f6\u00dc", ["???", "*", "\u00e4\u00f6\u00dc", "[\u00e4][\u00f6][\u00dc]"]), + + # Question mark + ("foo", ["fo?"]), + ("foo", ["f?o"]), + ("foo", ["f??"]), + ("foo", ["?oo"]), + ("foo", ["?o?"]), + ("foo", ["??o"]), + ("foo", ["???"]), + + # Single asterisk + ("", ["*"]), + ("foo", ["*", "**", "***"]), + ("foo", ["foo*"]), + ("foobar", ["foo*"]), + ("foobar", ["foo*bar"]), + ("foobarbaz", ["foo*baz"]), + ("bar", ["*bar"]), + ("foobar", ["*bar"]), + ("foo/bar", ["foo/*bar"]), + ("foo/bar", ["foo/*ar"]), + ("foo/bar", ["foo/*r"]), + ("foo/bar", ["foo/*"]), + ("foo/bar", ["foo*/bar"]), + ("foo/bar", ["fo*/bar"]), + ("foo/bar", ["f*/bar"]), + ("foo/bar", ["*/bar"]), + + # Double asterisk (matches 0..n directory layers) + ("foo/bar", ["foo/**/bar"]), + ("foo/1/bar", ["foo/**/bar"]), + ("foo/1/22/333/bar", ["foo/**/bar"]), + ("foo/", ["foo/**/"]), + ("foo/1/", ["foo/**/"]), + ("foo/1/22/333/", ["foo/**/"]), + ("bar", ["**/bar"]), + ("1/bar", ["**/bar"]), + ("1/22/333/bar", ["**/bar"]), + ("foo/bar/baz", ["foo/**/*"]), + + # Set + ("foo1", ["foo[12]"]), + ("foo2", ["foo[12]"]), + ("foo2/bar", ["foo[12]/*"]), + ("f??f", ["f??f", "f[?][?]f"]), + ("foo]", ["foo[]]"]), + + # Inverted set + ("foo3", ["foo[!12]"]), + ("foo^", ["foo[^!]"]), + ("foo!", ["foo[^!]"]), + ]) +def test_match(path, patterns): + for p in patterns: + assert check(path, p) + + +@pytest.mark.parametrize("path, patterns", [ + ("", ["?", "[]"]), + ("foo", ["foo?"]), + ("foo", ["?foo"]), + ("foo", ["f?oo"]), + + # do not match path separator + ("foo/ar", ["foo?ar"]), + + # do not match/cross over os.path.sep + ("foo/bar", ["*"]), + ("foo/bar", ["foo*bar"]), + ("foo/bar", ["foo*ar"]), + ("foo/bar", ["fo*bar"]), + ("foo/bar", ["fo*ar"]), + + # Double asterisk + ("foobar", ["foo/**/bar"]), + + # Two asterisks without slash do not match directory separator + ("foo/bar", ["**"]), + + # Double asterisk not matching filename + ("foo/bar", ["**/"]), + + # Set + ("foo3", ["foo[12]"]), + + # Inverted set + ("foo1", ["foo[!12]"]), + ("foo2", ["foo[!12]"]), + ]) +def test_mismatch(path, patterns): + for p in patterns: + assert not check(path, p) diff --git a/borg/testsuite/upgrader.py b/borg/testsuite/upgrader.py index 3d045912..9a1f823f 100644 --- a/borg/testsuite/upgrader.py +++ b/borg/testsuite/upgrader.py @@ -12,7 +12,7 @@ except ImportError: from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey -from ..remote import RemoteRepository +from ..archiver import UMASK_DEFAULT from ..repository import Repository @@ -169,7 +169,7 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace): orig_inode = first_inode(attic_repo.path) repo = AtticRepositoryUpgrader(str(tmpdir), create=False) # replicate command dispatch, partly - os.umask(RemoteRepository.umask) + os.umask(UMASK_DEFAULT) backup = repo.upgrade(dryrun=False, inplace=inplace) if inplace: assert backup is None @@ -179,7 +179,7 @@ def 
test_convert_all(tmpdir, attic_repo, attic_key_file, inplace): assert first_inode(repo.path) != first_inode(backup) # i have seen cases where the copied tree has world-readable # permissions, which is wrong - assert stat_segment(backup).st_mode & 0o007 == 0 + assert stat_segment(backup).st_mode & UMASK_DEFAULT == 0 assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) diff --git a/borg/upgrader.py b/borg/upgrader.py index 2a8a977d..e739e071 100644 --- a/borg/upgrader.py +++ b/borg/upgrader.py @@ -4,10 +4,9 @@ import logging logger = logging.getLogger(__name__) import os import shutil -import sys import time -from .helpers import get_keys_dir, get_cache_dir +from .helpers import get_keys_dir, get_cache_dir, ProgressIndicatorPercent from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -16,7 +15,11 @@ ATTIC_MAGIC = b'ATTICSEG' class AtticRepositoryUpgrader(Repository): - def upgrade(self, dryrun=True, inplace=False): + def __init__(self, *args, **kw): + kw['lock'] = False # do not create borg lock files (now) in attic repo + super().__init__(*args, **kw) + + def upgrade(self, dryrun=True, inplace=False, progress=False): """convert an attic repository to a borg repository those are the files that need to be upgraded here, from most @@ -34,8 +37,8 @@ class AtticRepositoryUpgrader(Repository): if not dryrun: shutil.copytree(self.path, backup, copy_function=os.link) logger.info("opening attic repository with borg and converting") - # we need to open the repo to load configuration, keyfiles and segments - self.open(self.path, exclusive=False) + # now lock the repo, after we have made the copy + self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True, timeout=1.0).acquire() segments = [filename for i, filename in self.io.segment_iterator()] try: keyfile = self.find_attic_keyfile() @@ -49,14 +52,22 @@ class AtticRepositoryUpgrader(Repository): exclusive=True).acquire() try: self.convert_cache(dryrun) - self.convert_segments(segments, dryrun=dryrun, inplace=inplace) + self.convert_repo_index(dryrun=dryrun, inplace=inplace) + self.convert_segments(segments, dryrun=dryrun, inplace=inplace, progress=progress) + self.borg_readme() finally: self.lock.release() self.lock = None return backup + def borg_readme(self): + readme = os.path.join(self.path, 'README') + os.remove(readme) + with open(readme, 'w') as fd: + fd.write('This is a Borg repository\n') + @staticmethod - def convert_segments(segments, dryrun=True, inplace=False): + def convert_segments(segments, dryrun=True, inplace=False, progress=False): """convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -65,17 +76,17 @@ class AtticRepositoryUpgrader(Repository): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" logger.info("converting %d segments..." 
% len(segments)) - i = 0 - for filename in segments: - i += 1 - print("\rconverting segment %d/%d, %.2f%% done (%s)" - % (i, len(segments), 100*float(i)/len(segments), filename), - end='', file=sys.stderr) + segment_count = len(segments) + pi = ProgressIndicatorPercent(total=segment_count, msg="Converting segments %3.0f%%", same_line=True) + for i, filename in enumerate(segments): + if progress: + pi.show(i) if dryrun: time.sleep(0.001) else: AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC, inplace=inplace) - print(file=sys.stderr) + if progress: + pi.finish() @staticmethod def header_replace(filename, old_magic, new_magic, inplace=True): @@ -125,7 +136,7 @@ class AtticRepositoryUpgrader(Repository): replacement pattern is `s/ATTIC KEY/BORG_KEY/` in `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or - `$HOME/.borg/keys`. + `$HOME/.config/borg/keys`. no need to decrypt to convert. we need to rewrite the whole key file because magic string length changed, but that's not a @@ -141,8 +152,8 @@ class AtticRepositoryUpgrader(Repository): with open(keyfile, 'w') as f: f.write(data) - def convert_cache(self, dryrun): - """convert caches from attic to borg + def convert_repo_index(self, dryrun, inplace): + """convert some repo files those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: @@ -152,6 +163,21 @@ class AtticRepositoryUpgrader(Repository): should probably update, with a lock, see `Repository.open()`, which i'm not sure we should use because it may write data on `Repository.close()`... + """ + transaction_id = self.get_index_transaction_id() + if transaction_id is None: + logger.warning('no index file found for repository %s' % self.path) + else: + index = os.path.join(self.path, 'index.%d' % transaction_id) + logger.info("converting repo index %s" % index) + if not dryrun: + AtticRepositoryUpgrader.header_replace(index, b'ATTICIDX', b'BORG_IDX', inplace=inplace) + + def convert_cache(self, dryrun): + """convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or `$HOME/.cache/attic//`), which we could just drop, @@ -159,15 +185,6 @@ class AtticRepositoryUpgrader(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - transaction_id = self.get_index_transaction_id() - if transaction_id is None: - logger.warning('no index file found for repository %s' % self.path) - else: - index = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8') - logger.info("converting index index %s" % index) - if not dryrun: - AtticRepositoryUpgrader.header_replace(index, b'ATTICIDX', b'BORG_IDX') - # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', os.path.join(os.path.expanduser('~'), @@ -249,10 +266,61 @@ class AtticKeyfileKey(KeyfileKey): get_keys_dir = cls.get_keys_dir id = hexlify(repository.id).decode('ascii') keys_dir = get_keys_dir() + if not os.path.exists(keys_dir): + raise KeyfileNotFoundError(repository.path, keys_dir) for name in os.listdir(keys_dir): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() if line and line.startswith(cls.FILE_ID) and line[10:] == id: return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) + raise KeyfileNotFoundError(repository.path, keys_dir) + + +class 
BorgRepositoryUpgrader(Repository): + def upgrade(self, dryrun=True, inplace=False, progress=False): + """convert an old borg repository to a current borg repository + """ + logger.info("converting borg 0.xx to borg current") + try: + keyfile = self.find_borg0xx_keyfile() + except KeyfileNotFoundError: + logger.warning("no key file found for repository") + else: + self.move_keyfiles(keyfile, dryrun) + + def find_borg0xx_keyfile(self): + return Borg0xxKeyfileKey.find_key_file(self) + + def move_keyfiles(self, keyfile, dryrun): + filename = os.path.basename(keyfile) + new_keyfile = os.path.join(get_keys_dir(), filename) + try: + os.rename(keyfile, new_keyfile) + except FileExistsError: + # likely the attic -> borg upgrader already put it in the final location + pass + + +class Borg0xxKeyfileKey(KeyfileKey): + """backwards compatible borg 0.xx key file parser""" + + @staticmethod + def get_keys_dir(): + return os.environ.get('BORG_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.borg', 'keys')) + + @classmethod + def find_key_file(cls, repository): + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + if not os.path.exists(keys_dir): + raise KeyfileNotFoundError(repository.path, keys_dir) + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID) + 1:] == id: + return filename + raise KeyfileNotFoundError(repository.path, keys_dir) diff --git a/borg/xattr.py b/borg/xattr.py index ded6d752..27a18df6 100644 --- a/borg/xattr.py +++ b/borg/xattr.py @@ -7,6 +7,9 @@ import tempfile from ctypes import CDLL, create_string_buffer, c_ssize_t, c_size_t, c_char_p, c_int, c_uint32, get_errno from ctypes.util import find_library +from .logger import create_logger +logger = create_logger() + def is_enabled(path=None): """Determine if xattr is enabled on the filesystem @@ -27,8 +30,28 @@ def get_all(path, follow_symlinks=True): if e.errno in (errno.ENOTSUP, errno.EPERM): return {} +libc_name = find_library('c') +if libc_name is None: + # find_library didn't work, maybe we are on some minimal system that misses essential + # tools used by find_library, like ldconfig, gcc/cc, objdump. + # so we can only try some "usual" names for the C library: + if sys.platform.startswith('linux'): + libc_name = 'libc.so.6' + elif sys.platform.startswith(('freebsd', 'netbsd')): + libc_name = 'libc.so' + elif sys.platform == 'darwin': + libc_name = 'libc.dylib' + else: + msg = "Can't find C library. No fallback known. Try installing ldconfig, gcc/cc or objdump." + logger.error(msg) + raise Exception(msg) -libc = CDLL(find_library('c'), use_errno=True) +try: + libc = CDLL(libc_name, use_errno=True) +except OSError as e: + msg = "Can't find C library [%s]. Try installing ldconfig, gcc/cc or objdump." 
% e + logger.error(msg) + raise Exception(msg) def _check(rv, path=None): @@ -36,7 +59,7 @@ def _check(rv, path=None): raise OSError(get_errno(), path) return rv -if sys.platform.startswith('linux'): +if sys.platform.startswith('linux'): # pragma: linux only libc.llistxattr.argtypes = (c_char_p, c_char_p, c_size_t) libc.llistxattr.restype = c_ssize_t libc.flistxattr.argtypes = (c_int, c_char_p, c_size_t) @@ -100,7 +123,7 @@ if sys.platform.startswith('linux'): func = libc.lsetxattr _check(func(path, name, value, len(value) if value else 0, 0), path) -elif sys.platform == 'darwin': +elif sys.platform == 'darwin': # pragma: darwin only libc.listxattr.argtypes = (c_char_p, c_char_p, c_size_t, c_int) libc.listxattr.restype = c_ssize_t libc.flistxattr.argtypes = (c_int, c_char_p, c_size_t) @@ -166,7 +189,7 @@ elif sys.platform == 'darwin': flags = XATTR_NOFOLLOW _check(func(path, name, value, len(value) if value else 0, 0, flags), path) -elif sys.platform.startswith('freebsd'): +elif sys.platform.startswith('freebsd'): # pragma: freebsd only EXTATTR_NAMESPACE_USER = 0x0001 libc.extattr_list_fd.argtypes = (c_int, c_int, c_char_p, c_size_t) libc.extattr_list_fd.restype = c_ssize_t @@ -208,11 +231,8 @@ elif sys.platform.startswith('freebsd'): mv = memoryview(namebuf.raw) while mv: length = mv[0] - # Python < 3.3 returns bytes instead of int - if isinstance(length, bytes): - length = ord(length) - names.append(os.fsdecode(bytes(mv[1:1+length]))) - mv = mv[1+length:] + names.append(os.fsdecode(bytes(mv[1:1 + length]))) + mv = mv[1 + length:] return names def getxattr(path, name, *, follow_symlinks=True): @@ -247,7 +267,7 @@ elif sys.platform.startswith('freebsd'): func = libc.extattr_set_link _check(func(path, EXTATTR_NAMESPACE_USER, name, value, len(value) if value else 0), path) -else: +else: # pragma: unknown platform only def listxattr(path, *, follow_symlinks=True): return [] diff --git a/docs/_static/logo.png b/docs/_static/logo.png new file mode 100644 index 00000000..1f05c915 Binary files /dev/null and b/docs/_static/logo.png differ diff --git a/docs/_static/logo.xcf b/docs/_static/logo.xcf new file mode 100644 index 00000000..ea9c55c2 Binary files /dev/null and b/docs/_static/logo.xcf differ diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..628d21d1 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,95 @@ + +API Documentation +================= + +.. automodule:: borg.archiver + :members: + :undoc-members: + +.. automodule:: borg.upgrader + :members: + :undoc-members: + +.. automodule:: borg.archive + :members: + :undoc-members: + +.. automodule:: borg.fuse + :members: + :undoc-members: + +.. automodule:: borg.platform + :members: + :undoc-members: + +.. automodule:: borg.locking + :members: + :undoc-members: + +.. automodule:: borg.shellpattern + :members: + :undoc-members: + +.. automodule:: borg.repository + :members: + :undoc-members: + +.. automodule:: borg.lrucache + :members: + :undoc-members: + +.. automodule:: borg.remote + :members: + :undoc-members: + +.. automodule:: borg.hash_sizes + :members: + :undoc-members: + +.. automodule:: borg.xattr + :members: + :undoc-members: + +.. automodule:: borg.helpers + :members: + :undoc-members: + +.. automodule:: borg.cache + :members: + :undoc-members: + +.. automodule:: borg.key + :members: + :undoc-members: + +.. automodule:: borg.logger + :members: + :undoc-members: + +.. automodule:: borg.platform_darwin + :members: + :undoc-members: + +.. automodule:: borg.platform_linux + :members: + :undoc-members: + +.. 
automodule:: borg.hashindex
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.compress
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.chunker
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.crypto
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.platform_freebsd
+    :members:
+    :undoc-members:
diff --git a/docs/borg_theme/css/borg.css b/docs/borg_theme/css/borg.css
new file mode 100644
index 00000000..6f4a7f41
--- /dev/null
+++ b/docs/borg_theme/css/borg.css
@@ -0,0 +1,18 @@
+@import url("theme.css");
+
+/* The Return of the Borg.
+ *
+ * Have a bit green and grey and darkness (and if only in the upper left corner).
+ */
+
+.wy-side-nav-search {
+    background-color: black;
+}
+
+.wy-side-nav-search > a {
+    color: rgba(255, 255, 255, 0.5);
+}
+
+.wy-side-nav-search > div.version {
+    color: rgba(255, 255, 255, 0.5);
+}
diff --git a/docs/changes.rst b/docs/changes.rst
index 620e6614..818fb866 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -1,21 +1,324 @@
 Changelog
 =========
 
-Version 0.28.0
+Version 1.0.0 (not released yet)
+--------------------------------
+
+The major release number change (0.x -> 1.x) indicates bigger incompatible
+changes; please read the compatibility notes, adapt / test your scripts and
+check your backup logs.
+
+Compatibility notes:
+
+- drop support for python 3.2 and 3.3, require 3.4 or 3.5, #221 #65 #490
+  note: we provide binaries that include python 3.5.1 and everything else
+  needed. they are an option in case you are stuck with < 3.4 otherwise.
+- change encryption to be on by default (using "repokey" mode)
+- moved keyfile keys from ~/.borg/keys to ~/.config/borg/keys,
+  you can either move them manually or run "borg upgrade <REPO>"
+- remove support for --encryption=passphrase,
+  use borg migrate-to-repokey to switch to repokey mode, #97
+- remove deprecated "--compression <number>",
+  use --compression zlib,<level> instead,
+  in case of 0, you could also use --compression none
+- remove deprecated "--hourly/daily/weekly/monthly/yearly",
+  use --keep-hourly/daily/weekly/monthly/yearly instead
+- remove deprecated "--do-not-cross-mountpoints",
+  use --one-file-system instead
+- disambiguate -p option, #563:
+
+  - -p now is the same as --progress
+  - -P now is the same as --prefix
+- remove deprecated "borg verify",
+  use borg extract --dry-run instead
+- cleanup environment variable semantics, #355
+  the environment variables used to be "yes sayers" when set; this was
+  conceptually generalized to "automatic answerers" and they just give their
+  value as answer (as if you typed in that value when being asked).
+  See the "usage" / "Environment Variables" section of the docs for details.
+- change the builtin default for --chunker-params, create 2MiB chunks, #343
+  --chunker-params new default: 19,23,21,4095 - old default: 10,23,16,4096
+
+  one of the biggest issues with borg < 1.0 (and also attic) was that it had a
+  default target chunk size of 64kiB, thus it created a lot of chunks and thus
+  also a huge chunk management overhead (high RAM and disk usage).
+
+  please note that the new default won't change the chunks that you already
+  have in your repository. the new big chunks do not deduplicate with the old
+  small chunks, so expect your repo to grow at least by the size of every
+  changed file and in the worst case (e.g. if your files cache was lost / is
+  not used) by the size of every file (minus any compression you might use).
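+
+  as an illustration (an editorial sketch, not code from borg itself; it
+  assumes the parameter order min_exp, max_exp, mask_bits, window_size and
+  that the target chunk size is 2^mask_bits bytes, matching the 2MiB / 64kiB
+  figures above), the defaults map to chunk sizes like this::
+
+      def chunk_sizes(min_exp, max_exp, mask_bits, window_size):
+          # minimum, target (average) and maximum chunk size in bytes;
+          # window_size only affects the rolling hash, not the sizes
+          return 2 ** min_exp, 2 ** mask_bits, 2 ** max_exp
+
+      chunk_sizes(19, 23, 21, 4095)  # new default: 512kiB min, 2MiB target, 8MiB max
+      chunk_sizes(10, 23, 16, 4096)  # old default: 1kiB min, 64kiB target, 8MiB max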
+
+  in case you want to immediately see a much lower resource usage (RAM / disk)
+  for chunks management, it might be better to start with a new repo than
+  continuing in the existing repo (with an existing repo, you'd have to wait
+  until all archives with small chunks got pruned to see a lower resource
+  usage).
+
+  if you used the old --chunker-params default value (or if you did not use
+  the --chunker-params option at all) and you'd like to continue using small
+  chunks (and you accept the huge resource usage that comes with that), just
+  explicitly use borg create --chunker-params=10,23,16,4095.
+
+New features:
+
+- borg migrate-to-repokey ("passphrase" -> "repokey" encryption key mode)
+- implement --short for borg list REPO, fixes #611
+- implement --list for borg extract (consistency with borg create)
+- borg serve: overwrite client's --restrict-to-path with ssh forced command's
+  option value (but keep everything else from the client commandline), #544
+- use $XDG_CONFIG_HOME/keys for keyfile keys (~/.config/borg/keys), #515
+- "borg upgrade" moves the keyfile keys to the new location
+
+Bug fixes:
+
+- normalize trailing slashes for the repository path, #606
+
+Other changes:
+
+- suppress unneeded exception context (PEP 409), simpler tracebacks
+- removed special code needed to deal with imperfections / incompatibilities /
+  missing stuff in py 3.2/3.3, simplify code that can be done simpler in 3.4
+- removed some version requirements that were kept on old versions because
+  newer did not support py 3.2 any more
+- use some py 3.4+ stdlib code instead of own/openssl/pypi code:
+
+  - use os.urandom instead of own cython openssl RAND_bytes wrapper, fixes #493
+  - use hashlib.pbkdf2_hmac from py stdlib instead of own openssl wrapper
+  - use hmac.compare_digest instead of == operator (constant time comparison)
+  - use stat.filemode instead of homegrown code
+  - use "mock" library from stdlib, #145
+  - remove borg.support (with non-broken argparse copy), it is ok in 3.4+, #358
+- Vagrant: copy CHANGES.rst as symlink, #592
+- cosmetic code cleanups, add flake8 to tox/travis, #4
+- docs / help:
+
+  - make "borg -h" output prettier, #591
+  - slightly rephrase prune help
+  - add missing example for --list option of borg create
+  - quote exclude line that includes an asterisk to prevent shell expansion
+  - fix dead link to license
+  - delete Ubuntu Vivid, it is not supported anymore (EOL)
+
+
+Version 0.30.0
+--------------
+
+Compatibility notes:
+
+- you may need to use -v (or --info) more often to actually see output emitted
+  at INFO log level (because it is suppressed at the default WARNING log level).
+  See the "general" section in the usage docs.
+- for borg create, you need --list (additionally to -v) to see the long file
+  list (was needed so you can have e.g.
  --stats alone without the long list)
+- see below about BORG_DELETE_I_KNOW_WHAT_I_AM_DOING (was:
+  BORG_CHECK_I_KNOW_WHAT_I_AM_DOING)
+
+Bug fixes:
+
+- fix crash when using borg create --dry-run --keep-tag-files, #570
+- make sure teardown with cleanup happens for Cache and RepositoryCache,
+  avoiding leftover locks and TEMP dir contents, #285 (partially), #548
+- fix locking KeyError, partial fix for #502
+- log stats consistently, #526
+- add abbreviated weekday to timestamp format, fixes #496
+- strip whitespace when loading exclusions from file
+- unset LD_LIBRARY_PATH before invoking ssh, fixes strange OpenSSL library
+  version warning when using the borg binary, #514
+- add some error handling/fallback for C library loading, #494
+- added BORG_DELETE_I_KNOW_WHAT_I_AM_DOING for check in "borg delete", #503
+- remove unused "repair" rpc method name
+
+New features:
+
+- borg create: implement exclusions using regular expression patterns.
+- borg create: implement inclusions using patterns.
+- borg extract: support patterns, #361
+- support different styles for patterns:
+
+  - fnmatch (`fm:` prefix, default when omitted), like borg <= 0.29.
+  - shell (`sh:` prefix) with `*` not matching directory separators and
+    `**/` matching 0..n directories
+  - path prefix (`pp:` prefix, for unifying borg create pp1 pp2 into the
+    patterns system), semantics like in borg <= 0.29
+  - regular expression (`re:`), new!
+- --progress option for borg upgrade (#291) and borg delete
+- update progress indication more often (e.g. for borg create within big
+  files or for borg check repo), #500
+- finer chunker granularity for items metadata stream, #547, #487
+- borg create --list is now used (additionally to -v) to enable the verbose
+  file list output
+- display borg version below tracebacks, #532
+
+Other changes:
+
+- hashtable size (and thus: RAM and disk consumption) follows a growth policy:
+  grows fast while small, grows slower when getting bigger, #527
+- Vagrantfile: use pyinstaller 3.1 to build binaries, freebsd sqlite3 fix,
+  fixes #569
+- no separate binaries for centos6 any more because the generic linux binaries
+  also work on centos6 (or in general: on systems with a slightly older glibc
+  than debian7)
+- dev environment: require virtualenv<14.0 so we get a py32 compatible pip
+- docs:
+
+  - add space-saving chunks.archive.d trick to FAQ
+  - important: clarify -v and log levels in usage -> general, please read!
+  - sphinx configuration: create a simple man page from usage docs
+  - add a repo server setup example
+  - disable unneeded SSH features in authorized_keys examples for security.
+  - borg prune only knows "--keep-within" and not "--within"
+  - add gource video to resources docs, #507
+  - add netbsd install instructions
+  - authors: make it more clear what refers to borg and what to attic
+  - document standalone binary requirements, #499
+  - rephrase the mailing list section
+  - development docs: run build_api and build_usage before tagging release
+  - internals docs: hash table max. load factor is 0.75 now
+  - markup, typo, grammar, phrasing, clarifications and other fixes.
+  - add gcc gcc-c++ to redhat/fedora/korora install docs, fixes #583
+
+
+Version 0.29.0
+--------------
+
+Compatibility notes:
+
+- when upgrading to 0.29.0 you need to upgrade client as well as server
+  installations due to the locking and commandline interface changes,
+  otherwise you'll get an error msg about an RPC protocol mismatch or a wrong
+  commandline option.
+ if you run a server that needs to support both old and new clients, it is + suggested that you have a "borg-0.28.2" and a "borg-0.29.0" command. + clients then can choose via e.g. "borg --remote-path=borg-0.29.0 ...". +- the default waiting time for a lock changed from infinity to 1 second for a + better interactive user experience. if the repo you want to access is + currently locked, borg will now terminate after 1s with an error message. + if you have scripts that shall wait for the lock for a longer time, use + --lock-wait N (with N being the maximum wait time in seconds). + +Bug fixes: + +- hash table tuning (better chosen hashtable load factor 0.75 and prime initial + size of 1031 gave ~1000x speedup in some scenarios) +- avoid creation of an orphan lock for one case, #285 +- --keep-tag-files: fix file mode and multiple tag files in one directory, #432 +- fixes for "borg upgrade" (attic repo converter), #466 +- remove --progress isatty magic (and also --no-progress option) again, #476 +- borg init: display proper repo URL +- fix format of umask in help pages, #463 + +New features: + +- implement --lock-wait, support timeout for UpgradableLock, #210 +- implement borg break-lock command, #157 +- include system info below traceback, #324 +- sane remote logging, remote stderr, #461: + + - remote log output: intercept it and log it via local logging system, + with "Remote: " prefixed to message. log remote tracebacks. + - remote stderr: output it to local stderr with "Remote: " prefixed. +- add --debug and --info (same as --verbose) to set the log level of the + builtin logging configuration (which otherwise defaults to warning), #426 + note: there are few messages emitted at DEBUG level currently. +- optionally configure logging via env var BORG_LOGGING_CONF +- add --filter option for status characters: e.g. to show only the added + or modified files (and also errors), use "borg create -v --filter=AME ...". +- more progress indicators, #394 +- use ISO-8601 date and time format, #375 +- "borg check --prefix" to restrict archive checking to that name prefix, #206 + +Other changes: + +- hashindex_add C implementation (speed up cache re-sync for new archives) +- increase FUSE read_size to 1024 (speed up metadata operations) +- check/delete/prune --save-space: free unused segments quickly, #239 +- increase rpc protocol version to 2 (see also Compatibility notes), #458 +- silence borg by default (via default log level WARNING) +- get rid of C compiler warnings, #391 +- upgrade OS X FUSE to 3.0.9 on the OS X binary build system +- use python 3.5.1 to build binaries +- docs: + + - new mailing list borgbackup@python.org, #468 + - readthedocs: color and logo improvements + - load coverage icons over SSL (avoids mixed content) + - more precise binary installation steps + - update release procedure docs about OS X FUSE + - FAQ entry about unexpected 'A' status for unchanged file(s), #403 + - add docs about 'E' file status + - add "borg upgrade" docs, #464 + - add developer docs about output and logging + - clarify encryption, add note about client-side encryption + - add resources section, with videos, talks, presentations, #149 + - Borg moved to Arch Linux [community] + - fix wrong installation instructions for archlinux + + +Version 0.28.2 -------------- New features: +- borg create --exclude-if-present TAGFILE - exclude directories that have the + given file from the backup. 
You can additionally give --keep-tag-files to
+  preserve just the directory roots and the tag-files (but not back up other
+  directory contents), #395, attic #128, attic #142
+
+Other changes:
+
+- do not create docs sources at build time (just have them in the repo),
+  completely remove have_cython() hack, do not use the "mock" library at build
+  time, #384
+- avoid hidden import, make it easier for PyInstaller, easier fix for #218
+- docs:
+
+  - add description of item flags / status output, fixes #402
+  - explain how to regenerate usage and API files (build_api or
+    build_usage) and when to commit usage files directly into git, #384
+  - minor install docs improvements
+
+
+Version 0.28.1
+--------------
+
+Bug fixes:
+
+- do not try to build api / usage docs for production install,
+  fixes unexpected "mock" build dependency, #384
+
+Other changes:
+
+- avoid using msgpack.packb at import time
+- fix formatting issue in changes.rst
+- fix build on readthedocs
+
+
+Version 0.28.0
+--------------
+
+Compatibility notes:
+
+- changed return codes (exit codes), see docs. in short:
+  old: 0 = ok, 1 = error. now: 0 = ok, 1 = warning, 2 = error
+
+New features:
+
 - refactor return codes (exit codes), fixes #61
-- give a final status into the log output, including exit code, fixes #58
+- add --show-rc option to enable "terminating with X status, rc N" output, fixes #58, #351
 - borg create backups atime and ctime additionally to mtime, fixes #317
 - extract: support atime additionally to mtime
 - FUSE: support ctime and atime additionally to mtime
 - support borg --version
+- emit a warning if we have a slow msgpack installed
+- borg list --prefix=thishostname- REPO, fixes #205
+- Debug commands (do not use except if you know what you do: debug-get-obj,
+  debug-put-obj, debug-delete-obj, debug-dump-archive-items).
 
 Bug fixes:
 
 - setup.py: fix bug related to BORG_LZ4_PREFIX processing
+- fix "check" for repos that have incomplete chunks, fixes #364
 - borg mount: fix unlocking of repository at umount time, fixes #331
 - fix reading files without touching their atime, #334
 - non-ascii ACL fixes for Linux, FreeBSD and OS X, #277
@@ -23,13 +326,20 @@ Bug fixes:
 - borg upgrade: do not upgrade repositories in place by default, #299
 - fix cascading failure with the index conversion code, #269
 - borg check: implement 'cmdline' archive metadata value decoding, #311
+- fix RobustUnpacker, it missed some metadata keys (new atime and ctime keys
+  were missing, but also bsdflags). add check for unknown metadata keys.
+- create from stdin: also save atime, ctime (cosmetic)
+- use default_notty=False for confirmations, fixes #345
+- vagrant: fix msgpack installation on centos, fixes #342
+- deal with unicode errors for symlinks in the same way as for regular files and
+  have a helpful warning message about how to fix wrong locale setup, fixes #382
+- add ACL keys the RobustUnpacker must know about
 
 Other changes:
 
-- improve file size displays
-- convert to more flexible size formatters
-- explicitely commit to the units standard, #289
-- archiver: add E status (means that an error occured when processing this
+- improve file size displays, more flexible size formatters
+- explicitly commit to the units standard, #289
+- archiver: add E status (means that an error occurred when processing this
  (single) item)
 - do binary releases via "github releases", closes #214
 - create: use -x and --one-file-system (was: --do-not-cross-mountpoints), #296
@@ -37,17 +347,28 @@ Other changes:
 - show progress display if on a tty, output more progress information, #303
 - factor out status output so it is consistent, fix surrogates removal,
   maybe fixes #309
-- benchmarks: test create, extract, list, delete, info, check, help, fixes #146
-- benchmarks: test with both the binary and the python code
+- move away from RawConfigParser to ConfigParser
+- archive checker: better error logging, give chunk_id and sequence numbers
+  (can be used together with borg debug-dump-archive-items).
+- do not mention the deprecated passphrase mode
+- emit a deprecation warning for --compression N (giving just a number)
+- misc .coveragerc fixes (and coverage measurement improvements), fixes #319
+- refactor confirmation code, reduce code duplication, add tests
+- prettier error messages, fixes #307, #57
 - tests:
 
+  - add a test to find disk-full issues, #327
   - travis: also run tests on Python 3.5
   - travis: use tox -r so it rebuilds the tox environments
   - test the generated pyinstaller-based binary by archiver unit tests, #215
   - vagrant: tests: announce whether fakeroot is used or not
   - vagrant: add vagrant user to fuse group for debianoid systems also
   - vagrant: llfuse install on darwin needs pkgconfig installed
+  - vagrant: use pyinstaller from develop branch, fixes #336
+  - benchmarks: test create, extract, list, delete, info, check, help, fixes #146
+  - benchmarks: test with both the binary and the python code
   - archiver tests: test with both the binary and the python code, fixes #215
+  - make basic test more robust
 - docs:
 
   - moved docs to borgbackup.readthedocs.org, #155
@@ -64,7 +385,12 @@ Other changes:
   - remove api docs (too much breakage on rtd)
   - borgbackup install + basics presentation (asciinema)
   - describe the current style guide in documentation
-
+  - add section about debug commands
+  - warn about not running out of space
+  - add example for rename
+  - improve chunker params docs, fixes #362
+  - minor development docs update
+
 
 Version 0.27.0
 --------------
diff --git a/docs/conf.py b/docs/conf.py
index db9cd2de..72eb833a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -44,7 +44,7 @@ master_doc = 'index'
 # General information about the project.
 project = 'Borg - Deduplicating Archiver'
-copyright = '2010-2014 Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'
+copyright = '2010-2014 Jonas Borgström, 2015-2016 The Borg Collective (see AUTHORS file)'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -63,7 +63,7 @@ release = version
 # non-false value, then it is used:
 #today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+today_fmt = '%Y-%m-%d'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -99,6 +99,15 @@ if not on_rtd:  # only import and set the theme if we're building docs locally
     import sphinx_rtd_theme
     html_theme = 'sphinx_rtd_theme'
     html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+    html_style = 'css/borg.css'
+else:
+    html_context = {
+        'css_files': [
+            'https://media.readthedocs.org/css/sphinx_rtd_theme.css',
+            'https://media.readthedocs.org/css/readthedocs-doc-embed.css',
+            '_static/css/borg.css',
+        ],
+    }
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -106,7 +115,7 @@ if not on_rtd:  # only import and set the theme if we're building docs locally
 #html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
-html_theme_path = ['_themes']
+#html_theme_path = ['_themes']
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
@@ -117,7 +126,7 @@ html_theme_path = ['_themes']
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-html_logo = '_static/favicon.ico'
+html_logo = '_static/logo.png'
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -127,11 +136,11 @@ html_favicon = '_static/favicon.ico'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-#html_static_path = ['_static']
+html_static_path = ['borg_theme']
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+html_last_updated_fmt = '%Y-%m-%d'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
@@ -219,10 +228,12 @@ latex_documents = [
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-#man_pages = [
-#    ('man', 'borg', 'Borg',
-#     ['see "AUTHORS" file'], 1)
-#]
+man_pages = [
+    ('usage', 'borg',
+     'BorgBackup is a deduplicating backup program with optional compression and authenticated encryption.',
+     ['The Borg Collective (see AUTHORS file)'],
+     1),
+]
 
 extensions = ['sphinx.ext.extlinks', 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode']
diff --git a/docs/deployment.rst b/docs/deployment.rst
new file mode 100644
index 00000000..7349b9bd
--- /dev/null
+++ b/docs/deployment.rst
@@ -0,0 +1,166 @@
+.. include:: global.rst.inc
+.. _deployment:
+
+Deployment
+==========
+
+This chapter will give an example of how to set up a borg repository server
+for multiple clients.
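+
+As a rough preview of the end result (using the machine and repository names
+introduced below; the archive name is just an example), the goal is that a
+client can back up with a plain borg command like::
+
+    borg create backup@backup01.srv.local:pictures::2016-01-01 ~/Pictures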
+
+Machines
+--------
+
+Multiple machines are used in this chapter; they will be referred to by their
+respective fully qualified domain names (FQDN).
+
+* The backup server: `backup01.srv.local`
+* The clients:
+
+  - John Doe's desktop: `johndoe.clnt.local`
+  - Webserver 01: `web01.srv.local`
+  - Application server 01: `app01.srv.local`
+
+User and group
+--------------
+
+The repository server needs to have only one UNIX user for all the clients.
+Recommended user and group with additional settings:
+
+* User: `backup`
+* Group: `backup`
+* Shell: `/bin/bash` (or another shell capable of running the `borg serve` command)
+* Home: `/home/backup`
+
+Most clients shall initiate a backup from the root user to catch all
+users, groups and permissions (e.g. when backing up `/home`).
+
+Folders
+-------
+
+The following folder tree layout is suggested on the repository server:
+
+* User home directory, /home/backup
+* Repositories path (storage pool): /home/backup/repos
+* Clients restricted paths (`/home/backup/repos/<client fqdn>`):
+
+  - johndoe.clnt.local: `/home/backup/repos/johndoe.clnt.local`
+  - web01.srv.local: `/home/backup/repos/web01.srv.local`
+  - app01.srv.local: `/home/backup/repos/app01.srv.local`
+
+Restrictions
+------------
+
+Borg is instructed to restrict clients into their own paths:
+``borg serve --restrict-to-path /home/backup/repos/<client fqdn>``
+
+Only one SSH key per client is allowed. Keys are added for ``johndoe.clnt.local``, ``web01.srv.local`` and
+``app01.srv.local``. But all of them access the backups under the single UNIX user account
+``backup@backup01.srv.local``. Every key in ``$HOME/.ssh/authorized_keys`` has a
+forced command and restrictions applied as shown below:
+
+::
+
+  command="cd /home/backup/repos/<client fqdn>;
+  borg serve --restrict-to-path /home/backup/repos/<client fqdn>",
+  no-port-forwarding,no-X11-forwarding,no-pty,
+  no-agent-forwarding,no-user-rc
+
+.. note:: The text shown above needs to be written on a single line!
+
+The options which are added to the key will perform the following:
+
+1. Change the working directory
+2. Run ``borg serve`` restricted to the client base path
+3. Restrict ssh and do not allow stuff which imposes a security risk
+
+Due to the ``cd`` command we use, the server automatically changes the current
+working directory. The client then doesn't need to know the absolute or
+relative remote repository path and can directly access the repositories at
+``<user>@<host>:<repo>``.
+
+.. note:: The setup above ignores all client-given commandline parameters
+          which are normally appended to the `borg serve` command.
+
+Client
+------
+
+The client needs to initialize the `pictures` repository like this::
+
+  borg init backup@backup01.srv.local:pictures
+
+Or with the full path (for demonstration purposes only; it should never
+actually be needed, as the server automatically changes the current working
+directory to the `<client fqdn>` folder)::
+
+  borg init backup@backup01.srv.local:/home/backup/repos/johndoe.clnt.local/pictures
+
+When `johndoe.clnt.local` tries to access a path outside its restriction, the
+following error is raised. John Doe tries to back up into the Web 01 path::
+
+  borg init backup@backup01.srv.local:/home/backup/repos/web01.srv.local/pictures
+
+::
+
+  ~~~ SNIP ~~~
+  Remote: borg.remote.PathNotAllowed: /home/backup/repos/web01.srv.local/pictures
+  ~~~ SNIP ~~~
+  Repository path not allowed
+
+Ansible
+-------
+
+Ansible takes care of all the system-specific commands to add the user, create the
Even when the configuration is changed the repository server configuration is +satisfied and reproducible. + +Automate setting up an repository server with the user, group, folders and +permissions a Ansible playbook could be used. Keep in mind the playbook +uses the Arch Linux `pacman `_ +package manager to install and keep borg up-to-date. + +:: + + - hosts: backup01.srv.local + vars: + user: backup + group: backup + home: /home/backup + pool: "{{ home }}/repos" + auth_users: + - host: johndoe.clnt.local + key: "{{ lookup('file', '/path/to/keys/johndoe.clnt.local.pub') }}" + - host: web01.clnt.local + key: "{{ lookup('file', '/path/to/keys/web01.clnt.local.pub') }}" + - host: app01.clnt.local + key: "{{ lookup('file', '/path/to/keys/app01.clnt.local.pub') }}" + tasks: + - pacman: name=borg state=latest update_cache=yes + - group: name="{{ group }}" state=present + - user: name="{{ user }}" shell=/bin/bash home="{{ home }}" createhome=yes group="{{ group }}" groups= state=present + - file: path="{{ home }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + - file: path="{{ home }}/.ssh" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + - file: path="{{ pool }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + - authorized_key: user="{{ user }}" + key="{{ item.key }}" + key_options='command="cd {{ pool }}/{{ item.host }};borg serve --restrict-to-path {{ pool }}/{{ item.host }}",no-port-forwarding,no-X11-forwarding,no-pty,no-agent-forwarding,no-user-rc' + with_items: auth_users + - file: path="{{ home }}/.ssh/authorized_keys" owner="{{ user }}" group="{{ group }}" mode=0600 state=file + - file: path="{{ pool }}/{{ item.host }}" owner="{{ user }}" group="{{ group }}" mode=0700 state=directory + with_items: auth_users + +Enhancements +------------ + +As this chapter only describes a simple and effective setup it could be further +enhanced when supporting (a limited set) of client supplied commands. A wrapper +for starting `borg serve` could be written. Or borg itself could be enhanced to +autodetect it runs under SSH by checking the `SSH_ORIGINAL_COMMAND` environment +variable. This is left open for future improvements. + +When extending ssh autodetection in borg no external wrapper script is necessary +and no other interpreter or application has to be deployed. + +See also +-------- + +* `SSH Daemon manpage `_ +* `Ansible `_ diff --git a/docs/development.rst b/docs/development.rst index 75ec53df..132dc74c 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -4,7 +4,7 @@ Development =========== -This chapter will get you started with |project_name|' development. +This chapter will get you started with |project_name| development. |project_name| is written in Python (with a little bit of Cython and C for the performance critical parts). @@ -15,9 +15,20 @@ Style guide We generally follow `pep8 `_, with 120 columns instead of 79. We do *not* use form-feed (``^L``) characters to -separate sections either. The `flake8 -`_ commandline tool should be used to -check for style errors before sending pull requests. +separate sections either. Compliance is tested automatically when +you run the tests. + +Output and Logging +------------------ +When writing logger calls, always use correct log level (debug only for +debugging, info for informative messages, warning for warnings, error for +errors, critical for critical errors/states). + +When directly talking to the user (e.g. 
Y/N questions), do not use logging, +but directly output to stderr (not: stdout, it could be connected to a pipe). + +To control the amount and kinds of messages output to stderr or emitted at +info level, use flags like ``--stats`` or ``--list``. Building a development environment ---------------------------------- @@ -48,7 +59,7 @@ Some more advanced examples:: # verify a changed tox.ini (run this after any change to tox.ini): fakeroot -u tox --recreate - fakeroot -u tox -e py32 # run all tests, but only on python 3.2 + fakeroot -u tox -e py34 # run all tests, but only on python 3.4 fakeroot -u tox borg.testsuite.locking # only run 1 test module @@ -58,9 +69,26 @@ Some more advanced examples:: Important notes: -- When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. +- When using ``--`` to give options to py.test, you MUST also give ``borg.testsuite[.module]``. +Regenerate usage files +---------------------- + +Usage and API documentation is currently committed directly to git, +although those files are generated automatically from the source +tree. + +When a new module is added, the ``docs/api.rst`` file needs to be +regenerated:: + + ./setup.py build_api + +When a command is added, a commandline flag changed, added or removed, +the usage docs need to be rebuilt as well:: + + ./setup.py build_usage + Building the docs with Sphinx ----------------------------- @@ -83,7 +111,7 @@ main repository. Using Vagrant ------------- -We use Vagrant for the automated creation of testing environment and borgbackup +We use Vagrant for the automated creation of testing environments and borgbackup standalone binaries for various platforms. For better security, there is no automatic sync in the VM to host direction. @@ -91,16 +119,16 @@ The plugin `vagrant-scp` is useful to copy stuff from the VMs to the host. Usage:: - To create and provision the VM: - vagrant up OS - To create an ssh session to the VM: - vagrant ssh OS command - To shut down the VM: - vagrant halt OS - To shut down and destroy the VM: - vagrant destroy OS - To copy files from the VM (in this case, the generated binary): - vagrant scp OS:/vagrant/borg/borg/dist/borg . + # To create and provision the VM: + vagrant up OS + # To create an ssh session to the VM: + vagrant ssh OS command + # To shut down the VM: + vagrant halt OS + # To shut down and destroy the VM: + vagrant destroy OS + # To copy files from the VM (in this case, the generated binary): + vagrant scp OS:/vagrant/borg/borg.exe . Creating standalone binaries @@ -111,8 +139,8 @@ When using the Vagrant VMs, pyinstaller will already be installed. With virtual env activated:: - pip install pyinstaller>=3.0 # or git checkout master - pyinstaller -F -n borg-PLATFORM --hidden-import=logging.config borg/__main__.py + pip install pyinstaller # or git checkout master + pyinstaller -F -n borg-PLATFORM borg/__main__.py for file in dist/borg-*; do gpg --armor --detach-sign $file; done If you encounter issues, see also our `Vagrantfile` for details. 
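+
+A signed binary can then be checked with standard gpg usage (an editorial
+example, not part of the project's scripts; it assumes the signing public key
+is already in the verifier's keyring)::
+
+    gpg --verify borg-PLATFORM.asc borg-PLATFORM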
@@ -135,11 +163,11 @@ Checklist: - update ``CHANGES.rst``, based on ``git log $PREVIOUS_RELEASE..`` - check version number of upcoming release in ``CHANGES.rst`` - verify that ``MANIFEST.in`` and ``setup.py`` are complete +- ``python setup.py build_api ; python setup.py build_usage`` and commit - tag the release:: git tag -s -m "tagged/signed release X.Y.Z" X.Y.Z -- build fresh docs and update the web site with them - create a release on PyPi:: python setup.py register sdist upload --identity="Thomas Waldmann" --sign @@ -147,10 +175,15 @@ Checklist: - close release milestone on Github - announce on: - - `mailing list `_ + - Mailing list - Twitter (follow @ThomasJWaldmann for these tweets) - - `IRC channel `_ (change ``/topic``) + - IRC channel (change ``/topic``) - create a Github release, include: + * standalone binaries (see above for how to create them) + + + for OS X, document the OS X Fuse version in the README of the binaries. + OS X FUSE uses a kernel extension that needs to be compatible with the + code contained in the binary. * a link to ``CHANGES.rst`` diff --git a/docs/faq.rst b/docs/faq.rst index d98d2625..70322d14 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -26,6 +26,26 @@ repository is only modified from one place. Also keep in mind that |project_name| will keep an exclusive lock on the repository while creating or deleting archives, which may make *simultaneous* backups fail. +Can I copy or synchronize my repo to another location? +------------------------------------------------------ + +Yes, you could just copy all the files. Make sure you do that while no +backup is running. So what you get here is this: + +- client machine ---borg create---> repo1 +- repo1 ---copy---> repo2 + +There is no special borg command to do the copying, just use cp or rsync if +you want to do that. + +But think about whether that is really what you want. If something goes +wrong in repo1, you will have the same issue in repo2 after the copy. + +If you want to have 2 independent backups, it is better to do it like this: + +- client machine ---borg create---> repo1 +- client machine ---borg create---> repo2 + Which file types, attributes, etc. are preserved? ------------------------------------------------- @@ -37,9 +57,9 @@ Which file types, attributes, etc. are preserved? * FIFOs ("named pipes") * Name * Contents - * Time of last modification (nanosecond precision with Python >= 3.3) - * User ID of owner - * Group ID of owner + * Timestamps in nanosecond precision: mtime, atime, ctime + * IDs of owning user and owning group + * Names of owning user and owning group (if the IDs can be resolved) * Unix Mode/Permissions (u/g/o permissions, suid, sgid, sticky) * Extended Attributes (xattrs) on Linux, OS X and FreeBSD * Access Control Lists (ACL_) on Linux, OS X and FreeBSD @@ -57,6 +77,7 @@ Which file types, attributes, etc. are *not* preserved? backed up as (deduplicated and compressed) runs of zero bytes. Archive extraction has optional support to extract all-zero chunks as holes in a sparse file. + * filesystem specific attributes, like ext4 immutable bit, see :issue:`618`. Why is my backup bigger than with attic? Why doesn't |project_name| do compression by default? ---------------------------------------------------------------------------------------------- @@ -67,14 +88,14 @@ adjust the level or algorithm). |project_name| offers a lot of different compression algorithms and levels. 
Which of them is the best for you pretty much depends on your
-use case, your data, your hardware - so you need to do an informed
+use case, your data, your hardware -- so you need to make an informed
 decision about whether you want to use compression, which algorithm
 and which level you want to use. This is why compression defaults to none.
 
 How can I specify the encryption passphrase programmatically?
 --------------------------------------------------------------
- 
+
 The encryption passphrase can be specified programmatically using the
 `BORG_PASSPHRASE` environment variable. This is convenient when setting up
 automated encrypted backups. Another option is to use
@@ -89,11 +110,11 @@ key file based encryption with a blank passphrase. See
 ``export`` in a shell script file should be safe, however, as the
 environment of a process is `accessible only to that user
-`_.
+`_.
 
 When backing up to remote encrypted repos, is encryption done locally?
 ----------------------------------------------------------------------
- 
+
 Yes, file and directory metadata and data is locally encrypted, before
 leaving the local machine. We do not mean the transport layer encryption
 by that, but the data/metadata itself. Transport layer encryption (e.g.
@@ -111,6 +132,36 @@ into the repository.
 Yes, as an attacker with access to the remote server could delete (or
 otherwise make unavailable) all your backups.
 
+The borg cache eats way too much disk space, what can I do?
+-----------------------------------------------------------
+
+There is a temporary (but maybe long lived) hack to avoid using lots of disk
+space for chunks.archive.d (see :issue:`235` for details):
+
+::
+
+    # this assumes you are working with the same user as the backup.
+    # you can get the REPOID from the "config" file inside the repository.
+    cd ~/.cache/borg/<REPOID>
+    rm -rf chunks.archive.d ; touch chunks.archive.d
+
+This deletes all the cached archive chunk indexes and replaces the directory
+that kept them with a file, so borg won't be able to store anything "in" there
+in future.
+
+This has some pros and cons, though:
+
+- much less disk space needed for ~/.cache/borg.
+- chunk cache resyncs will be slower as it will have to transfer chunk usage
+  metadata for all archives from the repository (which might be slow if your
+  repo connection is slow) and it will also have to build the hashtables from
+  that data.
+  chunk cache resyncs happen e.g. if your repo was written to by another
+  machine (if you share the same backup repo between multiple machines) or if
+  your local chunks cache was lost somehow.
+
+The long term plan to improve this is called "borgception", see :issue:`474`.
+
 If a backup stops mid-way, does the already-backed-up data stay there?
 ----------------------------------------------------------------------
 
@@ -165,6 +216,38 @@ Yes, if you want to detect accidental data damage (like bit rot), use the
 If you want to be able to detect malicious tampering also, use an encrypted
 repo. It will then be able to check using CRCs and HMACs.
 
+.. _a_status_oddity:
+
+I am seeing 'A' (added) status for an unchanged file!?
+------------------------------------------------------
+
+The files cache is used to determine whether |project_name| already
+"knows" / has backed up a file and if so, to skip the file from
+chunking.
It does intentionally *not* contain files that: + +- have >= 10 as "entry age" (|project_name| has not seen this file for a while) +- have a modification time (mtime) same as the newest mtime in the created + archive + +So, if you see an 'A' status for unchanged file(s), they are likely the files +with the most recent mtime in that archive. + +This is expected: it is to avoid data loss with files that are backed up from +a snapshot and that are immediately changed after the snapshot (but within +mtime granularity time, so the mtime would not change). Without the code that +removes these files from the files cache, the change that happened right after +the snapshot would not be contained in the next backup as |project_name| would +think the file is unchanged. + +This does not affect deduplication, the file will be chunked, but as the chunks +will often be the same and already stored in the repo (except in the above +mentioned rare condition), it will just re-use them as usual and not store new +data chunks. + +Since only the files cache is used in the display of files status, +those files are reported as being added when, really, chunks are +already used. + Why was Borg forked from Attic? ------------------------------- diff --git a/docs/global.rst.inc b/docs/global.rst.inc index eaa4648b..d34f0965 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -1,5 +1,5 @@ .. highlight:: bash -.. |project_name| replace:: ``Borg`` +.. |project_name| replace:: Borg .. |package_dirname| replace:: borgbackup-|version| .. |package_filename| replace:: |package_dirname|.tar.gz .. |package_url| replace:: https://pypi.python.org/packages/source/b/borgbackup/|package_filename| @@ -8,22 +8,21 @@ .. _issue tracker: https://github.com/borgbackup/borg/issues .. _deduplication: https://en.wikipedia.org/wiki/Data_deduplication .. _AES: https://en.wikipedia.org/wiki/Advanced_Encryption_Standard -.. _HMAC-SHA256: http://en.wikipedia.org/wiki/HMAC +.. _HMAC-SHA256: https://en.wikipedia.org/wiki/HMAC .. _SHA256: https://en.wikipedia.org/wiki/SHA-256 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list -.. _libacl: http://savannah.nongnu.org/projects/acl/ -.. _libattr: http://savannah.nongnu.org/projects/attr/ +.. _libacl: https://savannah.nongnu.org/projects/acl/ +.. _libattr: https://savannah.nongnu.org/projects/attr/ .. _liblz4: https://github.com/Cyan4973/lz4 .. _OpenSSL: https://www.openssl.org/ -.. _`Python 3`: http://www.python.org/ +.. _`Python 3`: https://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash -.. _msgpack: http://msgpack.org/ +.. _msgpack: https://msgpack.org/ .. _`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/ .. _llfuse: https://pypi.python.org/pypi/llfuse/ .. _homebrew: http://brew.sh/ .. _userspace filesystems: https://en.wikipedia.org/wiki/Filesystem_in_Userspace -.. _librelist: http://librelist.com/ .. _Cython: http://cython.org/ .. _virtualenv: https://pypi.python.org/pypi/virtualenv/ .. 
_mailing list discussion about internals: http://librelist.com/browser/attic/2014/5/6/questions-and-suggestions-about-inner-working-of-attic> diff --git a/docs/index.rst b/docs/index.rst index 210db4a0..89a907de 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,8 +12,10 @@ Borg Documentation installation quickstart usage + deployment faq support + resources changes internals development diff --git a/docs/installation.rst b/docs/installation.rst index efe8dadb..ff3bf450 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -6,57 +6,99 @@ Installation There are different ways to install |project_name|: -- **distribution package** - easy and fast if a package is available for your - Linux/BSD distribution. -- **PyInstaller binary** - easy and fast, we provide a ready-to-use binary file +- :ref:`distribution-package` - easy and fast if a package is + available from your distribution. +- :ref:`pyinstaller-binary` - easy and fast, we provide a ready-to-use binary file that comes bundled with all dependencies. -- **pip** - installing a source package with pip needs more installation steps - and requires all dependencies with development headers and a compiler. -- **git** - for developers and power users who want to have the latest code or - use revision control (each release is tagged). +- :ref:`source-install`, either: + - :ref:`pip-installation` - installing a source package with pip needs + more installation steps and requires all dependencies with + development headers and a compiler. + - :ref:`git-installation` - for developers and power users who want to + have the latest code or use revision control (each release is + tagged). -Installation (Distribution Package) ------------------------------------ +.. _distribution-package: -Some Linux and BSD distributions might offer a ready-to-use ``borgbackup`` +Distribution Package +-------------------- + +Some distributions might offer a ready-to-use ``borgbackup`` package which can be installed with the package manager. As |project_name| is still a young project, such a package might be not available for your system -yet. Please ask package maintainers to build a package or, if you can package / -submit it yourself, please help us with that! +yet. -* On **Arch Linux**, there is a package available in the AUR_. +============ ============================================= ======= +Distribution Source Command +============ ============================================= ======= +Arch Linux `[community]`_ ``pacman -S borg`` +Debian `stretch`_, `unstable/sid`_ ``apt install borgbackup`` +NetBSD `pkgsrc`_ ``pkg_add py-borgbackup`` +NixOS `.nix file`_ N/A +OS X `Brew cask`_ ``brew cask install borgbackup`` +Ubuntu `Xenial 16.04`_, `Wily 15.10 (backport PPA)`_ ``apt install borgbackup`` +Ubuntu `Trusty 14.04 (backport PPA)`_ ``apt install borgbackup`` +============ ============================================= ======= + +.. _[community]: https://www.archlinux.org/packages/?name=borg +.. _stretch: https://packages.debian.org/stretch/borgbackup +.. _unstable/sid: https://packages.debian.org/sid/borgbackup +.. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup +.. _Xenial 16.04: https://launchpad.net/ubuntu/xenial/+source/borgbackup +.. _Wily 15.10 (backport PPA): https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup +.. _Trusty 14.04 (backport PPA): https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup +.. _.nix file: https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/backup/borg/default.nix +.. 
_Brew cask: http://caskroom.io/ + +Please ask package maintainers to build a package or, if you can package / +submit it yourself, please help us with that! See :issue:`105` on +github to followup on packaging efforts. If a package is available, it might be interesting to check its version and compare that to our latest release and review the :doc:`changes`. -.. _AUR: https://aur.archlinux.org/packages/borgbackup/ +.. _pyinstaller-binary: +Standalone Binary +----------------- -Installation (PyInstaller Binary) ---------------------------------- - -The |project_name| binary is available on the releases_ page for the following -platforms: +|project_name| binaries (generated with `pyinstaller`_) are available +on the releases_ page for the following platforms: * **Linux**: glibc >= 2.13 (ok for most supported Linux releases) * **Mac OS X**: 10.10 (unknown whether it works for older releases) * **FreeBSD**: 10.2 (unknown whether it works for older releases) -These binaries work without requiring specific installation steps. Just drop -them into a directory in your ``PATH`` and then you can run ``borg``. If a new -version is released, you will have to manually download it and replace the old -version. +To install such a binary, just drop it into a directory in your ``PATH``, +make borg readable and executable for its users and then you can run ``borg``:: + sudo cp borg-linux64 /usr/local/bin/borg + sudo chown root:root /usr/local/bin/borg + sudo chmod 755 /usr/local/bin/borg + +Note that the binary uses /tmp to unpack |project_name| with all dependencies. +It will fail if /tmp has not enough free space or is mounted with the ``noexec`` option. +You can change the temporary directory by setting the ``TEMP`` environment variable before running |project_name|. + +If a new version is released, you will have to manually download it and replace +the old version using the same steps as shown above. + +.. _pyinstaller: http://www.pyinstaller.org .. _releases: https://github.com/borgbackup/borg/releases -Installing the Dependencies ---------------------------- +.. _source-install: -To install |project_name| from a source package, you have to install the +From Source +----------- + +Dependencies +~~~~~~~~~~~~ + +To install |project_name| from a source package (including pip), you have to install the following dependencies first: -* `Python 3`_ >= 3.2.2. Even though Python 3 is not the default Python version on +* `Python 3`_ >= 3.4.0. Even though Python 3 is not the default Python version on most systems, it is usually available as an optional install. * OpenSSL_ >= 1.0.0 * libacl_ (that pulls in libattr_ also) @@ -75,15 +117,15 @@ After you have installed the dependencies, you can proceed with steps outlined under :ref:`pip-installation`. Debian / Ubuntu -~~~~~~~~~~~~~~~ ++++++++++++++++ Install the dependencies with development headers:: - sudo apt-get install python3 python3-dev python3-pip python-virtualenv - sudo apt-get install libssl-dev openssl - sudo apt-get install libacl1-dev libacl1 - sudo apt-get install liblz4-dev liblz4-1 - sudo apt-get install build-essential + sudo apt-get install python3 python3-dev python3-pip python-virtualenv \ + libssl-dev openssl \ + libacl1-dev libacl1 \ + liblz4-dev liblz4-1 \ + build-essential sudo apt-get install libfuse-dev fuse pkg-config # optional, for FUSE support In case you get complaints about permission denied on ``/etc/fuse.conf``: on @@ -91,7 +133,7 @@ Ubuntu this means your user is not in the ``fuse`` group. 
Add yourself to that group, log out and log in again. Fedora / Korora -~~~~~~~~~~~~~~~ ++++++++++++++++ Install the dependencies with development headers:: @@ -99,11 +141,12 @@ Install the dependencies with development headers:: sudo dnf install openssl-devel openssl sudo dnf install libacl-devel libacl sudo dnf install lz4-devel + sudo dnf install gcc gcc-c++ sudo dnf install fuse-devel fuse pkgconfig # optional, for FUSE support Mac OS X -~~~~~~~~ +++++++++ Assuming you have installed homebrew_, the following steps will install all the dependencies:: @@ -117,7 +160,7 @@ FUSE for OS X, which is available as a pre-release_. .. _pre-release: https://github.com/osxfuse/osxfuse/releases Cygwin -~~~~~~ +++++++ .. note:: Running under Cygwin is experimental and has only been tested with Cygwin @@ -144,8 +187,8 @@ In case the creation of the virtual environment fails, try deleting this file:: .. _pip-installation: -Installation (pip) ------------------- +Using pip +~~~~~~~~~ Virtualenv_ can be used to build and install |project_name| without affecting the system Python or requiring root access. Using a virtual environment is @@ -172,9 +215,10 @@ activating your virtual environment:: pip install -U borgbackup +.. _git-installation: -Installation (git) ------------------- +Using git +~~~~~~~~~ This uses latest, unreleased development code from git. While we try not to break master, there are no guarantees on anything. :: diff --git a/docs/internals.rst b/docs/internals.rst index d989fd9c..9d1bbd84 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -60,8 +60,8 @@ created by some other process), lock acquisition fails. The cache lock is usually in `~/.cache/borg/REPOID/lock.*`. The repository lock is in `repository/lock.*`. -In case you run into troubles with the locks, you can just delete the `lock.*` -directory and file IF you first make sure that no |project_name| process is +In case you run into troubles with the locks, you can use the ``borg break-lock`` +command after you first have made sure that no |project_name| process is running on any machine that accesses this resource. Be very careful, the cache or repository might get damaged if multiple processes use it at the same time. @@ -181,21 +181,21 @@ Each item represents a file, directory or other fs item and is stored as an * mode (item type + permissions) * source (for links) * rdev (for devices) -* mtime +* mtime, atime, ctime in nanoseconds * xattrs * acl * bsdfiles -``ctime`` (change time) is not stored because there is no API to set -it and it is reset every time an inode's metadata is changed. - All items are serialized using msgpack and the resulting byte stream -is fed into the same chunker used for regular file data and turned -into deduplicated chunks. The reference to these chunks is then added -to the archive metadata. +is fed into the same chunker algorithm as used for regular file data +and turned into deduplicated chunks. The reference to these chunks is then added +to the archive metadata. To achieve a finer granularity on this metadata +stream, we use different chunker params for this chunker, which result in +smaller chunks. A chunk is stored as an object as well, of course. +.. _chunker_details: Chunks ------ @@ -204,24 +204,21 @@ The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorith It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero, producing chunks of 2^HASH_MASK_BITS Bytes on average. 
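+For illustration, here is a minimal sketch of that idea in Python -- an
+all-in-memory toy with a fixed random table standing in for the real,
+per-archive seeded buzhash table, not |project_name|'s actual implementation:
+
+::
+
+    import random
+
+    HASH_MASK_BITS = 21                    # cut where the low 21 hash bits are zero
+    MASK = (1 << HASH_MASK_BITS) - 1
+    WINDOW = 4095                          # bytes covered by the rolling hash
+    MIN_SIZE, MAX_SIZE = 2 ** 19, 2 ** 23  # CHUNK_MIN_EXP / CHUNK_MAX_EXP defaults
+
+    _rng = random.Random(0)                # stands in for the per-archive seed
+    TABLE = [_rng.getrandbits(32) for _ in range(256)]
+
+    def _rol(x, n):                        # rotate a 32 bit value left by n bits
+        n %= 32
+        return ((x << n) | (x >> (32 - n))) & 0xFFFFFFFF
+
+    def chunkify(data):
+        """Yield content-defined chunks of the given bytes object."""
+        start, h = 0, 0
+        for i in range(len(data)):
+            h = _rol(h, 1) ^ TABLE[data[i]]                 # byte enters window
+            if i - start >= WINDOW:                         # window is full:
+                h ^= _rol(TABLE[data[i - WINDOW]], WINDOW)  # drop leaving byte
+            size = i - start + 1
+            if size == MAX_SIZE or (size >= MIN_SIZE and (h & MASK) == 0):
+                yield data[start:i + 1]
+                start, h = i + 1, 0                         # restart next chunk
+        if start < len(data):
+            yield data[start:]                              # trailing chunk
+
+Because the cut decision depends only on the window contents, inserting or
+removing bytes early in a file shifts chunk boundaries only locally; most
+later chunks stay identical and deduplicate.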
-create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE
+``borg create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE``
 can be used to tune the chunker parameters, the default is:

-- CHUNK_MIN_EXP = 10 (minimum chunk size = 2^10 B = 1 kiB)
+- CHUNK_MIN_EXP = 19 (minimum chunk size = 2^19 B = 512 kiB)
 - CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
-- HASH_MASK_BITS = 16 (statistical medium chunk size ~= 2^16 B = 64 kiB)
+- HASH_MASK_BITS = 21 (statistical medium chunk size ~= 2^21 B = 2 MiB)
 - HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)

-The default parameters are OK for relatively small backup data volumes and
-repository sizes and a lot of available memory (RAM) and disk space for the
-chunk index. If that does not apply, you are advised to tune these parameters
-to keep the chunk count lower than with the defaults.
-
 The buzhash table is altered by XORing it with a seed randomly generated once
 for the archive, and stored encrypted in the keyfile. This is to prevent chunk
 size based fingerprinting attacks on your encrypted repo contents (to guess
 what files you have based on a specific set of chunk sizes).

+For some more general usage hints see also ``--chunker-params``.
+
 Indexes / Caches
 ----------------
@@ -278,10 +275,10 @@ buckets. As a consequence the hash is just a start position for a linear
 search, and if the element is not in the table the index is linearly crossed
 until an empty bucket is found.

-When the hash table is almost full at 90%, its size is doubled. When it's
-almost empty at 25%, its size is halved. So operations on it have a variable
+When the hash table is filled to 75%, its size is grown. When it is
+emptied to 25%, its size is shrunk. So operations on it have a variable
 complexity between constant and linear with low factor, and memory overhead
-varies between 10% and 300%.
+varies between 33% and 300%.

 Indexes / Caches memory usage
@@ -311,28 +308,27 @@ more chunks than estimated above, because 1 file is at least 1 chunk).
 If a remote repository is used the repo index will be allocated on the remote
 side.

-E.g. backing up a total count of 1Mi files with a total size of 1TiB.
+E.g. backing up a total count of 1 Mi (IEC binary prefix, i.e. 2^20) files with a total size of 1 TiB.

-a) with create --chunker-params 10,23,16,4095 (default):
+a) with ``create --chunker-params 10,23,16,4095`` (custom, like borg < 1.0 or attic):

    mem_usage = 2.8GiB

-b) with create --chunker-params 10,23,20,4095 (custom):
+b) with ``create --chunker-params 19,23,21,4095`` (default):

-   mem_usage = 0.4GiB
-
-Note: there is also the --no-files-cache option to switch off the files cache.
-You'll save some memory, but it will need to read / chunk all the files then as
-it can not skip unmodified files then.
+   mem_usage = 0.31GiB

+.. note:: There is also the ``--no-files-cache`` option to switch off the files cache.
+   You'll save some memory, but it will need to read / chunk all the files as
+   it can not skip unmodified files then.

 Encryption
 ----------

-AES_ is used in CTR mode (so no need for padding). A 64bit initialization
+AES_-256 is used in CTR mode (so no need for padding). A 64bit initialization
 vector is used, a `HMAC-SHA256`_ is computed on the encrypted chunk with a
 random 64bit nonce and both are stored in the chunk.
-The header of each chunk is : ``TYPE(1)`` + ``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``.
+The header of each chunk is: ``TYPE(1)`` + ``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``.
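+As a rough illustration of that layout (a sketch only, with hypothetical
+helper names -- not |project_name|'s actual code), such a stored chunk could
+be taken apart with Python's ``struct`` module:
+
+::
+
+    import struct
+
+    HEADER = struct.Struct(">B 32s 8s")   # TYPE(1) + HMAC(32) + NONCE(8)
+
+    def split_chunk(blob):
+        """Split a stored chunk into its header fields and the ciphertext."""
+        type_byte, hmac, nonce = HEADER.unpack_from(blob)
+        ciphertext = blob[HEADER.size:]   # the encrypted (compressed) data
+        return type_byte, hmac, nonce, ciphertext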
 Encryption and HMAC use two different keys.

 In AES CTR mode you can think of the IV as the start value for the counter.
@@ -345,7 +341,12 @@ To reduce payload size, only 8 bytes of the 16 bytes nonce are saved in the
 payload, the first 8 bytes are always zeros. This does not affect security but
 limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes).

-Encryption keys are either derived from a passphrase or kept in a key file.
+Encryption keys (and other secrets) are kept either in a key file on the client
+('keyfile' mode) or in the repository config on the server ('repokey' mode).
+In both cases, the secrets are generated randomly and then encrypted by a
+key derived from your passphrase (this happens on the client before the key
+is stored into the keyfile or as repokey).
+
 The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable
 or prompted for interactive usage.

@@ -354,7 +355,7 @@ Key files
 ---------

 When initialized with the ``init -e keyfile`` command, |project_name|
-needs an associated file in ``$HOME/.borg/keys`` to read and write
+needs an associated file in ``$HOME/.config/borg/keys`` to read and write
 the repository. The format is based on msgpack_, base64 encoding and
 PBKDF2_ SHA256 hashing, which is then encoded again in a msgpack_.

diff --git a/docs/misc/create_chunker-params.txt b/docs/misc/create_chunker-params.txt
index 73cac6a3..3e322b66 100644
--- a/docs/misc/create_chunker-params.txt
+++ b/docs/misc/create_chunker-params.txt
@@ -6,7 +6,7 @@ About borg create --chunker-params
 CHUNK_MIN_EXP and CHUNK_MAX_EXP give the exponent N of the 2^N minimum and
 maximum chunk size. Required: CHUNK_MIN_EXP < CHUNK_MAX_EXP.

-Defaults: 10 (2^10 == 1KiB) minimum, 23 (2^23 == 8MiB) maximum.
+Defaults: 19 (2^19 == 512KiB) minimum, 23 (2^23 == 8MiB) maximum.

 HASH_MASK_BITS is the number of least-significant bits of the rolling hash
 that need to be zero to trigger a chunk cut.
@@ -14,7 +14,7 @@ Recommended: CHUNK_MIN_EXP + X <= HASH_MASK_BITS <= CHUNK_MAX_EXP - X, X >= 2
 (this allows the rolling hash some freedom to make its cut at a place
 determined by the window's contents rather than the min/max. chunk size).

-Default: 16 (statistically, chunks will be about 2^16 == 64kiB in size)
+Default: 21 (statistically, chunks will be about 2^21 == 2MiB in size)

 HASH_WINDOW_SIZE: the size of the window used for the rolling hash computation.
 Default: 4095B

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 19ac429b..3793b0bb 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -8,6 +8,30 @@ This chapter will get you started with |project_name|. The first section
 presents a simple step by step example that uses |project_name| to backup
 data. The next section continues by showing how backups can be automated.

+Important note about free space
+-------------------------------
+
+Before you start creating backups, please make sure that there is **always**
+a good amount of free space on the filesystem that has your backup repository
+(and also on ~/.cache). It is hard to tell how much, maybe 1-5%.
+
+If you run out of disk space, it can be hard or impossible to free space,
+because |project_name| needs free space to operate -- even to delete backup
+archives. There is a ``--save-space`` option for some commands, but even with
+that |project_name| will need free space to operate.
+
+You can use some monitoring process or just include the free space information
+in your backup log files (you check them regularly anyway, right?).
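+A minimal sketch of such a check, runnable right before each backup (in
+Python; the paths and the 5% threshold are made-up values, adjust them to
+your setup):
+
+::
+
+    import shutil, sys
+
+    # hypothetical example paths: repo filesystem and borg cache
+    for path in ["/mnt/backup", "/root/.cache/borg"]:
+        usage = shutil.disk_usage(path)
+        free = usage.free / usage.total
+        print("%s: %.1f%% free" % (path, free * 100))
+        if free < 0.05:
+            sys.exit("not enough free space on %s, aborting backup" % path)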
+ +Also helpful: + +- create a big file as a "space reserve", that you can delete to free space +- if you use LVM: use a LV + a filesystem that you can resize later and have + some unallocated PEs you can add to the LV. +- consider using quotas +- use `prune` regularly + + A step by step example ---------------------- @@ -22,7 +46,7 @@ A step by step example 3. The next day create a new archive called *Tuesday*:: - $ borg create --stats /mnt/backup::Tuesday ~/src ~/Documents + $ borg create -v --stats /mnt/backup::Tuesday ~/src ~/Documents This backup will be a lot quicker and a lot smaller since only new never before seen data is stored. The ``--stats`` option causes |project_name| to @@ -77,11 +101,11 @@ certain number of old archives:: # Backup all of /home and /var/www except a few # excluded directories - borg create --stats \ + borg create -v --stats \ $REPOSITORY::`hostname`-`date +%Y-%m-%d` \ /home \ /var/www \ - --exclude /home/*/.cache \ + --exclude '/home/*/.cache' \ --exclude /home/Ben/Music/Justin\ Bieber \ --exclude '*.pyc' @@ -122,26 +146,28 @@ Keep an eye on CPU load and throughput. Repository encryption --------------------- -Repository encryption is enabled at repository creation time:: +Repository encryption can be enabled or disabled at repository creation time +(the default is enabled, with `repokey` method):: - $ borg init --encryption=repokey|keyfile PATH + $ borg init --encryption=none|repokey|keyfile PATH When repository encryption is enabled all data is encrypted using 256-bit AES_ encryption and the integrity and authenticity is verified using `HMAC-SHA256`_. -All data is encrypted before being written to the repository. This means that -an attacker who manages to compromise the host containing an encrypted -archive will not be able to access any of the data. +All data is encrypted on the client before being written to the repository. This +means that an attacker who manages to compromise the host containing an +encrypted archive will not be able to access any of the data, even as the backup +is being made. |project_name| supports different methods to store the AES and HMAC keys. ``repokey`` mode The key is stored inside the repository (in its "config" file). Use this mode if you trust in your good passphrase giving you enough - protection. + protection. The repository server never sees the plaintext key. ``keyfile`` mode - The key is stored on your local disk (in ``~/.borg/keys/``). + The key is stored on your local disk (in ``~/.config/borg/keys/``). Use this mode if you want "passphrase and having-the-key" security. In both modes, the key is stored in encrypted form and can be only decrypted @@ -154,7 +180,7 @@ For automated backups the passphrase can be specified using the :ref:`this note about password environments ` for more information. -.. important:: The repository data is totally inaccessible without the key:** +.. warning:: The repository data is totally inaccessible without the key: Make a backup copy of the key file (``keyfile`` mode) or repo config file (``repokey`` mode) and keep it at a safe place, so you still have the key in case it gets corrupted or lost. @@ -178,11 +204,10 @@ or:: Remote operations over SSH can be automated with SSH keys. You can restrict the use of the SSH keypair by prepending a forced command to the SSH public key in -the remote server's authorized_keys file. Only the forced command will be run -when the key authenticates a connection. 
This example will start |project_name| in server -mode, and limit the |project_name| server to a specific filesystem path:: +the remote server's `authorized_keys` file. This example will start |project_name| +in server mode and limit it to a specific filesystem path:: - command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] + command="borg serve --restrict-to-path /mnt/backup",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...] If it is not possible to install |project_name| on the remote host, it is still possible to use the remote host to store a repository by diff --git a/docs/resources.rst b/docs/resources.rst new file mode 100644 index 00000000..4113c11d --- /dev/null +++ b/docs/resources.rst @@ -0,0 +1,41 @@ +.. include:: global.rst.inc +.. _resources: + +Resources +========= + +This is a collection of additional resources that are somehow related to +borgbackup. + + +Videos, Talks, Presentations +---------------------------- + +Some of them refer to attic, but you can do the same stuff (and more) with borgbackup. + +- `BorgBackup Installation and Basic Usage `_ (english screencast) + +- `TW's slides for borgbackup talks / lightning talks `_ (just grab the latest ones) + +- "Attic / Borg Backup" talk from GPN 2015 (video, german audio, english slides): + `media.ccc.de `_ + or + `youtube `_ + +- "Attic" talk from Easterhegg 2015 (video, german audio, english slides): + `media.ccc.de `_ + or + `youtube `_ + +- "Attic Backup: Mount your encrypted backups over ssh", 2014 (video, english): + `youtube `_ + +- "Evolution of Borg", Oct 2015 (gource visualization of attic and borg development): + `youtube `_ + +Software +-------- + +- `BorgWeb - a very simple web UI for BorgBackup `_ +- some other stuff found at the `BorgBackup Github organisation `_ +- `atticmatic `_ (includes borgmatic) diff --git a/docs/support.rst b/docs/support.rst index e5986267..1547c666 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -4,8 +4,8 @@ Support ======= -Please first read the docs and existing issue tracker issues and mailing -list posts, a lot of stuff is already documented / explained / discussed / +Please first read the docs, the existing issue tracker issues and mailing +list posts -- a lot of stuff is already documented / explained / discussed / filed there. Issue Tracker @@ -26,15 +26,10 @@ Stay connected. Mailing list ------------ -There is a mailing list for Borg on librelist_ that you can use for feature -requests and general discussions about Borg. A mailing list archive is -available `here `_. - -To subscribe to the list, send an email to borgbackup@librelist.com and reply -to the confirmation mail. - -To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply -to the confirmation mail. +To find out about the mailing list, its topic, how to subscribe, how to +unsubscribe and where you can find the archives of the list, see the +`mailing list homepage +`_. Bounties and Fundraisers ------------------------ diff --git a/docs/usage.rst b/docs/usage.rst index 6b88d5c6..519834d7 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -8,15 +8,33 @@ Usage a number of arguments and options. The following sections will describe each command in detail. -Quiet by default ----------------- +General +------- -Like most UNIX commands |project_name| is quiet by default but the ``-v`` or -``--verbose`` option can be used to get the program to output more status -messages as it is processing. 
+
+Type of log output
+~~~~~~~~~~~~~~~~~~
+
+The log level of the builtin logging configuration defaults to WARNING.
+This is because we want |project_name| to be mostly silent and only output
+warnings (plus errors and critical messages).
+
+Use ``--verbose`` or ``--info`` to set INFO (you will then get informative
+output in addition to warnings, errors and critical messages).
+Use ``--debug`` to set DEBUG to get output made for debugging.
+
+All log messages created with at least the set level will be output.
+
+Log levels: DEBUG < INFO < WARNING < ERROR < CRITICAL
+
+While you can set misc. log levels, do not expect that every command will
+give different output on different log levels -- it's just a possibility.
+
+.. warning:: While some options (like ``--stats`` or ``--list``) will emit more
+   informational messages, you have to use INFO (or lower) log level to make
+   them show up in log output. Use ``-v`` or a logging configuration.

 Return codes
-------------
+~~~~~~~~~~~~

 |project_name| can exit with the following return codes (rc):

@@ -33,7 +51,7 @@ The return code is also logged at the indicated level as the last log entry.

 Environment Variables
---------------------
+~~~~~~~~~~~~~~~~~~~~~

 |project_name| uses some environment variables for automation:

@@ -44,28 +62,30 @@ General:
     BORG_REPO
         When set, use the value to give the default repository location. If a command needs an archive
         parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you
         can either leave it away or abbreviate as `::`, if a positional parameter is required.
     BORG_PASSPHRASE
         When set, use the value to answer the passphrase question for encrypted repositories.
+    BORG_LOGGING_CONF
+        When set, use the given filename as INI_-style logging configuration.
     BORG_RSH
         When set, use this command instead of ``ssh``.
     TMPDIR
         where temporary files are stored (might need a lot of temporary space for some operations)

-Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
-    BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK
+Some automatic "answerers" (if set, they automatically answer confirmation questions):
+    BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no (or =yes)
         For "Warning: Attempting to access a previously unknown unencrypted repository"
-    BORG_RELOCATED_REPO_ACCESS_IS_OK
+    BORG_RELOCATED_REPO_ACCESS_IS_OK=no (or =yes)
         For "Warning: The repository at location ... was previously located at ..."
-    BORG_CHECK_I_KNOW_WHAT_I_AM_DOING
+    BORG_CHECK_I_KNOW_WHAT_I_AM_DOING=NO (or =YES)
         For "Warning: 'check --repair' is an experimental feature that might result in data loss."
-    BORG_CYTHON_DISABLE
-        Disables the loading of Cython modules. This is currently
-        experimental and is used only to generate usage docs at build
-        time. It is unlikely to produce good results on a regular
-        run. The variable should be set to the name of the calling class, and
-        should be unique across all of borg. It is currently only used by ``build_usage``.
+    BORG_DELETE_I_KNOW_WHAT_I_AM_DOING=NO (or =YES)
+        For "You requested to completely DELETE the repository *including* all archives it contains:"
+
+    Note: answers are case sensitive. Setting an invalid answer value might either give the default
+    answer or ask you interactively, depending on whether retries are allowed (they are allowed
+    by default). So please test your scripts interactively before running them unattended.

 Directories:
     BORG_KEYS_DIR
-        Default to '~/.borg/keys'. This directory contains keys for encrypted repositories.
+        Defaults to '~/.config/borg/keys'. This directory contains keys for encrypted repositories.
     BORG_CACHE_DIR
         Defaults to '~/.cache/borg'.
        This directory contains the local cache and might need a lot of space
        for dealing with big repositories.

@@ -84,8 +104,10 @@ Please note:
     (e.g. mode 600, root:root).


+.. _INI: https://docs.python.org/3.4/library/logging.config.html#configuration-file-format
+
 Resource Usage
--------------
+~~~~~~~~~~~~~~

 |project_name| might use a lot of resources depending on the size of the data
 set it is dealing with.

@@ -131,7 +153,7 @@ In case you are interested in more details, please read the internals documentat

 Units
-----
+~~~~~

 To display quantities, |project_name| takes care of respecting the usual
 conventions of scale. Disk sizes are displayed in `decimal
@@ -143,6 +165,14 @@ indicated using the `IEC binary prefixes
 using powers of two (so ``KiB`` means 1024 bytes).

+Date and Time
+~~~~~~~~~~~~~
+
+We format date and time conforming to ISO-8601, that is: YYYY-MM-DD and HH:MM:SS.
+
+For more information, see: https://xkcd.com/1179/
+

 .. include:: usage/init.rst.inc

 Examples
@@ -168,19 +198,14 @@ an attacker has access to your backup repository.
 But be careful with the key / the passphrase:

-``--encryption=passphrase`` is DEPRECATED and will be removed in next major release.
-This mode has very fundamental, unfixable problems (like you can never change
-your passphrase or the pbkdf2 iteration count for an existing repository, because
-the encryption / decryption key is directly derived from the passphrase).
-
-If you want "passphrase-only" security, just use the ``repokey`` mode. The key will
+If you want "passphrase-only" security, use the ``repokey`` mode. The key will
 be stored inside the repository (in its "config" file). In the above mentioned
 attack scenario, the attacker will have the key (but not the passphrase).

 If you want "passphrase and having-the-key" security, use the ``keyfile`` mode.
-The key will be stored in your home directory (in ``.borg/keys``). In the attack
-scenario, the attacker who has just access to your repo won't have the key (and
-also not the passphrase).
+The key will be stored in your home directory (in ``.config/borg/keys``). In
+the attack scenario, an attacker who just has access to your repo won't have
+the key (and not the passphrase either).

 Make a backup copy of the key file (``keyfile`` mode) or repo config file
 (``repokey`` mode) and keep it at a safe place, so you still have the key in
@@ -190,8 +215,10 @@ The backup that is encrypted with that key won't help you with that, of course.

 Make sure you use a good passphrase. Not too short, not too simple. The real
 encryption / decryption key is encrypted with / locked by your passphrase.
 If an attacker gets your key, he can't unlock and use it without knowing the
-passphrase. In ``repokey`` and ``keyfile`` modes, you can change your passphrase
-for existing repos.
+passphrase.
+
+You can change your passphrase for existing repos at any time; it won't affect
+the encryption/decryption key or other secrets.

 .. include:: usage/create.rst.inc

 Examples
 ~~~~~~~~
 ::

     # Backup ~/Documents into an archive named "my-documents"
     $ borg create /mnt/backup::my-documents ~/Documents

+    # same, but verbosely list all files as we process them
+    $ borg create -v --list /mnt/backup::my-documents ~/Documents
+
     # Backup ~/Documents and ~/src but exclude pyc files
     $ borg create /mnt/backup::my-files \
         ~/Documents \
         ~/src \
         --exclude '*.pyc'

+    # Backup home directories excluding image thumbnails (i.e.
only + # /home/*/.thumbnails is excluded, not /home/*/*/.thumbnails) + $ borg create /mnt/backup::my-files /home \ + --exclude 're:^/home/[^/]+/\.thumbnails/' + + # Do the same using a shell-style pattern + $ borg create /mnt/backup::my-files /home \ + --exclude 'sh:/home/*/.thumbnails' + # Backup the root filesystem into an archive named "root-YYYY-MM-DD" # use zlib compression (good, but slow) - default is no compression NAME="root-`date +%Y-%m-%d`" - $ borg create -C zlib,6 /mnt/backup::$NAME / --do-not-cross-mountpoints + $ borg create -C zlib,6 /mnt/backup::$NAME / --one-file-system - # Backup huge files with little chunk management overhead - $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs + # Make a big effort in fine granular deduplication (big chunk management + # overhead, needs a lot of RAM and disk space, see formula in internals + # docs - same parameters as borg < 1.0 or attic): + $ borg create --chunker-params 10,23,16,4095 /mnt/backup::small /smallstuff # Backup a raw device (must not be active/in use/mounted at that time) $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda - @@ -242,7 +283,7 @@ Examples $ borg extract /mnt/backup::my-files # Extract entire archive and list files while processing - $ borg extract -v /mnt/backup::my-files + $ borg extract -v --list /mnt/backup::my-files # Extract the "src" directory $ borg extract /mnt/backup::my-files home/USERNAME/src @@ -257,6 +298,19 @@ Note: currently, extract always writes into the current working directory ("."), .. include:: usage/rename.rst.inc +Examples +~~~~~~~~ +:: + + $ borg create /mnt/backup::archivename ~ + $ borg list /mnt/backup + archivename Mon Nov 2 20:40:06 2015 + + $ borg rename /mnt/backup::archivename newname + $ borg list /mnt/backup + newname Mon Nov 2 20:40:06 2015 + + .. include:: usage/delete.rst.inc .. include:: usage/list.rst.inc @@ -290,29 +344,29 @@ Be careful, prune is potentially dangerous command, it will remove backup archives. The default of prune is to apply to **all archives in the repository** unless -you restrict its operation to a subset of the archives using `--prefix`. -When using --prefix, be careful to choose a good prefix - e.g. do not use a +you restrict its operation to a subset of the archives using ``--prefix``. +When using ``--prefix``, be careful to choose a good prefix - e.g. do not use a prefix "foo" if you do not also want to match "foobar". -It is strongly recommended to always run `prune --dry-run ...` first so you +It is strongly recommended to always run ``prune --dry-run ...`` first so you will see what it would do without it actually doing anything. :: # Keep 7 end of day and 4 additional end of week archives. # Do a dry-run without actually deleting anything. 
-    $ borg prune /mnt/backup --dry-run --keep-daily=7 --keep-weekly=4
+    $ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /mnt/backup

     # Same as above but only apply to archive names starting with "foo":
-    $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --prefix=foo
+    $ borg prune --keep-daily=7 --keep-weekly=4 --prefix=foo /mnt/backup

     # Keep 7 end of day, 4 additional end of week archives,
     # and an end of month archive for every month:
-    $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --keep-monthly=-1
+    $ borg prune --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /mnt/backup

     # Keep all backups in the last 10 days, 4 additional end of week archives,
     # and an end of month archive for every month:
-    $ borg prune /mnt/backup --keep-within=10d --keep-weekly=4 --keep-monthly=-1
+    $ borg prune --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /mnt/backup

 .. include:: usage/info.rst.inc

@@ -327,7 +381,7 @@ Examples
     Hostname: myhostname
     Username: root
     Time: Fri Aug 2 15:18:17 2013
-    Command line: /usr/bin/borg create --stats -C zlib,6 /mnt/backup::root-2013-08-02 / --do-not-cross-mountpoints
+    Command line: /usr/bin/borg create --stats -C zlib,6 /mnt/backup::root-2013-08-02 / --one-file-system
     Number of files: 147429
     Original size: 5344169493 (4.98 GB)
     Compressed size: 1748189642 (1.63 GB)

@@ -357,27 +411,46 @@ Examples
     Initializing repository at "/mnt/backup"
     Enter passphrase (empty for no passphrase):
     Enter same passphrase again:
-    Key file "/home/USER/.borg/keys/mnt_backup" created.
+    Key file "/home/USER/.config/borg/keys/mnt_backup" created.
     Keep this file safe. Your data will be inaccessible without it.

     # Change key file passphrase
     $ borg change-passphrase /mnt/backup
-    Enter passphrase for key file /home/USER/.borg/keys/mnt_backup:
+    Enter passphrase for key file /home/USER/.config/borg/keys/mnt_backup:
     New passphrase:
     Enter same passphrase again:
-    Key file "/home/USER/.borg/keys/mnt_backup" updated
+    Key file "/home/USER/.config/borg/keys/mnt_backup" updated

 .. include:: usage/serve.rst.inc

 Examples
 ~~~~~~~~
+
+borg serve has special support for SSH forced commands (see ``authorized_keys``
+example below): it will detect that you use such a forced command and extract
+the value of the ``--restrict-to-path`` option(s).
+It will then parse the original command that came from the client, make sure
+that it is also ``borg serve`` and enforce the path restriction(s) as given by
+the forced command. That way, other options given by the client (like ``--info``
+or ``--umask``) are preserved (and are not fixed by the forced command).
+
 ::

-    # Allow an SSH keypair to only run |project_name|, and only have access to /mnt/backup.
+    # Allow an SSH keypair to only run borg, and only have access to /mnt/backup.
+    # Use key options to disable unneeded and potentially dangerous SSH functionality.
     # This will help to secure an automated remote backup system.
     $ cat ~/.ssh/authorized_keys
-    command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...]
+    command="borg serve --restrict-to-path /mnt/backup",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-user-rc ssh-rsa AAAAB3[...]
+
+
+.. include:: usage/upgrade.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    borg upgrade -v /mnt/backup


 Miscellaneous Help
@@ -386,11 +459,103 @@ Miscellaneous Help

 .. include:: usage/help.rst.inc


+Debug Commands
+--------------
+There are some more commands (all starting with "debug-") which are all
+**not intended for normal use** and **potentially very dangerous** if used incorrectly.
+
+They exist to improve debugging capabilities without direct system access, e.g.
+in case you ever run into some severe malfunction. Use them only if you know
+what you are doing or if a trusted |project_name| developer tells you what to do.
+
+
 Additional Notes
 ----------------

 Here are misc. notes about topics that are maybe not covered in enough detail in the usage section.

+Item flags
+~~~~~~~~~~
+
+``borg create -v --list`` outputs a verbose list of all files, directories and other
+file system items it considered (no matter whether they had content changes
+or not). For each item, it prefixes a single-letter flag that indicates the type
+and/or status of the item.
+
+If you are interested only in a subset of that output, you can give e.g.
+``--filter=AME`` and it will only show regular files with A, M or E status (see
+below).
+
+An uppercase character represents the status of a regular file relative to the
+"files" cache (not relative to the repo -- this is an issue if the files cache
+is not used). Metadata is stored in any case and for 'A' and 'M' also new data
+chunks are stored. For 'U' all data chunks refer to already existing chunks.
+
+- 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ)
+- 'M' = regular file, modified
+- 'U' = regular file, unchanged
+- 'E' = regular file, an error happened while accessing/reading *this* file
+
+A lowercase character means a file type other than a regular file;
+borg usually just stores its metadata:
+
+- 'd' = directory
+- 'b' = block device
+- 'c' = char device
+- 'h' = regular file, hardlink (to already seen inodes)
+- 's' = symlink
+- 'f' = fifo
+
+Other flags used include:
+
+- 'i' = backup data was read from standard input (stdin)
+- '-' = dry run, item was *not* backed up
+- '?' = missing status code (if you see this, please file a bug report!)
+
+
+--chunker-params
+~~~~~~~~~~~~~~~~
+The chunker params influence how input files are cut into pieces (chunks)
+which are then considered for deduplication. They also have a big impact on
+resource usage (RAM and disk space) as the amount of resources needed is
+(also) determined by the total number of chunks in the repository (see
+`Indexes / Caches memory usage` for details).
+
+``--chunker-params=10,23,16,4095`` results in a fine-grained deduplication
+and creates a large number of chunks and thus uses a lot of resources to manage
+them. This is good for relatively small data volumes and if the machine has a
+good amount of free RAM and disk space.
+
+``--chunker-params=19,23,21,4095`` (default) results in a coarse-grained
+deduplication and creates a much smaller number of chunks and thus uses fewer
+resources. This is good for relatively big data volumes and if the machine has
+a relatively low amount of free RAM and disk space.
+
+If you have already made some archives in a repository and you then change
+chunker params, this of course impacts deduplication as the chunks will be
+cut differently.
+
+In the worst case (all files are big and were touched in between backups), this
+will store all content into the repository again.
+
+Usually, it is not that bad though:
+
+- usually most files are not touched, so it will just re-use the old chunks
+  it already has in the repo
+- files smaller than the (both old and new) minimum chunksize result in only
+  one chunk anyway, so the resulting chunks are the same and deduplication
+  will apply
+
+If you switch chunker params to save resources for an existing repo that
+already has some backup archives, you will see an increasing effect over time,
+when more and more files have been touched and stored again using the bigger
+chunksize **and** all references to the smaller older chunks have been removed
+(by deleting / pruning archives).
+
+If you want to see an immediate big effect on resource usage, it is better to
+start a new repository when changing chunker params.
+
+For more details, see :ref:`chunker_details`.
+
 --read-special
 ~~~~~~~~~~~~~~
@@ -412,7 +577,7 @@ You need to be careful with what you give as filename when using ``--read-specia
 e.g. if you give ``/dev/zero``, your backup will never terminate.

 The given files' metadata is saved as it would be saved without
-``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) - but
+``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but
 additionally, also the content read from it will be saved for it.

 Restoring such files' content is currently only supported one at a time via
@@ -421,7 +586,7 @@ maybe directly into an existing device file of your choice or indirectly via
 ``dd``).

 Example
-~~~~~~~
++++++++

 Imagine you have made some snapshots of logical volumes (LVs) you want to backup.

diff --git a/docs/usage/break-lock.rst.inc b/docs/usage/break-lock.rst.inc
new file mode 100644
index 00000000..d59b1dc0
--- /dev/null
+++ b/docs/usage/break-lock.rst.inc
@@ -0,0 +1,34 @@
+.. _borg_break-lock:
+
+borg break-lock
+---------------
+::
+
+    usage: borg break-lock [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                           [--no-files-cache] [--umask M] [--remote-path PATH]
+                           REPOSITORY
+
+    Break the repository lock (e.g. in case it was left by a dead borg).
+
+    positional arguments:
+      REPOSITORY          repository for which to break the locks
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+
+Description
+~~~~~~~~~~~
+
+This command breaks the repository and cache locks.
+Please use it carefully and only while no borg process (on any machine) is
+trying to access the Cache or the Repository.

diff --git a/docs/usage/change-passphrase.rst.inc b/docs/usage/change-passphrase.rst.inc
new file mode 100644
index 00000000..eb52399c
--- /dev/null
+++ b/docs/usage/change-passphrase.rst.inc
@@ -0,0 +1,34 @@
+..
_borg_change-passphrase: + +borg change-passphrase +---------------------- +:: + + usage: borg change-passphrase [-h] [-v] [--debug] [--lock-wait N] [--show-rc] + [--no-files-cache] [--umask M] + [--remote-path PATH] + [REPOSITORY] + + Change repository key file passphrase + + positional arguments: + REPOSITORY + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). + --show-rc show/log the return code (rc) + --no-files-cache do not load/update the file metadata cache used to + detect unchanged files + --umask M set umask to M (local and remote, default: 0077) + --remote-path PATH set remote path to executable (default: "borg") + +Description +~~~~~~~~~~~ + +The key files used for repository encryption are optionally passphrase +protected. This command can be used to change this passphrase. diff --git a/docs/usage/check.rst.inc b/docs/usage/check.rst.inc new file mode 100644 index 00000000..020881e4 --- /dev/null +++ b/docs/usage/check.rst.inc @@ -0,0 +1,75 @@ +.. _borg_check: + +borg check +---------- +:: + + usage: borg check [-h] [-v] [--debug] [--lock-wait N] [--show-rc] + [--no-files-cache] [--umask M] [--remote-path PATH] + [--repository-only] [--archives-only] [--repair] + [--save-space] [--last N] [-p PREFIX] + [REPOSITORY_OR_ARCHIVE] + + Check repository consistency + + positional arguments: + REPOSITORY_OR_ARCHIVE + repository or archive to check consistency of + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). + --show-rc show/log the return code (rc) + --no-files-cache do not load/update the file metadata cache used to + detect unchanged files + --umask M set umask to M (local and remote, default: 0077) + --remote-path PATH set remote path to executable (default: "borg") + --repository-only only perform repository checks + --archives-only only perform archives checks + --repair attempt to repair any inconsistencies found + --save-space work slower, but using less space + --last N only check last N archives (Default: all) + -p PREFIX, --prefix PREFIX + only consider archive names starting with this prefix + +Description +~~~~~~~~~~~ + +The check command verifies the consistency of a repository and the corresponding archives. + +First, the underlying repository data files are checked: + +- For all segments the segment magic (header) is checked +- For all objects stored in the segments, all metadata (e.g. crc and size) and + all data is read. The read data is checked by size and CRC. Bit rot and other + types of accidental damage can be detected this way. +- If we are in repair mode and a integrity error is detected for a segment, + we try to recover as many objects from the segment as possible. +- In repair mode, it makes sure that the index is consistent with the data + stored in the segments. +- If you use a remote repo server via ssh:, the repo check is executed on the + repo server without causing significant network traffic. +- The repository check can be skipped using the --archives-only option. + +Second, the consistency and correctness of the archive metadata is verified: + +- Is the repo manifest present? 
If not, it is rebuilt from archive metadata
+  chunks (this requires reading and decrypting of all metadata and data).
+- Check if the archive metadata chunk is present. If not, remove the archive
+  from the manifest.
+- For all files (items) in the archive, for all chunks referenced by these
+  files, check if chunk is present (if not and we are in repair mode, replace
+  it with a same-size chunk of zeros). This requires reading of archive and
+  file metadata, but not data.
+- If we are in repair mode and we checked all the archives: delete orphaned
+  chunks from the repo.
+- If you use a remote repo server via ssh:, the archive check is executed on
+  the client machine (because if encryption is enabled, the checks will require
+  decryption and this is always done client-side, because key access will be
+  required).
+- The archive checks can be time-consuming; they can be skipped using the
+  --repository-only option.

diff --git a/docs/usage/create.rst.inc b/docs/usage/create.rst.inc
new file mode 100644
index 00000000..273ba969
--- /dev/null
+++ b/docs/usage/create.rst.inc
@@ -0,0 +1,80 @@
+.. _borg_create:
+
+borg create
+-----------
+::
+
+    usage: borg create [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                       [--no-files-cache] [--umask M] [--remote-path PATH] [-s]
+                       [-p] [--list] [--filter STATUSCHARS] [-e PATTERN]
+                       [--exclude-from EXCLUDEFILE] [--exclude-caches]
+                       [--exclude-if-present FILENAME] [--keep-tag-files]
+                       [-c SECONDS] [-x] [--numeric-owner]
+                       [--timestamp yyyy-mm-ddThh:mm:ss]
+                       [--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE]
+                       [-C COMPRESSION] [--read-special] [-n]
+                       ARCHIVE PATH [PATH ...]
+
+    Create new archive
+
+    positional arguments:
+      ARCHIVE               name of archive to create (must also be a valid
+                            directory name)
+      PATH                  paths to archive
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      -v, --verbose, --info
+                            enable informative (verbose) output, work on log level
+                            INFO
+      --debug               enable debug output, work on log level DEBUG
+      --lock-wait N         wait for the lock, but max. N seconds (default: 1).
+      --show-rc             show/log the return code (rc)
+      --no-files-cache      do not load/update the file metadata cache used to
+                            detect unchanged files
+      --umask M             set umask to M (local and remote, default: 0077)
+      --remote-path PATH    set remote path to executable (default: "borg")
+      -s, --stats           print statistics for the created archive
+      -p, --progress        show progress display while creating the archive,
+                            showing Original, Compressed and Deduplicated sizes,
+                            followed by the Number of files seen and the path
+                            being processed, default: False
+      --list                output verbose list of items (files, dirs, ...)
+      --filter STATUSCHARS  only display items with the given status characters
+      -e PATTERN, --exclude PATTERN
+                            exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                            read exclude patterns from EXCLUDEFILE, one per line
+      --exclude-caches      exclude directories that contain a CACHEDIR.TAG file
+                            (http://www.brynosaurus.com/cachedir/spec.html)
+      --exclude-if-present FILENAME
+                            exclude directories that contain the specified file
+      --keep-tag-files      keep tag files of excluded caches/directories
+      -c SECONDS, --checkpoint-interval SECONDS
+                            write checkpoint every SECONDS seconds (Default: 300)
+      -x, --one-file-system
+                            stay in same file system, do not cross mount points
+      --numeric-owner       only store numeric user and group identifiers
+      --timestamp yyyy-mm-ddThh:mm:ss
+                            manually specify the archive creation date/time (UTC).
+                            alternatively, give a reference file/directory.
+ --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE + specify the chunker parameters. default: 10,23,16,4095 + -C COMPRESSION, --compression COMPRESSION + select compression algorithm (and level): none == no + compression (default), lz4 == lz4, zlib == zlib + (default level 6), zlib,0 .. zlib,9 == zlib (with + level 0..9), lzma == lzma (default level 6), lzma,0 .. + lzma,9 == lzma (with level 0..9). + --read-special open and read special files as if they were regular + files + -n, --dry-run do not create a backup archive + +Description +~~~~~~~~~~~ + +This command creates a backup archive containing all files found while recursively +traversing all paths specified. The archive will consume almost no disk space for +files or parts of files that have already been stored in other archives. + +See the output of the "borg help patterns" command for more help on exclude patterns. diff --git a/docs/usage/debug-delete-obj.rst.inc b/docs/usage/debug-delete-obj.rst.inc new file mode 100644 index 00000000..b02d7b72 --- /dev/null +++ b/docs/usage/debug-delete-obj.rst.inc @@ -0,0 +1,34 @@ +.. _borg_debug-delete-obj: + +borg debug-delete-obj +--------------------- +:: + + usage: borg debug-delete-obj [-h] [-v] [--debug] [--lock-wait N] [--show-rc] + [--no-files-cache] [--umask M] + [--remote-path PATH] + [REPOSITORY] IDs [IDs ...] + + delete the objects with the given IDs from the repo + + positional arguments: + REPOSITORY repository to use + IDs hex object ID(s) to delete from the repo + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). + --show-rc show/log the return code (rc) + --no-files-cache do not load/update the file metadata cache used to + detect unchanged files + --umask M set umask to M (local and remote, default: 0077) + --remote-path PATH set remote path to executable (default: "borg") + +Description +~~~~~~~~~~~ + +This command deletes objects from the repository. diff --git a/docs/usage/debug-dump-archive-items.rst.inc b/docs/usage/debug-dump-archive-items.rst.inc new file mode 100644 index 00000000..9265f2c0 --- /dev/null +++ b/docs/usage/debug-dump-archive-items.rst.inc @@ -0,0 +1,33 @@ +.. _borg_debug-dump-archive-items: + +borg debug-dump-archive-items +----------------------------- +:: + + usage: borg debug-dump-archive-items [-h] [-v] [--debug] [--lock-wait N] + [--show-rc] [--no-files-cache] + [--umask M] [--remote-path PATH] + ARCHIVE + + dump (decrypted, decompressed) archive items metadata (not: data) + + positional arguments: + ARCHIVE archive to dump + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). + --show-rc show/log the return code (rc) + --no-files-cache do not load/update the file metadata cache used to + detect unchanged files + --umask M set umask to M (local and remote, default: 0077) + --remote-path PATH set remote path to executable (default: "borg") + +Description +~~~~~~~~~~~ + +This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files. 
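+To take a quick look at such a dump, a small script can decode it (a sketch
+only: it assumes the dump file is a raw stream of msgpack-encoded mappings
+and that the msgpack-python package is installed; key names like ``path``
+are borg-internal details that may change between versions):
+
+::
+
+    import sys
+    import msgpack
+
+    with open(sys.argv[1], "rb") as f:
+        for item in msgpack.Unpacker(f):
+            # each item is a mapping of metadata: path, mode, mtime, ...
+            print(item.get(b"path"), item.get(b"mode"))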
diff --git a/docs/usage/debug-get-obj.rst.inc b/docs/usage/debug-get-obj.rst.inc new file mode 100644 index 00000000..f3213152 --- /dev/null +++ b/docs/usage/debug-get-obj.rst.inc @@ -0,0 +1,34 @@ +.. _borg_debug-get-obj: + +borg debug-get-obj +------------------ +:: + + usage: borg debug-get-obj [-h] [-v] [--debug] [--lock-wait N] [--show-rc] + [--no-files-cache] [--umask M] [--remote-path PATH] + [REPOSITORY] ID PATH + + get object contents from the repository and write it into file + + positional arguments: + REPOSITORY repository to use + ID hex object ID to get from the repo + PATH file to write object data into + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). + --show-rc show/log the return code (rc) + --no-files-cache do not load/update the file metadata cache used to + detect unchanged files + --umask M set umask to M (local and remote, default: 0077) + --remote-path PATH set remote path to executable (default: "borg") + +Description +~~~~~~~~~~~ + +This command gets an object from the repository. diff --git a/docs/usage/debug-put-obj.rst.inc b/docs/usage/debug-put-obj.rst.inc new file mode 100644 index 00000000..44767c27 --- /dev/null +++ b/docs/usage/debug-put-obj.rst.inc @@ -0,0 +1,33 @@ +.. _borg_debug-put-obj: + +borg debug-put-obj +------------------ +:: + + usage: borg debug-put-obj [-h] [-v] [--debug] [--lock-wait N] [--show-rc] + [--no-files-cache] [--umask M] [--remote-path PATH] + [REPOSITORY] PATH [PATH ...] + + put file(s) contents into the repository + + positional arguments: + REPOSITORY repository to use + PATH file(s) to read and create object(s) from + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). + --show-rc show/log the return code (rc) + --no-files-cache do not load/update the file metadata cache used to + detect unchanged files + --umask M set umask to M (local and remote, default: 0077) + --remote-path PATH set remote path to executable (default: "borg") + +Description +~~~~~~~~~~~ + +This command puts objects into the repository. diff --git a/docs/usage/delete.rst.inc b/docs/usage/delete.rst.inc new file mode 100644 index 00000000..a278cc92 --- /dev/null +++ b/docs/usage/delete.rst.inc @@ -0,0 +1,39 @@ +.. _borg_delete: + +borg delete +----------- +:: + + usage: borg delete [-h] [-v] [--debug] [--lock-wait N] [--show-rc] + [--no-files-cache] [--umask M] [--remote-path PATH] [-p] + [-s] [-c] [--save-space] + [TARGET] + + Delete an existing repository or archive + + positional arguments: + TARGET archive or repository to delete + + optional arguments: + -h, --help show this help message and exit + -v, --verbose, --info + enable informative (verbose) output, work on log level + INFO + --debug enable debug output, work on log level DEBUG + --lock-wait N wait for the lock, but max. N seconds (default: 1). 
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+      -p, --progress      show progress display while deleting a single archive
+      -s, --stats         print statistics for the deleted archive
+      -c, --cache-only    delete only the local cache for the given repository
+      --save-space        work slower, but using less space
+
+Description
+~~~~~~~~~~~
+
+This command deletes an archive from the repository or the complete repository.
+Disk space is reclaimed accordingly. If you delete the complete repository, the
+local cache for it (if any) is also deleted.
diff --git a/docs/usage/extract.rst.inc b/docs/usage/extract.rst.inc
new file mode 100644
index 00000000..9f2924fc
--- /dev/null
+++ b/docs/usage/extract.rst.inc
@@ -0,0 +1,54 @@
+.. _borg_extract:
+
+borg extract
+------------
+::
+
+    usage: borg extract [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                        [--no-files-cache] [--umask M] [--remote-path PATH] [-n]
+                        [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                        [--numeric-owner] [--strip-components NUMBER] [--stdout]
+                        [--sparse]
+                        ARCHIVE [PATH [PATH ...]]
+
+    Extract archive contents
+
+    positional arguments:
+      ARCHIVE             archive to extract
+      PATH                paths to extract; patterns are supported
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+      -n, --dry-run       do not actually change any files
+      -e PATTERN, --exclude PATTERN
+                          exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                          read exclude patterns from EXCLUDEFILE, one per line
+      --numeric-owner     only obey numeric user and group identifiers
+      --strip-components NUMBER
+                          Remove the specified number of leading path elements.
+                          Pathnames with fewer elements will be silently
+                          skipped.
+      --stdout            write all extracted data to stdout
+      --sparse            create holes in output sparse file from all-zero
+                          chunks
+
+Description
+~~~~~~~~~~~
+
+This command extracts the contents of an archive. By default the entire
+archive is extracted but a subset of files and directories can be selected
+by passing a list of ``PATHs`` as arguments. The file selection can further
+be restricted by using the ``--exclude`` option.
+
+See the output of the "borg help patterns" command for more help on exclude patterns.
diff --git a/docs/usage/help.rst.inc b/docs/usage/help.rst.inc
new file mode 100644
index 00000000..b7ea093b
--- /dev/null
+++ b/docs/usage/help.rst.inc
@@ -0,0 +1,92 @@
+.. _borg_patterns:
+
+borg help patterns
+~~~~~~~~~~~~~~~~~~
+::
+
+
+Exclusion patterns support four separate styles: fnmatch, shell, regular
+expressions and path prefixes. If followed by a colon (':') the first two
+characters of a pattern are used as a style selector. Explicit style
+selection is necessary when a non-default style is desired or when the
+desired pattern starts with two alphanumeric characters followed by a colon
+(i.e. `aa:something/*`).
+
+`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
+
+    These patterns use a variant of shell pattern syntax, with '*' matching
+    any number of characters, '?' matching any single character, '[...]'
+    matching any single character specified, including ranges, and '[!...]'
+    matching any character not specified. For the purpose of these patterns,
+    the path separator ('\' for Windows and '/' on other systems) is not
+    treated specially. Wrap meta-characters in brackets for a literal match
+    (i.e. `[?]` to match the literal character `?`). For a path to match
+    a pattern, it must completely match from start to end, or must match from
+    the start to just before a path separator. Except for the root path,
+    paths will never end in the path separator when matching is attempted.
+    Thus, if a given pattern ends in a path separator, a '*' is appended
+    before matching is attempted.
+
+Shell-style patterns, selector `sh:`
+
+    Like fnmatch patterns these are similar to shell patterns. The difference
+    is that the pattern may include `**/` for matching zero or more directory
+    levels, `*` for matching zero or more arbitrary characters with the
+    exception of any path separator.
+
+Regular expressions, selector `re:`
+
+    Regular expressions similar to those found in Perl are supported. Unlike
+    shell patterns regular expressions are not required to match the complete
+    path and any substring match is sufficient. It is strongly recommended to
+    anchor patterns to the start ('^'), to the end ('$') or both. Path
+    separators ('\' for Windows and '/' on other systems) in paths are
+    always normalized to a forward slash ('/') before applying a pattern. The
+    regular expression syntax is described in the `Python documentation for
+    the re module <https://docs.python.org/3/library/re.html>`_.
+
+Prefix path, selector `pp:`
+
+    This pattern style is useful to match whole sub-directories. The pattern
+    `pp:/data/bar` matches `/data/bar` and everything therein.
+
+Exclusions can be passed via the command line option `--exclude`. When used
+from within a shell the patterns should be quoted to protect them from
+expansion.
+
+The `--exclude-from` option permits loading exclusion patterns from a text
+file with one pattern per line. Lines empty or starting with the number sign
+('#') after removing whitespace on both ends are ignored. The optional style
+selector prefix is also supported for patterns loaded from a file. Due to
+whitespace removal, paths with whitespace at the beginning or end can only be
+excluded using regular expressions.
+
+Examples:
+
+# Exclude '/home/user/file.o' but not '/home/user/file.odt':
+$ borg create -e '*.o' backup /
+
+# Exclude '/home/user/junk' and '/home/user/subdir/junk' but
+# not '/home/user/importantjunk' or '/etc/junk':
+$ borg create -e '/home/*/junk' backup /
+
+# Exclude the contents of '/home/user/cache' but not the directory itself:
+$ borg create -e /home/user/cache/ backup /
+
+# The file '/home/user/cache/important' is *not* backed up:
+$ borg create -e /home/user/cache/ backup / /home/user/cache/important
+
+# The contents of directories in '/home' are not backed up when their name
+# ends in '.tmp'
+$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
+
+# Load exclusions from file
+$ cat >exclude.txt <<EOF
+# Comment line
+/home/*/junk
+*.tmp
+fm:aa:something/*
+re:^/home/[^/]+\.tmp/
+sh:/home/*/.thumbnails
+EOF
+$ borg create --exclude-from exclude.txt backup /
diff --git a/docs/usage/migrate-to-repokey.rst.inc b/docs/usage/migrate-to-repokey.rst.inc
new file mode 100644
--- /dev/null
+++ b/docs/usage/migrate-to-repokey.rst.inc
+.. _borg_migrate-to-repokey:
+
+borg migrate-to-repokey
+-----------------------
+::
+
+    usage: borg migrate-to-repokey [-h] [-v] [--debug] [--lock-wait N]
+                                   [--show-rc] [--no-files-cache] [--umask M]
+                                   [--remote-path PATH]
+                                   [REPOSITORY]
+
+    migrate passphrase -> repokey
+
+    positional arguments:
+      REPOSITORY
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+
+Description
+~~~~~~~~~~~
+
+This command migrates a repository from passphrase mode (not supported any
+more) to repokey mode.
+
+You will first be asked for the repository passphrase (to open it in
+passphrase mode). This is the same passphrase you used for this repo
+before 1.0.
+
+It will then derive the different secrets from this passphrase.
+
+Then you will be asked for a new passphrase (twice, for safety). This
+passphrase will be used to protect the repokey (which contains these same
+secrets in encrypted form). You may keep the passphrase you used before,
+or choose a different one.
+
+After migrating to repokey mode, you can change the passphrase at any time.
+But please note: the secrets will always stay the same and they could always
+be derived from your (old) passphrase-mode passphrase.
diff --git a/docs/usage/mount.rst.inc b/docs/usage/mount.rst.inc
new file mode 100644
index 00000000..380df549
--- /dev/null
+++ b/docs/usage/mount.rst.inc
@@ -0,0 +1,40 @@
+.. _borg_mount:
+
+borg mount
+----------
+::
+
+    usage: borg mount [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                      [--no-files-cache] [--umask M] [--remote-path PATH] [-f]
+                      [-o OPTIONS]
+                      REPOSITORY_OR_ARCHIVE MOUNTPOINT
+
+    Mount archive or an entire repository as a FUSE filesystem
+
+    positional arguments:
+      REPOSITORY_OR_ARCHIVE
+                          repository/archive to mount
+      MOUNTPOINT          where to mount filesystem
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+      -f, --foreground    stay in foreground, do not daemonize
+      -o OPTIONS          Extra mount options
+
+Description
+~~~~~~~~~~~
+
+This command mounts an archive as a FUSE filesystem. This can be useful for
+browsing an archive or restoring individual files. Unless the ``--foreground``
+option is given the command will run in the background until the filesystem
+is ``umounted``.
diff --git a/docs/usage/prune.rst.inc b/docs/usage/prune.rst.inc
new file mode 100644
index 00000000..d5253264
--- /dev/null
+++ b/docs/usage/prune.rst.inc
@@ -0,0 +1,72 @@
+.. _borg_prune:
+
+borg prune
+----------
+::
+
+    usage: borg prune [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                      [--no-files-cache] [--umask M] [--remote-path PATH] [-n]
+                      [-s] [--keep-within WITHIN] [-H HOURLY] [-d DAILY]
+                      [-w WEEKLY] [-m MONTHLY] [-y YEARLY] [-p PREFIX]
+                      [--save-space]
+                      [REPOSITORY]
+
+    Prune repository archives according to specified rules
+
+    positional arguments:
+      REPOSITORY          repository to prune
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+      -n, --dry-run       do not change repository
+      -s, --stats         print statistics for the deleted archive
+      --keep-within WITHIN
+                          keep all archives within this time interval
+      -H HOURLY, --keep-hourly HOURLY
+                          number of hourly archives to keep
+      -d DAILY, --keep-daily DAILY
+                          number of daily archives to keep
+      -w WEEKLY, --keep-weekly WEEKLY
+                          number of weekly archives to keep
+      -m MONTHLY, --keep-monthly MONTHLY
+                          number of monthly archives to keep
+      -y YEARLY, --keep-yearly YEARLY
+                          number of yearly archives to keep
+      -p PREFIX, --prefix PREFIX
+                          only consider archive names starting with this prefix
+      --save-space        work slower, but using less space
+
+Description
+~~~~~~~~~~~
+
+The prune command prunes a repository by deleting archives not matching
+any of the specified retention options. This command is normally used by
+automated backup scripts wanting to keep a certain number of historic backups.
+
+As an example, "-d 7" means to keep the latest backup on each day for 7 days.
+Days without backups do not count towards the total.
+The rules are applied from hourly to yearly, and backups selected by previous
+rules do not count towards those of later rules. The time that each backup
+completes is used for pruning purposes. Dates and times are interpreted in
+the local timezone, and weeks go from Monday to Sunday. Specifying a
+negative number of archives to keep means that there is no limit.
+
+The "--keep-within" option takes an argument of the form "<int><char>",
+where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
+to keep all archives that were created within the past 48 hours.
+"1m" is taken to mean "31d". The archives kept with this option do not
+count towards the totals specified by any other options.
+
+If a prefix is set with -p, then only archives that start with the prefix are
+considered for deletion and only those archives count towards the totals
+specified by the rules.
+Otherwise, *all* archives in the repository are candidates for deletion!
diff --git a/docs/usage/rename.rst.inc b/docs/usage/rename.rst.inc
new file mode 100644
index 00000000..8e0a4b61
--- /dev/null
+++ b/docs/usage/rename.rst.inc
@@ -0,0 +1,33 @@
+.. _borg_rename:
+
+borg rename
+-----------
+::
+
+    usage: borg rename [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                       [--no-files-cache] [--umask M] [--remote-path PATH]
+                       ARCHIVE NEWNAME
+
+    Rename an existing archive
+
+    positional arguments:
+      ARCHIVE             archive to rename
+      NEWNAME             the new archive name to use
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+
+Description
+~~~~~~~~~~~
+
+This command renames an archive in the repository.
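To make the prune retention rules above concrete, here is a hedged sketch of a typical automated cleanup, assuming a repository at ``/path/to/repo`` (an invented path); ``-n`` previews the selection without deleting anything::

    # keep everything from the last 48 hours, plus 7 daily,
    # 4 weekly and 6 monthly archives - dry-run first
    $ borg prune -n --keep-within 2d --keep-daily 7 \
          --keep-weekly 4 --keep-monthly 6 /path/to/repo
    # rerun without -n to actually delete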
diff --git a/docs/usage/serve.rst.inc b/docs/usage/serve.rst.inc
new file mode 100644
index 00000000..1e29ff2a
--- /dev/null
+++ b/docs/usage/serve.rst.inc
@@ -0,0 +1,32 @@
+.. _borg_serve:
+
+borg serve
+----------
+::
+
+    usage: borg serve [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                      [--no-files-cache] [--umask M] [--remote-path PATH]
+                      [--restrict-to-path PATH]
+
+    Start in server mode. This command is usually not used manually.
+
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+      --restrict-to-path PATH
+                          restrict repository access to PATH
+
+Description
+~~~~~~~~~~~
+
+This command starts a repository server process. This command is usually not used manually.
diff --git a/docs/usage/upgrade.rst.inc b/docs/usage/upgrade.rst.inc
new file mode 100644
index 00000000..a630de13
--- /dev/null
+++ b/docs/usage/upgrade.rst.inc
@@ -0,0 +1,71 @@
+.. _borg_upgrade:
+
+borg upgrade
+------------
+::
+
+    usage: borg upgrade [-h] [-v] [--debug] [--lock-wait N] [--show-rc]
+                        [--no-files-cache] [--umask M] [--remote-path PATH] [-p]
+                        [-n] [-i]
+                        [REPOSITORY]
+
+    upgrade a repository from a previous version
+
+    positional arguments:
+      REPOSITORY          path to the repository to be upgraded
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      -v, --verbose, --info
+                          enable informative (verbose) output, work on log level
+                          INFO
+      --debug             enable debug output, work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+      -p, --progress      show progress display while upgrading the repository
+      -n, --dry-run       do not change repository
+      -i, --inplace       rewrite repository in place, with no chance of going
+                          back to older versions of the repository.
+
+Description
+~~~~~~~~~~~
+
+Upgrade an existing Borg repository. This currently
+only supports converting an Attic repository, but may
+eventually be extended to cover major Borg upgrades as well.
+
+It will change the magic strings in the repository's segments
+to match the new Borg magic strings. The keyfiles found in
+$ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and
+copied to $BORG_KEYS_DIR or ~/.borg/keys.
+
+The cache files are converted, from $ATTIC_CACHE_DIR or
+~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the
+cache layout between Borg and Attic changed, so it is possible
+the first backup after the conversion takes longer than expected
+due to the cache resync.
+
+Upgrade should be able to resume if interrupted, although it
+will still iterate over all segments. If you want to start
+from scratch, use `borg delete` over the copied repository to
+make sure the cache files are also removed:
+
+    borg delete borg
+
+Unless ``--inplace`` is specified, the upgrade process first
+creates a backup copy of the repository, in
+REPOSITORY.upgrade-DATETIME, using hardlinks. This takes
+longer than in-place upgrades, but is much safer and gives
+progress information (as opposed to ``cp -al``). Once you are
+satisfied with the conversion, you can safely destroy the
+backup copy.
+
+WARNING: Running the upgrade in place will make the current
+copy unusable with older versions, with no way of going back
+to previous versions. This can PERMANENTLY DAMAGE YOUR
+REPOSITORY! Attic CAN NOT READ BORG REPOSITORIES, as the
+magic strings have changed. You have been warned.
\ No newline at end of file
diff --git a/requirements.d/development.txt b/requirements.d/development.txt
index 5ec1ed14..a0cb3c2a 100644
--- a/requirements.d/development.txt
+++ b/requirements.d/development.txt
@@ -1,6 +1,6 @@
+virtualenv<14.0
 tox
-mock
 pytest
-pytest-cov<2.0.0
-pytest-benchmark==3.0.0b1
+pytest-cov
+pytest-benchmark
 Cython
diff --git a/setup.cfg b/setup.cfg
index ecb8cdc1..812f6bee 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,11 @@
 python_files = testsuite/*.py
 
 [flake8]
-max-line-length = 120
-exclude = build,dist,.git,.idea,.cache,.tox
+# please note that the values are adjusted so that they do not cause failures
+# with existing code. if you want to change them, you should first fix all
+# flake8 failures that appear with your change.
+ignore = E122,E123,E125,E126,E127,E128,E226,E402,F401,F811
+# line length long term target: 120
+max-line-length = 255
+exclude = build,dist,.git,.idea,.cache,.tox,docs/conf.py
+
diff --git a/setup.py b/setup.py
index 6a5ed586..b64f5bb3 100644
--- a/setup.py
+++ b/setup.py
@@ -6,11 +6,8 @@ from glob import glob
 from distutils.command.build import build
 from distutils.core import Command
-from distutils.errors import DistutilsOptionError
-from distutils import log
-from setuptools.command.build_py import build_py
 
-min_python = (3, 2)
+min_python = (3, 4)
 my_python = sys.version_info
 
 if my_python < min_python:
@@ -22,7 +19,7 @@ on_rtd = os.environ.get('READTHEDOCS')
 
 # msgpack pure python data corruption was fixed in 0.4.6.
 # Also, we might use some rather recent API features.
-install_requires=['msgpack-python>=0.4.6', ]
+install_requires = ['msgpack-python>=0.4.6', ]
 
 from setuptools import setup, Extension
 
@@ -123,12 +120,14 @@ elif not on_rtd:
 with open('README.rst', 'r') as fd:
     long_description = fd.read()
 
+
 class build_usage(Command):
     description = "generate usage for each command"
 
     user_options = [
         ('output=', 'O', 'output directory'),
     ]
+
     def initialize_options(self):
         pass
 
@@ -138,8 +137,6 @@ class build_usage(Command):
     def run(self):
         print('generating usage docs')
         # allows us to build docs without the C modules fully loaded during help generation
-        if 'BORG_CYTHON_DISABLE' not in os.environ:
-            os.environ['BORG_CYTHON_DISABLE'] = self.__class__.__name__
         from borg.archiver import Archiver
         parser = Archiver().build_parser(prog='borg')
         choices = {}
@@ -169,9 +166,6 @@ class build_usage(Command):
             doc.write(re.sub("^", "    ", parser.format_help(), flags=re.M))
             doc.write("\nDescription\n~~~~~~~~~~~\n")
             doc.write(epilog)
-        # return to regular Cython configuration, if we changed it
-        if os.environ.get('BORG_CYTHON_DISABLE') == self.__class__.__name__:
-            del os.environ['BORG_CYTHON_DISABLE']
 
 
 class build_api(Command):
@@ -180,6 +174,7 @@ class build_api(Command):
     user_options = [
         ('output=', 'O', 'output directory'),
     ]
+
     def initialize_options(self):
         pass
 
@@ -203,40 +198,11 @@ API Documentation
    :undoc-members:
 """ % mod)
 
-# (function, predicate), see http://docs.python.org/2/distutils/apiref.html#distutils.cmd.Command.sub_commands
-# seems like this doesn't work on RTD, see below for build_py hack.
-build.sub_commands.append(('build_api', None))
-build.sub_commands.append(('build_usage', None))
-
-
-class build_py_custom(build_py):
-    """override build_py to also build our stuff
-
-    it is unclear why this is necessary, but in some environments
-    (Readthedocs.org, specifically), the above
-    ``build.sub_commands.append()`` doesn't seem to have an effect:
-    our custom build commands seem to be ignored when running
-    ``setup.py install``.
-
-    This class overrides the ``build_py`` target by forcing it to run
-    our custom steps as well.
-
-    See also the `bug report on RTD
-    `_.
- """ - def run(self): - super().run() - self.announce('calling custom build steps', level=log.INFO) - self.run_command('build_ext') - self.run_command('build_api') - self.run_command('build_usage') - cmdclass = { 'build_ext': build_ext, 'build_api': build_api, 'build_usage': build_usage, - 'build_py': build_py_custom, 'sdist': Sdist } @@ -248,7 +214,7 @@ if not on_rtd: Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source]) ] - if sys.platform.startswith('linux'): + if sys.platform == 'linux': ext_modules.append(Extension('borg.platform_linux', [platform_linux_source], libraries=['acl'])) elif sys.platform.startswith('freebsd'): ext_modules.append(Extension('borg.platform_freebsd', [platform_freebsd_source])) @@ -261,7 +227,7 @@ setup( 'write_to': 'borg/_version.py', }, author='The Borg Collective (see AUTHORS file)', - author_email='borgbackup@librelist.com', + author_email='borgbackup@python.org', url='https://borgbackup.readthedocs.org/', description='Deduplicated, encrypted, authenticated and compressed backups', long_description=long_description, @@ -279,14 +245,12 @@ setup( 'Operating System :: POSIX :: Linux', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Topic :: Security :: Cryptography', 'Topic :: System :: Archiving :: Backup', ], - packages=['borg', 'borg.testsuite', 'borg.support', ], + packages=['borg', 'borg.testsuite', ], entry_points={ 'console_scripts': [ 'borg = borg.archiver:main', diff --git a/tox.ini b/tox.ini index c260b506..0473cb27 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # fakeroot -u tox --recreate [tox] -envlist = py{32,33,34,35} +envlist = py{34,35},flake8 [testenv] # Change dir to avoid import problem for cython code. The directory does @@ -11,6 +11,11 @@ changedir = {toxworkdir} deps = -rrequirements.d/development.txt attic -commands = py.test --cov=borg --benchmark-skip --pyargs {posargs:borg.testsuite} +commands = py.test --cov=borg --cov-config=../.coveragerc --benchmark-skip --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * + +[testenv:flake8] +changedir = +deps = flake8 +commands = flake8