From d6bfdafdefb869dd2d50cf918826bc7fadf1aa44 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 25 Sep 2016 02:32:02 +0200 Subject: [PATCH] borg help compression, fixes #1582 --- src/borg/archiver.py | 117 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 2fe8ff111..ac39a063b 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1258,6 +1258,92 @@ class Archiver: borg create /path/to/repo::{hostname}-{user}-{utcnow} ... borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... borg prune --prefix '{hostname}-' ...\n\n''') + helptext['compression'] = textwrap.dedent(''' + Compression is off by default, if you want some, you have to specify what you want. + + Valid compression specifiers are: + + none + + Do not compress. (default) + + lz4 + + Use lz4 compression. High speed, low compression. + + zlib[,L] + + Use zlib ("gz") compression. Medium speed, medium compression. + If you do not explicitely give the compression level L (ranging from 0 + to 9), it will use level 6. + Giving level 0 (means "no compression", but still has zlib protocol + overhead) is usually pointless, you better use "none" compression. + + lzma[,L] + + Use lzma ("xz") compression. Low speed, high compression. + If you do not explicitely give the compression level L (ranging from 0 + to 9), it will use level 6. + Giving levels above 6 is pointless and counterproductive because it does + not compress better due to the buffer size used by borg - but it wastes + lots of CPU cycles and RAM. + + auto,C[,L] + + Use a built-in heuristic to decide per chunk whether to compress or not. + The heuristic tries with lz4 whether the data is compressible. + For incompressible data, it will not use compression (uses "none"). + For compressible data, it uses the given C[,L] compression - with C[,L] + being any valid compression specifier. + + The decision about which compression to use is done by borg like this: + + 1. find a compression specifier (per file): + match the path/filename against all patterns in all --compression-from + files (if any). If a pattern matches, use the compression spec given for + that pattern. If no pattern matches (and also if you do not give any + --compression-from option), default to the compression spec given by + --compression. See docs/misc/compression.conf for an example config. + + 2. if the found compression spec is not "auto", the decision is taken: + use the found compression spec. + + 3. if the found compression spec is "auto", test compressibility of each + chunk using lz4. + If it is compressible, use the C,[L] compression spec given within the + "auto" specifier. If it is not compressible, use no compression. + + Examples:: + + borg create --compression lz4 REPO::ARCHIVE data + borg create --compression zlib REPO::ARCHIVE data + borg create --compression zlib,1 REPO::ARCHIVE data + borg create --compression auto,lzma,6 REPO::ARCHIVE data + borg create --compression-from compression.conf --compression auto,lzma ... + + compression.conf has entries like: + + # example config file for --compression-from option + # + # Format of non-comment / non-empty lines: + # : + # compression-spec is same format as for --compression option + # path/filename pattern is same format as for --exclude option + none:*.gz + none:*.zip + none:*.mp3 + none:*.ogg + + General remarks: + + It is no problem to mix different compression methods in one repo, + deduplication is done on the source data chunks (not on the compressed + or encrypted data). + + If some specific chunk was once compressed and stored into the repo, creating + another backup that also uses this chunk will not change the stored chunk. + So if you use different compression specs for the backups, whichever stores a + chunk first determines its compression. See also borg recreate.\n\n''') def do_help(self, parser, commands, args): if not args.topic: @@ -1624,19 +1710,13 @@ class Archiver: help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) archive_group.add_argument('-C', '--compression', dest='compression', type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION', - help='select compression algorithm (and level):\n' - 'none == no compression (default),\n' - 'auto,C[,L] == built-in heuristic (try with lz4 whether the data is\n' - ' compressible) decides between none or C[,L] - with C[,L]\n' - ' being any valid compression algorithm (and optional level),\n' - 'lz4 == lz4,\n' - 'zlib == zlib (default level 6),\n' - 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' - 'lzma == lzma (default level 6),\n' - 'lzma,0 .. lzma,9 == lzma (with level 0..9).') + help='select compression algorithm, see the output of the ' + '"borg help compression" command for details.') archive_group.add_argument('--compression-from', dest='compression_files', type=argparse.FileType('r'), action='append', - metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') + metavar='COMPRESSIONCONFIG', + help='read compression patterns from COMPRESSIONCONFIG, see the output of the ' + '"borg help compression" command for details.') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), @@ -2146,21 +2226,16 @@ class Archiver: 'alternatively, give a reference file/directory.') archive_group.add_argument('-C', '--compression', dest='compression', type=CompressionSpec, default=None, metavar='COMPRESSION', - help='select compression algorithm (and level):\n' - 'none == no compression (default),\n' - 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n' - ' being any valid compression algorithm (and optional level),\n' - 'lz4 == lz4,\n' - 'zlib == zlib (default level 6),\n' - 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' - 'lzma == lzma (default level 6),\n' - 'lzma,0 .. lzma,9 == lzma (with level 0..9).') + help='select compression algorithm, see the output of the ' + '"borg help compression" command for details.') archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true', help='always recompress chunks, don\'t skip chunks already compressed with the same' 'algorithm.') archive_group.add_argument('--compression-from', dest='compression_files', type=argparse.FileType('r'), action='append', - metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') + metavar='COMPRESSIONCONFIG', + help='read compression patterns from COMPRESSIONCONFIG, see the output of the ' + '"borg help compression" command for details.') archive_group.add_argument('--chunker-params', dest='chunker_params', type=ChunkerParams, default=None, metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',