From 6964799d13e28077fec6e9310e2c43f596af125d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 25 Jun 2015 22:16:23 +0200 Subject: [PATCH] borg create --compression 0..9 for variable compression --- borg/archiver.py | 5 ++ borg/key.py | 5 +- docs/misc/create_compression.txt | 130 +++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 docs/misc/create_compression.txt diff --git a/borg/archiver.py b/borg/archiver.py index e18c1b277..3cd588b84 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -101,6 +101,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") t0 = datetime.now() repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) + key.compression_level = args.compression cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, @@ -630,6 +631,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=ChunkerParams, default=CHUNKER_PARAMS, metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) + subparser.add_argument('-C', '--compression', dest='compression', + type=int, default=0, metavar='N', + help='select compression algorithm and level. 0..9 is supported and means zlib ' + 'level 0 (no compression, fast, default) .. 
zlib level 9 (high compression, slow).') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/key.py b/borg/key.py index 3e44b092e..b13295101 100644 --- a/borg/key.py +++ b/borg/key.py @@ -53,6 +53,7 @@ class KeyBase: def __init__(self): self.TYPE_STR = bytes([self.TYPE]) + self.compression_level = 0 def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -83,7 +84,7 @@ class PlaintextKey(KeyBase): return sha256(data).digest() def encrypt(self, data): - return b''.join([self.TYPE_STR, zlib.compress(data)]) + return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)]) def decrypt(self, id, data): if data[0] != self.TYPE: @@ -115,7 +116,7 @@ class AESKeyBase(KeyBase): return HMAC(self.id_key, data, sha256).digest() def encrypt(self, data): - data = zlib.compress(data) + data = zlib.compress(data, self.compression_level) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) hmac = HMAC(self.enc_hmac_key, data, sha256).digest() diff --git a/docs/misc/create_compression.txt b/docs/misc/create_compression.txt new file mode 100644 index 000000000..89ffdf4d9 --- /dev/null +++ b/docs/misc/create_compression.txt @@ -0,0 +1,130 @@ +data compression +================ + +borg create --compression N repo::archive data + +Currently, borg only supports zlib compression. There are plans to expand this +to other, faster or better compression algorithms in the future. + +N == 0 -> zlib level 0 == very quick, no compression +N == 1 -> zlib level 1 == quick, low compression +... +N == 9 -> zlib level 9 == slow, high compression + +Measurements made on a Haswell Ultrabook, SSD storage, Linux. 
+ + +Example 1: lots of relatively small text files (linux kernel src) +----------------------------------------------------------------- + +N == 1 does a good job here: it makes up for the additional time needed for +compression because less data has to be written to storage (compare N == 0). + +N == 6 is also quite ok: a little slower, with a slightly smaller repo size. +6 was the old default of borg. + +High compression levels only give a little more compression, but take a lot +of CPU time. + +$ borg create --stats --compression 0 +------------------------------------------------------------------------------ +Duration: 50.40 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 1.18 GB 1.01 GB + + Unique chunks Total chunks +Chunk index: 70263 82309 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 1 +------------------------------------------------------------------------------ +Duration: 49.29 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 368.62 MB 295.22 MB + + Unique chunks Total chunks +Chunk index: 70280 82326 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 5 +------------------------------------------------------------------------------ +Duration: 59.99 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 331.70 MB 262.20 MB + + Unique chunks Total chunks +Chunk index: 70290 82336 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 6 +------------------------------------------------------------------------------ +Duration: 1 minutes 13.64 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 328.79 MB 259.56 MB + + Unique chunks Total chunks +Chunk index: 
70279 82325 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 9 +------------------------------------------------------------------------------ +Duration: 3 minutes 1.58 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 326.57 MB 257.57 MB + + Unique chunks Total chunks +Chunk index: 70292 82338 +------------------------------------------------------------------------------ + + +Example 2: large VM disk file (sparse file) +------------------------------------------- + +The file's apparent size (as listed in its directory) is 80GB, but a lot of it is sparse (and reads as +zeros). + +$ borg create --stats --compression 0 +------------------------------------------------------------------------------ +Duration: 13 minutes 48.47 seconds +Number of files: 1 + + Original size Compressed size Deduplicated size +This archive: 80.54 GB 80.55 GB 10.87 GB + + Unique chunks Total chunks +Chunk index: 147307 177109 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 1 +------------------------------------------------------------------------------ +Duration: 15 minutes 31.34 seconds +Number of files: 1 + + Original size Compressed size Deduplicated size +This archive: 80.54 GB 6.68 GB 5.67 GB + + Unique chunks Total chunks +Chunk index: 147309 177111 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 6 +------------------------------------------------------------------------------ +Duration: 18 minutes 57.54 seconds +Number of files: 1 + + Original size Compressed size Deduplicated size +This archive: 80.54 GB 6.19 GB 5.44 GB + + Unique chunks Total chunks +Chunk index: 147307 177109 +------------------------------------------------------------------------------