Merge pull request #67 from ThomasWaldmann/zlib-compression-level

borg create --compression 0..9 for variable compression
This commit is contained in:
TW 2015-06-25 22:41:43 +02:00
commit e4c4d458a6
3 changed files with 138 additions and 2 deletions

View File

@ -101,6 +101,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
t0 = datetime.now()
repository = self.open_repository(args.archive, exclusive=True)
manifest, key = Manifest.load(repository)
key.compression_level = args.compression
cache = Cache(repository, key, manifest, do_files=args.cache_files)
archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
@ -630,6 +631,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
type=ChunkerParams, default=CHUNKER_PARAMS,
metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
subparser.add_argument('-C', '--compression', dest='compression',
type=int, default=0, metavar='N',
help='select compression algorithm and level. 0..9 is supported and means zlib '
'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
subparser.add_argument('archive', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to create')

View File

@ -53,6 +53,7 @@ class KeyBase:
def __init__(self):
self.TYPE_STR = bytes([self.TYPE])
self.compression_level = 0
def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key
@ -83,7 +84,7 @@ class PlaintextKey(KeyBase):
return sha256(data).digest()
def encrypt(self, data):
return b''.join([self.TYPE_STR, zlib.compress(data)])
return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
def decrypt(self, id, data):
if data[0] != self.TYPE:
@ -115,7 +116,7 @@ class AESKeyBase(KeyBase):
return HMAC(self.id_key, data, sha256).digest()
def encrypt(self, data):
data = zlib.compress(data)
data = zlib.compress(data, self.compression_level)
self.enc_cipher.reset()
data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
hmac = HMAC(self.enc_hmac_key, data, sha256).digest()

View File

@ -0,0 +1,130 @@
data compression
================
borg create --compression N repo::archive data
Currently, borg only supports zlib compression. There are plans to expand this
to other, faster or better compression algorithms in the future.
N == 0 -> zlib level 0 == very quick, no compression
N == 1 -> zlib level 1 == quick, low compression
...
N == 9 -> zlib level 9 == slow, high compression
Measurements made on a Haswell Ultrabook, SSD storage, Linux.
Example 1: lots of relatively small text files (linux kernel src)
-----------------------------------------------------------------
N == 1 does a good job here: the extra time spent on compression is more than
offset by having less data to write to storage (compare with N == 0).
N == 6 is also quite ok, a little slower, a little less repo size.
6 was the old default of borg.
High compression levels only give a little more compression, but take a lot
of cpu time.
$ borg create --stats --compression 0
------------------------------------------------------------------------------
Duration: 50.40 seconds
Number of files: 72890
Original size Compressed size Deduplicated size
This archive: 1.17 GB 1.18 GB 1.01 GB
Unique chunks Total chunks
Chunk index: 70263 82309
------------------------------------------------------------------------------
$ borg create --stats --compression 1
------------------------------------------------------------------------------
Duration: 49.29 seconds
Number of files: 72890
Original size Compressed size Deduplicated size
This archive: 1.17 GB 368.62 MB 295.22 MB
Unique chunks Total chunks
Chunk index: 70280 82326
------------------------------------------------------------------------------
$ borg create --stats --compression 5
------------------------------------------------------------------------------
Duration: 59.99 seconds
Number of files: 72890
Original size Compressed size Deduplicated size
This archive: 1.17 GB 331.70 MB 262.20 MB
Unique chunks Total chunks
Chunk index: 70290 82336
------------------------------------------------------------------------------
$ borg create --stats --compression 6
------------------------------------------------------------------------------
Duration: 1 minutes 13.64 seconds
Number of files: 72890
Original size Compressed size Deduplicated size
This archive: 1.17 GB 328.79 MB 259.56 MB
Unique chunks Total chunks
Chunk index: 70279 82325
------------------------------------------------------------------------------
$ borg create --stats --compression 9
------------------------------------------------------------------------------
Duration: 3 minutes 1.58 seconds
Number of files: 72890
Original size Compressed size Deduplicated size
This archive: 1.17 GB 326.57 MB 257.57 MB
Unique chunks Total chunks
Chunk index: 70292 82338
------------------------------------------------------------------------------
Example 2: large VM disk file (sparse file)
-------------------------------------------
The file's nominal size as reported by the filesystem is 80GB, but a lot of it
is sparse (and reads as zeros).
$ borg create --stats --compression 0
------------------------------------------------------------------------------
Duration: 13 minutes 48.47 seconds
Number of files: 1
Original size Compressed size Deduplicated size
This archive: 80.54 GB 80.55 GB 10.87 GB
Unique chunks Total chunks
Chunk index: 147307 177109
------------------------------------------------------------------------------
$ borg create --stats --compression 1
------------------------------------------------------------------------------
Duration: 15 minutes 31.34 seconds
Number of files: 1
Original size Compressed size Deduplicated size
This archive: 80.54 GB 6.68 GB 5.67 GB
Unique chunks Total chunks
Chunk index: 147309 177111
------------------------------------------------------------------------------
$ borg create --stats --compression 6
------------------------------------------------------------------------------
Duration: 18 minutes 57.54 seconds
Number of files: 1
Original size Compressed size Deduplicated size
This archive: 80.54 GB 6.19 GB 5.44 GB
Unique chunks Total chunks
Chunk index: 147307 177109
------------------------------------------------------------------------------