diff --git a/CHANGES-experimental.txt b/CHANGES-experimental.txt new file mode 100644 index 00000000..d42f0946 --- /dev/null +++ b/CHANGES-experimental.txt @@ -0,0 +1,48 @@ +Important note about "experimental" branch +========================================== + +The goal of the "experimental" branch is to bring together: +- changesets from the master branch +- features that DO IMPACT compatibility +- experiments with new technologies +- etc. + +THERE IS NO GUARANTEE THAT IT IS COMPATIBLE WITH THE MASTER BRANCH OR PREVIOUS +"experimental" CODE NOR THAT YOU CAN SWITCH BACK AND FORTH BETWEEN BRANCHES +WITHIN THE SAME REPOSITORY WITHOUT ENCOUNTERING SEVERE ISSUES. + +Please also see the LICENSE for more information. + + +Stuff in "experimental" that is not in "master" minus minor changes +=================================================================== + +added tuning docs + +attic init --compression NN --cipher NN --mac NN ... +(see attic init --help) + +new hashes: sha512-256 + sha512 + sha1 + ghash (default) +new MACs: hmac-sha512-256 + hmac-sha512 + hmac-sha1 + gmac (default) +new ciphers: aes256-ctr + hmac-sha512-256 + aes256-gcm (default) +new compression: no compression (default) + zlib level 1..9 (previously, level 6 was hardcoded) + lzma preset 0..9 + lz4 (and other) multi-threaded algorithms from the blosc library + +source: more flexible type 0x03 header format that allows specifying the hash algo, +compression algo and level, encryption algo, and key type. + +The IV is stored at full length; the length of the stored IV/MAC/hash is flexible. +Indexing key size (key = id_hash()) is flexible and configurable per repo. + +source: less hardcoding, numeric offsets / lengths +source: flexible hashing, compression, encryption, key dispatching + diff --git a/README.rst b/README.rst index 214b7fb0..759c81e6 100644 --- a/README.rst +++ b/README.rst @@ -34,7 +34,7 @@ Space efficient storage Optional data encryption All data can be protected using 256-bit AES encryption and data integrity - and authenticity is verified using HMAC-SHA256. + and authenticity are verified using a MAC (message authentication code). Off-site backups Borg can store data on any remote host accessible over SSH. This is @@ -49,6 +49,7 @@ What do I need? Borg requires Python 3.2 or above to work. Borg also requires a sufficiently recent OpenSSL (>= 1.0.0). In order to mount archives as filesystems, llfuse is required. +For other Python requirements, please see install_requires in setup.py. How do I install it?
-------------------- diff --git a/borg/_hashindex.c b/borg/_hashindex.c index e2589d0b..a33be300 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -366,7 +366,7 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs void *key = NULL; while((key = hashindex_next_key(index, key))) { - values = key + 32; + values = key + index->key_size; unique_size += values[1]; unique_csize += values[2]; size += values[0] * values[1]; diff --git a/borg/archive.py b/borg/archive.py index e6d55747..569b2225 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -616,7 +616,7 @@ class ArchiveChecker: self.repository = repository self.init_chunks() self.key = self.identify_key(repository) - if Manifest.MANIFEST_ID not in self.chunks: + if Manifest.manifest_id(repository) not in self.chunks: self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) @@ -635,7 +635,7 @@ class ArchiveChecker: # Explicity set the initial hash table capacity to avoid performance issues # due to hash table "resonance" capacity = int(len(self.repository) * 1.2) - self.chunks = ChunkIndex(capacity) + self.chunks = ChunkIndex(capacity, key_size=self.repository.key_size) marker = None while True: result = self.repository.list(limit=10000, marker=marker) @@ -687,7 +687,7 @@ class ArchiveChecker: Missing and/or incorrect data is repaired when detected """ # Exclude the manifest from chunks - del self.chunks[Manifest.MANIFEST_ID] + del self.chunks[Manifest.manifest_id(self.repository)] def mark_as_possibly_superseded(id_): if self.chunks.get(id_, (0,))[0] == 0: diff --git a/borg/archiver.py b/borg/archiver.py index 438418ab..7d9a5702 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -16,7 +16,7 @@ from . import __version__ from .archive import Archive, ArchiveChecker from .repository import Repository from .cache import Cache -from .key import key_creator +from .key import key_creator, maccer_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ @@ -30,11 +30,11 @@ class Archiver: def __init__(self): self.exit_code = 0 - def open_repository(self, location, create=False, exclusive=False): + def open_repository(self, location, create=False, exclusive=False, key_size=None): if location.proto == 'ssh': - repository = RemoteRepository(location, create=create) + repository = RemoteRepository(location, create=create, key_size=key_size) else: - repository = Repository(location.path, create=create, exclusive=exclusive) + repository = Repository(location.path, create=create, exclusive=exclusive, key_size=key_size) repository._location = location return repository @@ -59,10 +59,12 @@ class Archiver: def do_init(self, args): """Initialize an empty repository""" print('Initializing repository at "%s"' % args.repository.orig) - repository = self.open_repository(args.repository, create=True, exclusive=True) - key = key_creator(repository, args) + key_cls = key_creator(args) + maccer_cls = maccer_creator(args, key_cls) + repository = self.open_repository(args.repository, create=True, exclusive=True, + key_size=maccer_cls.digest_size) + key = key_cls.create(repository, args) manifest = Manifest(key, repository) - manifest.key = key manifest.write() repository.commit() Cache(repository, key, manifest, 
warn_if_unencrypted=False) @@ -523,8 +525,39 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") init_epilog = textwrap.dedent(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. - Encryption can be enabled at repository init time. - """) + Encryption can be enabled, compression, cipher and mac method can be chosen at + repository init time. + + --compression METHODs (default: %02d): + + - 00 no compression + - 01..09 zlib levels 1..9 (1 means low compression, 9 max. compression) + - 10..19 lzma levels 0..9 (0 means low compression, 9 max. compression) + - 20..29 lz4 (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 30..39 lz4hc (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 40..49 blosclz (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 50..59 snappy (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 60..69 zlib (blosc) levels 0..9 (0 = no, 9 = max. compression) + + --cipher METHODs (default: %02d or %02d) + + - 00 No encryption + - 01 AEAD: AES-CTR + HMAC-SHA256 + - 02 AEAD: AES-GCM + + --mac METHODs (default: %02d or %02d): + + - 00 sha256 (simple hash, no MAC, faster on 32bit CPU) + - 01 sha512-256 (simple hash, no MAC, faster on 64bit CPU) + - 02 ghash (simple hash, no MAC, fastest on CPUs with AES-GCM support) + - 03 sha1 (simple hash, no MAC, fastest on CPUs without AES-GCM support) + - 04 sha512 (simple hash, no MAC, faster on 64bit CPU) + - 10 hmac-sha256 (MAC, faster on 32bit CPU) + - 11 hmac-sha512-256 (MAC, faster on 64bit CPU) + - 13 hmac-sha1 (MAC, fastest on CPUs without AES-GCM support) + - 14 hmac-sha512 (MAC, faster on 64bit CPU) + - 20 gmac (MAC, fastest on CPUs with AES-GCM support) + """ % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -534,7 +567,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', choices=('none', 'passphrase', 'keyfile'), default='none', - help='select encryption method') + help='select encryption key method') + subparser.add_argument('-C', '--cipher', dest='cipher', + type=int, default=None, metavar='METHOD', + help='select cipher (0..2)') + subparser.add_argument('-c', '--compression', dest='compression', + type=int, default=COMPR_DEFAULT, metavar='METHOD', + help='select compression method (0..19)') + subparser.add_argument('-m', '--mac', dest='mac', + type=int, default=None, metavar='METHOD', + help='select hash/mac method (0..3)') check_epilog = textwrap.dedent(""" The check command verifies the consistency of a repository and the corresponding diff --git a/borg/cache.py b/borg/cache.py index 110f088d..3879d561 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -95,7 +95,7 @@ class Cache: config.set('cache', 'manifest', '') with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) - ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) + ChunkIndex(key_size=self.repository.key_size).write(os.path.join(self.path, 'chunks').encode('utf-8')) with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd: pass # empty file with open(os.path.join(self.path, 'files'), 'wb') as fd: @@ -118,7 +118,8 @@ class Cache: self.timestamp = self.config.get('cache', 
'timestamp', fallback=None) self.key_type = self.config.get('cache', 'key_type', fallback=None) self.previous_location = self.config.get('cache', 'previous_location', fallback=None) - self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8')) + self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'), + key_size=self.repository.key_size) self.files = None def open(self): @@ -272,7 +273,7 @@ class Cache: return archive_name def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out): - chunk_idx = ChunkIndex() + chunk_idx = ChunkIndex(key_size=repository.key_size) cdata = repository.get(archive_id) data = key.decrypt(archive_id, cdata) add(chunk_idx, archive_id, len(data), len(cdata)) @@ -299,13 +300,13 @@ class Cache: tf_out.addfile(tarinfo, f) os.unlink(file_tmp) - def create_master_idx(chunk_idx, tf_in, tmp_dir): + def create_master_idx(chunk_idx, repository, tf_in, tmp_dir): chunk_idx.clear() for tarinfo in tf_in: archive_id_hex = tarinfo.name tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') - archive_chunk_idx = ChunkIndex.read(chunk_idx_path) + archive_chunk_idx = ChunkIndex.read(chunk_idx_path, key_size=repository.key_size) for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems(): add(chunk_idx, chunk_id, size, csize, incr=count) os.unlink(chunk_idx_path) @@ -334,7 +335,7 @@ class Cache: rename_out_archive() print('Merging collection into master chunks cache...') in_archive = open_in_archive() - create_master_idx(self.chunks, in_archive, tmp_dir) + create_master_idx(self.chunks, repository, in_archive, tmp_dir) close_archive(in_archive) print('Done.') diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 61dbc42d..4a82d8c6 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -7,6 +7,12 @@ from libc.stdlib cimport malloc, free API_VERSION = 2 +AES_CTR_MODE = 1 +AES_GCM_MODE = 2 + +MAC_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below. 
+IV_SIZE = 16 # bytes; 128 bits + cdef extern from "openssl/rand.h": int RAND_bytes(unsigned char *buf, int num) @@ -23,6 +29,7 @@ cdef extern from "openssl/evp.h": pass const EVP_MD *EVP_sha256() const EVP_CIPHER *EVP_aes_256_ctr() + const EVP_CIPHER *EVP_aes_256_gcm() void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a) void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a) @@ -36,20 +43,33 @@ cdef extern from "openssl/evp.h": const unsigned char *in_, int inl) int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) - + int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, unsigned char *ptr) int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen, const unsigned char *salt, int saltlen, int iter, const EVP_MD *digest, int keylen, unsigned char *out) + int EVP_CTRL_GCM_GET_TAG + int EVP_CTRL_GCM_SET_TAG + int EVP_CTRL_GCM_SET_IVLEN import struct _int = struct.Struct('>I') -_long = struct.Struct('>Q') +_2long = struct.Struct('>QQ') bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0] -bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] -long_to_bytes = lambda x: _long.pack(x) + + +def bytes16_to_int(b, offset=0): + h, l = _2long.unpack_from(b, offset) + return (h << 64) + l + + +def int_to_bytes16(i): + max_uint64 = 0xffffffffffffffff + l = i & max_uint64 + h = (i >> 64) & max_uint64 + return _2long.pack(h, l) def num_aes_blocks(length): @@ -59,6 +79,22 @@ def num_aes_blocks(length): return (length + 15) // 16 +def increment_iv(iv, amount): + """ + increment the given IV considering that bytes of data was + encrypted based on it. In CTR / GCM mode, the IV is just a counter and + must never repeat. + + :param iv: current IV, 16 bytes (128 bit) + :param amount: amount of data (in bytes) that was encrypted + :return: new IV, 16 bytes (128 bit) + """ + iv = bytes16_to_int(iv) + iv += num_aes_blocks(amount) + iv = int_to_bytes16(iv) + return iv + + def pbkdf2_sha256(password, salt, iterations, size): """Password based key derivation function 2 (RFC2898) """ @@ -93,12 +129,19 @@ cdef class AES: """ cdef EVP_CIPHER_CTX ctx cdef int is_encrypt + cdef int mode - def __cinit__(self, is_encrypt, key, iv=None): + def __cinit__(self, mode, is_encrypt, key, iv=None): EVP_CIPHER_CTX_init(&self.ctx) + self.mode = mode self.is_encrypt = is_encrypt # Set cipher type and mode - cipher_mode = EVP_aes_256_ctr() + if mode == AES_CTR_MODE: + cipher_mode = EVP_aes_256_ctr() + elif mode == AES_GCM_MODE: + cipher_mode = EVP_aes_256_gcm() + else: + raise Exception('unknown mode') if self.is_encrypt: if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): raise Exception('EVP_EncryptInit_ex failed') @@ -117,6 +160,10 @@ cdef class AES: key2 = key if iv: iv2 = iv + if self.mode == AES_GCM_MODE: + # Set IV length (bytes) + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL): + raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed') # Initialise key and IV if self.is_encrypt: if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): @@ -125,16 +172,26 @@ cdef class AES: if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): raise Exception('EVP_DecryptInit_ex failed') - @property - def iv(self): - return self.ctx.iv[:16] + def add(self, aad): + cdef int aadl = len(aad) + cdef int outl + if self.mode != AES_GCM_MODE: + raise Exception('additional data only supported for AES GCM mode') + # Zero or more calls to specify any AAD + if self.is_encrypt: + if 
not EVP_EncryptUpdate(&self.ctx, NULL, &outl, aad, aadl): + raise Exception('EVP_EncryptUpdate failed') + else: # decrypt + if not EVP_DecryptUpdate(&self.ctx, NULL, &outl, aad, aadl): + raise Exception('EVP_DecryptUpdate failed') - def encrypt(self, data): + def compute_mac_and_encrypt(self, data): cdef int inl = len(data) cdef int ctl = 0 cdef int outl = 0 - # note: modes that use padding, need up to one extra AES block (16b) + # note: modes that use padding, need up to one extra AES block (16B) cdef unsigned char *out = malloc(inl+16) + cdef unsigned char *mac = malloc(MAC_SIZE) if not out: raise MemoryError try: @@ -144,15 +201,20 @@ cdef class AES: if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): raise Exception('EVP_EncryptFinal failed') ctl += outl - return out[:ctl] + if self.mode == AES_GCM_MODE: + # Get tag (mac) - only GCM mode. for CTR, the returned mac is undefined + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, MAC_SIZE, mac): + raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed') + return (mac[:MAC_SIZE]), out[:ctl] finally: + free(mac) free(out) - def decrypt(self, data): + def check_mac_and_decrypt(self, mac, data): cdef int inl = len(data) cdef int ptl = 0 cdef int outl = 0 - # note: modes that use padding, need up to one extra AES block (16b). + # note: modes that use padding, need up to one extra AES block (16B). # This is what the openssl docs say. I am not sure this is correct, # but OTOH it will not cause any harm if our buffer is a little bigger. cdef unsigned char *out = malloc(inl+16) @@ -162,10 +224,12 @@ cdef class AES: if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): raise Exception('EVP_DecryptUpdate failed') ptl = outl + if self.mode == AES_GCM_MODE: + # Set expected tag (mac) value. + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, MAC_SIZE, mac): + raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed') if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: - # this error check is very important for modes with padding or - # authentication. for them, a failure here means corrupted data. - # CTR mode does not use padding nor authentication. 
+ # for GCM mode, a failure here means corrupted / tampered tag (mac) or data raise Exception('EVP_DecryptFinal failed') ptl += outl return out[:ptl] diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 13f9da93..7a65cbce 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -26,9 +26,11 @@ _NoDefault = object() cdef class IndexBase: cdef HashIndex *index - key_size = 32 + cdef int key_size - def __cinit__(self, capacity=0, path=None): + def __cinit__(self, capacity=0, path=None, key_size=None): + assert key_size is not None + self.key_size = key_size if path: self.index = hashindex_read(os.fsencode(path)) if not self.index: @@ -43,8 +45,8 @@ cdef class IndexBase: hashindex_free(self.index) @classmethod - def read(cls, path): - return cls(path=path) + def read(cls, path, key_size=None): + return cls(path=path, key_size=key_size) def write(self, path): if not hashindex_write(self.index, os.fsencode(path)): @@ -61,7 +63,7 @@ cdef class IndexBase: self[key] = value def __delitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size if not hashindex_delete(self.index, key): raise Exception('hashindex_delete failed') @@ -90,14 +92,14 @@ cdef class NSIndex(IndexBase): value_size = 8 def __getitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: raise KeyError return _le32toh(data[0]), _le32toh(data[1]) def __setitem__(self, key, value): - assert len(key) == 32 + assert len(key) == self.key_size cdef int[2] data data[0] = _htole32(value[0]) data[1] = _htole32(value[1]) @@ -105,20 +107,20 @@ cdef class NSIndex(IndexBase): raise Exception('hashindex_set failed') def __contains__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) return data != NULL def iteritems(self, marker=None): cdef const void *key - iter = NSKeyIterator() + iter = NSKeyIterator(self.key_size) iter.idx = self iter.index = self.index if marker: key = hashindex_get(self.index, marker) if marker is None: raise IndexError - iter.key = key - 32 + iter.key = key - self.key_size return iter @@ -126,9 +128,11 @@ cdef class NSKeyIterator: cdef NSIndex idx cdef HashIndex *index cdef const void *key + cdef int key_size - def __cinit__(self): + def __cinit__(self, key_size): self.key = NULL + self.key_size = key_size def __iter__(self): return self @@ -137,8 +141,8 @@ cdef class NSKeyIterator: self.key = hashindex_next_key(self.index, self.key) if not self.key: raise StopIteration - cdef int *value = (self.key + 32) - return (self.key)[:32], (_le32toh(value[0]), _le32toh(value[1])) + cdef int *value = (self.key + self.key_size) + return (self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1])) cdef class ChunkIndex(IndexBase): @@ -146,14 +150,14 @@ cdef class ChunkIndex(IndexBase): value_size = 12 def __getitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: raise KeyError return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2]) def __setitem__(self, key, value): - assert len(key) == 32 + assert len(key) == self.key_size cdef int[3] data data[0] = _htole32(value[0]) data[1] = _htole32(value[1]) @@ -162,20 +166,20 @@ cdef class ChunkIndex(IndexBase): raise Exception('hashindex_set failed') def __contains__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) return data != NULL def iteritems(self, marker=None): cdef const void *key - 
iter = ChunkKeyIterator() + iter = ChunkKeyIterator(self.key_size) iter.idx = self iter.index = self.index if marker: key = hashindex_get(self.index, marker) if marker is None: raise IndexError - iter.key = key - 32 + iter.key = key - self.key_size return iter def summarize(self): @@ -188,9 +192,11 @@ cdef class ChunkKeyIterator: cdef ChunkIndex idx cdef HashIndex *index cdef const void *key + cdef int key_size - def __cinit__(self): + def __cinit__(self, key_size): self.key = NULL + self.key_size = key_size def __iter__(self): return self @@ -199,5 +205,5 @@ cdef class ChunkKeyIterator: self.key = hashindex_next_key(self.index, self.key) if not self.key: raise StopIteration - cdef int *value = (self.key + 32) - return (self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) + cdef int *value = (self.key + self.key_size) + return (self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) diff --git a/borg/helpers.py b/borg/helpers.py index e97c88bf..1afce914 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -82,18 +82,20 @@ def check_extension_modules(): class Manifest: - MANIFEST_ID = b'\0' * 32 - def __init__(self, key, repository): self.archives = {} self.config = {} self.key = key self.repository = repository + @classmethod + def manifest_id(cls, repository): + return b'\0' * repository.key_size + @classmethod def load(cls, repository, key=None): from .key import key_factory - cdata = repository.get(cls.MANIFEST_ID) + cdata = repository.get(cls.manifest_id(repository)) if not key: key = key_factory(repository, cdata) manifest = cls(key, repository) @@ -118,7 +120,7 @@ class Manifest: 'config': self.config, })) self.id = self.key.id_hash(data) - self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) + self.repository.put(self.manifest_id(self.repository), self.key.encrypt(data)) def list_archive_infos(self, sort_by=None, reverse=False): # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts diff --git a/borg/key.py b/borg/key.py index 3e44b092..97bd4445 100644 --- a/borg/key.py +++ b/borg/key.py @@ -3,14 +3,33 @@ from getpass import getpass import os import msgpack import textwrap +from collections import namedtuple import hmac -from hashlib import sha256 +from hashlib import sha1, sha256, sha512 import zlib -from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +try: + import lzma # python >= 3.3 +except ImportError: + try: + from backports import lzma # backports.lzma from pypi + except ImportError: + lzma = None + +try: + import blosc +except ImportError: + blosc = None + +from .crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \ + bytes_to_int, increment_iv, num_aes_blocks from .helpers import IntegrityError, get_keys_dir, Error -PREFIX = b'\0' * 8 +# TODO fix cyclic import: +#from .archive import CHUNK_MAX +CHUNK_MAX = 10 * 1024 * 1024 + +Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy') class UnsupportedPayloadError(Error): @@ -22,47 +41,393 @@ class KeyfileNotFoundError(Error): """ +class sha512_256(object): # note: can't subclass sha512 + """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" + digestsize = digest_size = 32 + block_size = 64 + + def __init__(self, data=None): + self.name = 'sha512-256' + self._h = sha512() + if data: + self.update(data) + + def update(self, data): + self._h.update(data) + + def digest(self): + return 
self._h.digest()[:self.digest_size] + + def hexdigest(self): + return self._h.hexdigest()[:self.digest_size * 2] + + def copy(self): + new = sha512_256.__new__(sha512_256) + new._h = self._h.copy() + return new + + +# HASH / MAC stuff below all has a mac-like interface, so it can be used in the same way. +# special case: hashes do not use keys (and thus, do not sign/authenticate) + +class HASH: # note: we can't subclass sha1/sha256/sha512 + TYPE = 0 # override in subclass + digest_size = 0 # override in subclass + hash_func = None # override in subclass + + def __init__(self, key, data=b''): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not None: + raise Exception("use a HMAC if you have a key") + self.h = self.hash_func(data) + + def update(self, data): + self.h.update(data) + + def digest(self): + return self.h.digest() + + def hexdigest(self): + return self.h.hexdigest() + + +class SHA256(HASH): + TYPE = 0 + digest_size = 32 + hash_func = sha256 + + +class SHA512_256(HASH): + TYPE = 1 + digest_size = 32 + hash_func = sha512_256 + + +class GHASH: + TYPE = 2 + digest_size = 16 + + def __init__(self, key, data=b''): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not None: + raise Exception("use a MAC if you have a key") + self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=b'\0' * 32, iv=b'\0' * 16) + if data: + self.update(data) + + def update(self, data): + # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data + self.mac_cipher.add(bytes(data)) + + def digest(self): + hash, _ = self.mac_cipher.compute_mac_and_encrypt(b'') + return hash + + +class SHA1(HASH): + TYPE = 3 + digest_size = 20 + hash_func = sha1 + + +class SHA512(HASH): + TYPE = 4 + digest_size = 64 + hash_func = sha512 + + class HMAC(hmac.HMAC): - """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews - """ + TYPE = 0 # override in subclass + digest_size = 0 # override in subclass + hash_func = None # override in subclass + + def __init__(self, key, data): + if key is None: + raise Exception("do not use HMAC if you don't have a key") + super().__init__(key, data, self.hash_func) + def update(self, msg): + # Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews self.inner.update(msg) -def key_creator(repository, args): - if args.encryption == 'keyfile': - return KeyfileKey.create(repository, args) - elif args.encryption == 'passphrase': - return PassphraseKey.create(repository, args) - else: - return PlaintextKey.create(repository, args) +class HMAC_SHA256(HMAC): + TYPE = 10 + digest_size = 32 + hash_func = sha256 -def key_factory(repository, manifest_data): - if manifest_data[0] == KeyfileKey.TYPE: - return KeyfileKey.detect(repository, manifest_data) - elif manifest_data[0] == PassphraseKey.TYPE: - return PassphraseKey.detect(repository, manifest_data) - elif manifest_data[0] == PlaintextKey.TYPE: - return PlaintextKey.detect(repository, manifest_data) - else: - raise UnsupportedPayloadError(manifest_data[0]) +class HMAC_SHA512_256(HMAC): + TYPE = 11 + digest_size = 32 + hash_func = sha512_256 -class KeyBase: +class HMAC_SHA1(HMAC): + TYPE = 13 + digest_size = 20 + hash_func = sha1 + + +class HMAC_SHA512(HMAC): + TYPE = 14 + digest_size = 64 + hash_func = sha512 + + +class GMAC(GHASH): + TYPE = 20 + digest_size = 16 + + def __init__(self, key, data=b''): + if key is None: + raise Exception("do not use GMAC if you don't have a key") + self.mac_cipher = AES(mode=AES_GCM_MODE, 
is_encrypt=True, key=key, iv=b'\0' * 16) + if data: + self.update(data) + + +# defaults are optimized for speed on modern CPUs with AES hw support +HASH_DEFAULT = GHASH.TYPE +MAC_DEFAULT = GMAC.TYPE + + +# compressor classes, all same interface + +class NullCompressor(object): # uses 0 in the mapping + TYPE = 0 + + def compress(self, data): + return bytes(data) + + def decompress(self, data): + return bytes(data) + + +class ZlibCompressor(object): # uses 1..9 in the mapping + TYPE = 0 + LEVELS = range(10) + + def compress(self, data): + level = self.TYPE - ZlibCompressor.TYPE + return zlib.compress(data, level) + + def decompress(self, data): + return zlib.decompress(data) + + +class LzmaCompressor(object): # uses 10..19 in the mapping + TYPE = 10 + PRESETS = range(10) def __init__(self): - self.TYPE_STR = bytes([self.TYPE]) + if lzma is None: + raise NotImplemented("lzma compression needs Python >= 3.3 or backports.lzma from PyPi") + + def compress(self, data): + preset = self.TYPE - LzmaCompressor.TYPE + return lzma.compress(data, preset=preset) + + def decompress(self, data): + return lzma.decompress(data) + + +class BLOSCCompressor(object): + TYPE = 0 # override in subclass + LEVELS = range(10) + CNAME = '' # override in subclass + + def __init__(self): + if blosc is None: + raise NotImplemented("%s compression needs blosc from PyPi" % self.CNAME) + if self.CNAME not in blosc.compressor_list(): + raise NotImplemented("%s compression is not supported by blosc" % self.CNAME) + blosc.set_blocksize(16384) # 16kiB is the minimum, so 64kiB are enough for 4 threads + + def _get_level(self): + raise NotImplemented + + def compress(self, data): + return blosc.compress(bytes(data), 1, cname=self.CNAME, clevel=self._get_level()) + + def decompress(self, data): + return blosc.decompress(data) + + +class LZ4Compressor(BLOSCCompressor): + TYPE = 20 + CNAME = 'lz4' + + def _get_level(self): + return self.TYPE - LZ4Compressor.TYPE + + +class LZ4HCCompressor(BLOSCCompressor): + TYPE = 30 + CNAME = 'lz4hc' + + def _get_level(self): + return self.TYPE - LZ4HCCompressor.TYPE + + +class BLOSCLZCompressor(BLOSCCompressor): + TYPE = 40 + CNAME = 'blosclz' + + def _get_level(self): + return self.TYPE - BLOSCLZCompressor.TYPE + + +class SnappyCompressor(BLOSCCompressor): + TYPE = 50 + CNAME = 'snappy' + + def _get_level(self): + return self.TYPE - SnappyCompressor.TYPE + + +class BLOSCZlibCompressor(BLOSCCompressor): + TYPE = 60 + CNAME = 'zlib' + + def _get_level(self): + return self.TYPE - BLOSCZlibCompressor.TYPE + + +# default is optimized for speed +COMPR_DEFAULT = NullCompressor.TYPE # no compression + + +# ciphers - AEAD (authenticated encryption with assoc. 
data) style interface +# special case: PLAIN dummy does not encrypt / authenticate + +class PLAIN: + TYPE = 0 + enc_iv = None # dummy + + def __init__(self, **kw): + pass + + def compute_mac_and_encrypt(self, meta, data): + return None, data + + def check_mac_and_decrypt(self, mac, meta, data): + return data + + +def get_aad(meta): + """get additional authenticated data for AEAD ciphers""" + if meta.legacy: + # legacy format computed the mac over (iv_last8 + data) + return meta.iv[8:] + else: + return msgpack.packb(meta) + + +class AES_CTR_HMAC: + TYPE = 1 + + def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, enc_hmac_key=b'\0' * 32, **kw): + self.hmac_key = enc_hmac_key + self.enc_iv = enc_iv + self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) + self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key) + + def compute_mac_and_encrypt(self, meta, data): + self.enc_cipher.reset(iv=meta.iv) + _, data = self.enc_cipher.compute_mac_and_encrypt(data) + self.enc_iv = increment_iv(meta.iv, len(data)) + aad = get_aad(meta) + mac = HMAC_SHA256(self.hmac_key, aad + data).digest() # XXX mac / hash flexibility + return mac, data + + def check_mac_and_decrypt(self, mac, meta, data): + aad = get_aad(meta) + if HMAC_SHA256(self.hmac_key, aad + data).digest() != mac: # XXX mac / hash flexibility + raise IntegrityError('Encryption envelope checksum mismatch') + self.dec_cipher.reset(iv=meta.iv) + data = self.dec_cipher.check_mac_and_decrypt(None, data) + return data + + +class AES_GCM: + TYPE = 2 + + def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, **kw): + # note: hmac_key is not used for aes-gcm, it does aes+gmac in 1 pass + self.enc_iv = enc_iv + self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) + self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key) + + def compute_mac_and_encrypt(self, meta, data): + self.enc_cipher.reset(iv=meta.iv) + aad = get_aad(meta) + self.enc_cipher.add(aad) + mac, data = self.enc_cipher.compute_mac_and_encrypt(data) + self.enc_iv = increment_iv(meta.iv, len(data)) + return mac, data + + def check_mac_and_decrypt(self, mac, meta, data): + self.dec_cipher.reset(iv=meta.iv) + aad = get_aad(meta) + self.dec_cipher.add(aad) + try: + data = self.dec_cipher.check_mac_and_decrypt(mac, data) + except Exception: + raise IntegrityError('Encryption envelope checksum mismatch') + return data + + +# cipher default is optimized for speed on modern CPUs with AES hw support +PLAIN_DEFAULT = PLAIN.TYPE +CIPHER_DEFAULT = AES_GCM.TYPE + + +# misc. types of keys +# special case: no keys (thus: no encryption, no signing/authentication) + +class KeyBase(object): + TYPE = 0x00 # override in derived classes + + def __init__(self, compressor_cls, maccer_cls, cipher_cls): + self.compressor = compressor_cls() + self.maccer_cls = maccer_cls # hasher/maccer used by id_hash + self.cipher_cls = cipher_cls # plaintext dummy or AEAD cipher + self.cipher = cipher_cls() + self.id_key = None def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key + """Return a HASH (no id_key) or a MAC (using the "id_key" key) + + XXX do we need a cryptographic hash function here or is a keyed hash + function like GMAC / GHASH good enough? See NIST SP 800-38D. + + IMPORTANT: in 1 repo, there should be only 1 kind of id_hash, otherwise + data hashed/maced with one id_hash might result in same ID as already + exists in the repo for other data created with another id_hash method. 
+ somehow unlikely considering 128 or 256bits, but still. """ + return self.maccer_cls(self.id_key, data).digest() def encrypt(self, data): - pass + data = self.compressor.compress(data) + meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE, + mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE, + iv=self.cipher.enc_iv, legacy=False) + mac, data = self.cipher.compute_mac_and_encrypt(meta, data) + return generate(mac, meta, data) def decrypt(self, id, data): - pass + mac, meta, data = parser(data) + compressor, keyer, maccer, cipher = get_implementations(meta) + assert isinstance(self, keyer) + assert self.maccer_cls is maccer + assert self.cipher_cls is cipher + data = self.cipher.check_mac_and_decrypt(mac, meta, data) + data = self.compressor.decompress(data) + if id and self.id_hash(data) != id: + raise IntegrityError('Chunk id verification failed') + return data class PlaintextKey(KeyBase): @@ -73,71 +438,34 @@ class PlaintextKey(KeyBase): @classmethod def create(cls, repository, args): print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.') - return cls() + compressor = compressor_creator(args) + maccer = maccer_creator(args, cls) + cipher = cipher_creator(args, cls) + return cls(compressor, maccer, cipher) @classmethod def detect(cls, repository, manifest_data): - return cls() - - def id_hash(self, data): - return sha256(data).digest() - - def encrypt(self, data): - return b''.join([self.TYPE_STR, zlib.compress(data)]) - - def decrypt(self, id, data): - if data[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') - data = zlib.decompress(memoryview(data)[1:]) - if id and sha256(data).digest() != id: - raise IntegrityError('Chunk id verification failed') - return data + mac, meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) + return cls(compressor, maccer, cipher) class AESKeyBase(KeyBase): """Common base class shared by KeyfileKey and PassphraseKey - Chunks are encrypted using 256bit AES in Counter Mode (CTR) + Chunks are encrypted using 256bit AES in CTR or GCM mode. + Chunks are authenticated by a GCM GMAC or a HMAC. - Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT + Payload layout: TYPE(1) + MAC(32) + NONCE(8) + CIPHERTEXT To reduce payload size only 8 bytes of the 16 bytes nonce is saved in the payload, the first 8 bytes are always zeros. This does not affect security but limits the maximum repository capacity to only 295 exabytes! 
""" - - PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE - - def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key - """ - return HMAC(self.id_key, data, sha256).digest() - - def encrypt(self, data): - data = zlib.compress(data) - self.enc_cipher.reset() - data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) - hmac = HMAC(self.enc_hmac_key, data, sha256).digest() - return b''.join((self.TYPE_STR, hmac, data)) - - def decrypt(self, id, data): - if data[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') - hmac = memoryview(data)[1:33] - if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: - raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview - if id and HMAC(self.id_key, data, sha256).digest() != id: - raise IntegrityError('Chunk id verification failed') - return data - - def extract_nonce(self, payload): - if payload[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') - nonce = bytes_to_long(payload[33:41]) - return nonce + def extract_iv(self, payload): + _, meta, _ = parser(payload) + return meta.iv def init_from_random_data(self, data): self.enc_key = data[0:32] @@ -148,9 +476,13 @@ class AESKeyBase(KeyBase): if self.chunk_seed & 0x80000000: self.chunk_seed = self.chunk_seed - 0xffffffff - 1 - def init_ciphers(self, enc_iv=b''): - self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv) - self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) + def init_ciphers(self, enc_iv=b'\0' * 16): + self.cipher = self.cipher_cls(enc_key=self.enc_key, enc_iv=enc_iv, + enc_hmac_key=self.enc_hmac_key) + + @property + def enc_iv(self): + return self.cipher.enc_iv class PassphraseKey(AESKeyBase): @@ -159,7 +491,10 @@ class PassphraseKey(AESKeyBase): @classmethod def create(cls, repository, args): - key = cls() + compressor = compressor_creator(args) + maccer = maccer_creator(args, cls) + cipher = cipher_creator(args, cls) + key = cls(compressor, maccer, cipher) passphrase = os.environ.get('BORG_PASSPHRASE') if passphrase is not None: passphrase2 = passphrase @@ -181,7 +516,9 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - key = cls() + mac, meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) + key = cls(compressor, maccer, cipher) passphrase = os.environ.get('BORG_PASSPHRASE') if passphrase is None: passphrase = getpass(prompt) @@ -189,8 +526,7 @@ class PassphraseKey(AESKeyBase): key.init(repository, passphrase) try: key.decrypt(None, manifest_data) - num_blocks = num_aes_blocks(len(manifest_data) - 41) - key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) + key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data))) return key except IntegrityError: passphrase = getpass(prompt) @@ -212,14 +548,15 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - key = cls() + mac, meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) + key = cls(compressor, maccer, cipher) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path passphrase = os.environ.get('BORG_PASSPHRASE', '') while not key.load(path, passphrase): passphrase = 
getpass(prompt) - num_blocks = num_aes_blocks(len(manifest_data) - 41) - key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) + key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data))) return key @classmethod @@ -254,25 +591,27 @@ class KeyfileKey(AESKeyBase): def decrypt_key_file(self, data, passphrase): d = msgpack.unpackb(data) assert d[b'version'] == 1 - assert d[b'algorithm'] == b'sha256' + assert d[b'algorithm'] == b'gmac' key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) - data = AES(is_encrypt=False, key=key).decrypt(d[b'data']) - if HMAC(key, data, sha256).digest() != d[b'hash']: + try: + cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=b'\0'*16) + data = cipher.check_mac_and_decrypt(d[b'hash'], d[b'data']) + return data + except Exception: return None - return data def encrypt_key_file(self, data, passphrase): salt = get_random_bytes(32) iterations = 100000 key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) - hash = HMAC(key, data, sha256).digest() - cdata = AES(is_encrypt=True, key=key).encrypt(data) + cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0'*16) + mac, cdata = cipher.compute_mac_and_encrypt(data) d = { 'version': 1, 'salt': salt, 'iterations': iterations, - 'algorithm': 'sha256', - 'hash': hash, + 'algorithm': 'gmac', + 'hash': mac, 'data': cdata, } return msgpack.packb(d) @@ -321,7 +660,10 @@ class KeyfileKey(AESKeyBase): passphrase2 = getpass('Enter same passphrase again: ') if passphrase != passphrase2: print('Passphrases do not match') - key = cls() + compressor = compressor_creator(args) + maccer = maccer_creator(args, cls) + cipher = cipher_creator(args, cls) + key = cls(compressor, maccer, cipher) key.repository_id = repository.id key.init_from_random_data(get_random_bytes(100)) key.init_ciphers() @@ -329,3 +671,213 @@ class KeyfileKey(AESKeyBase): print('Key file "%s" created.' % key.path) print('Keep this file safe. 
Your data will be inaccessible without it.') return key + + +# note: key 0 nicely maps to a zlib compressor with level 0 which means "no compression" +compressor_mapping = {} +for level in ZlibCompressor.LEVELS: + compressor_mapping[ZlibCompressor.TYPE + level] = \ + type('ZlibCompressorLevel%d' % level, (ZlibCompressor, ), dict(TYPE=ZlibCompressor.TYPE + level)) +for preset in LzmaCompressor.PRESETS: + compressor_mapping[LzmaCompressor.TYPE + preset] = \ + type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset)) +for level in LZ4Compressor.LEVELS: + compressor_mapping[LZ4Compressor.TYPE + level] = \ + type('LZ4CompressorLevel%d' % level, (LZ4Compressor, ), dict(TYPE=LZ4Compressor.TYPE + level)) +for level in LZ4HCCompressor.LEVELS: + compressor_mapping[LZ4HCCompressor.TYPE + level] = \ + type('LZ4HCCompressorLevel%d' % level, (LZ4HCCompressor, ), dict(TYPE=LZ4HCCompressor.TYPE + level)) +for level in BLOSCLZCompressor.LEVELS: + compressor_mapping[BLOSCLZCompressor.TYPE + level] = \ + type('BLOSCLZCompressorLevel%d' % level, (BLOSCLZCompressor, ), dict(TYPE=BLOSCLZCompressor.TYPE + level)) +for level in SnappyCompressor.LEVELS: + compressor_mapping[SnappyCompressor.TYPE + level] = \ + type('SnappyCompressorLevel%d' % level, (SnappyCompressor, ), dict(TYPE=SnappyCompressor.TYPE + level)) +for level in BLOSCZlibCompressor.LEVELS: + compressor_mapping[BLOSCZlibCompressor.TYPE + level] = \ + type('BLOSCZlibCompressorLevel%d' % level, (BLOSCZlibCompressor, ), dict(TYPE=BLOSCZlibCompressor.TYPE + level)) +# overwrite 0 with NullCompressor +compressor_mapping[NullCompressor.TYPE] = NullCompressor + + +keyer_mapping = { + KeyfileKey.TYPE: KeyfileKey, + PassphraseKey.TYPE: PassphraseKey, + PlaintextKey.TYPE: PlaintextKey, +} + + +maccer_mapping = { + # simple hashes, not MACs (but MAC-like class __init__ method signature): + SHA1.TYPE: SHA1, + SHA256.TYPE: SHA256, + SHA512_256.TYPE: SHA512_256, + SHA512.TYPE: SHA512, + GHASH.TYPE: GHASH, + # MACs: + HMAC_SHA1.TYPE: HMAC_SHA1, + HMAC_SHA256.TYPE: HMAC_SHA256, + HMAC_SHA512_256.TYPE: HMAC_SHA512_256, + HMAC_SHA512.TYPE: HMAC_SHA512, + GMAC.TYPE: GMAC, +} + + +cipher_mapping = { + # no cipher (but cipher-like class __init__ method signature): + PLAIN.TYPE: PLAIN, + # AEAD cipher implementations + AES_CTR_HMAC.TYPE: AES_CTR_HMAC, + AES_GCM.TYPE: AES_GCM, +} + + +def get_implementations(meta): + try: + compressor = compressor_mapping[meta.compr_type] + keyer = keyer_mapping[meta.key_type] + maccer = maccer_mapping[meta.mac_type] + cipher = cipher_mapping[meta.cipher_type] + except KeyError: + raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x cipher_type %x" % ( + meta.compr_type, meta.key_type, meta.mac_type, meta.cipher_type)) + return compressor, keyer, maccer, cipher + + +def legacy_parser(all_data, key_type): # all rather hardcoded + """ + Payload layout: + no encryption: TYPE(1) + data + with encryption: TYPE(1) + HMAC(32) + NONCE(8) + data + data is compressed with zlib level 6 and (in the 2nd case) encrypted. + + To reduce payload size only 8 bytes of the 16 bytes nonce is saved + in the payload, the first 8 bytes are always zeros. This does not + affect security but limits the maximum repository capacity to + only 295 exabytes! 
+ """ + offset = 1 + if key_type == PlaintextKey.TYPE: + mac_type = SHA256.TYPE + mac = None + cipher_type = PLAIN.TYPE + iv = None + data = all_data[offset:] + else: + mac_type = HMAC_SHA256.TYPE + mac = all_data[offset:offset+32] + cipher_type = AES_CTR_HMAC.TYPE + # legacy attic did not store the full IV on disk, as the upper 8 bytes + # are expected to be zero anyway as the full IV is a 128bit counter. + iv = b'\0' * 8 + all_data[offset+32:offset+40] + data = all_data[offset+40:] + meta = Meta(compr_type=6, key_type=key_type, mac_type=mac_type, + cipher_type=cipher_type, iv=iv, legacy=True) + return mac, meta, data + +def parser00(all_data): + return legacy_parser(all_data, KeyfileKey.TYPE) + +def parser01(all_data): + return legacy_parser(all_data, PassphraseKey.TYPE) + +def parser02(all_data): + return legacy_parser(all_data, PlaintextKey.TYPE) + + +def parser03(all_data): # new & flexible + """ + Payload layout: + always: TYPE(1) + MSGPACK((mac, meta, data)) + + meta is a Meta namedtuple and contains all required information about data. + data is maybe compressed (see meta) and maybe encrypted (see meta). + """ + unpacker = msgpack.Unpacker( + use_list=False, + # avoid memory allocation issues causes by tampered input data. + max_buffer_size=CHUNK_MAX + 1000, # does not work in 0.4.6 unpackb C implementation + max_array_len=10, # meta_tuple + max_bin_len=CHUNK_MAX, # data + max_str_len=0, # not used yet + max_map_len=0, # not used yet + max_ext_len=0, # not used yet + ) + unpacker.feed(all_data[1:]) + mac, meta_tuple, data = unpacker.unpack() + meta = Meta(*meta_tuple) + return mac, meta, data + + +def parser(data): + parser_mapping = { + 0x00: parser00, + 0x01: parser01, + 0x02: parser02, + 0x03: parser03, + } + header_type = data[0] + parser_func = parser_mapping[header_type] + return parser_func(data) + + +def key_factory(repository, manifest_data): + mac, meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) + return keyer.detect(repository, manifest_data) + + +def generate(mac, meta, data): + # always create new-style 0x03 format + return b'\x03' + msgpack.packb((mac, meta, data), use_bin_type=True) + + +def compressor_creator(args): + # args == None is used by unit tests + compression = COMPR_DEFAULT if args is None else args.compression + compressor = compressor_mapping.get(compression) + if compressor is None: + raise NotImplementedError("no compression %d" % args.compression) + return compressor + + +def key_creator(args): + if args.encryption == 'keyfile': + return KeyfileKey + if args.encryption == 'passphrase': + return PassphraseKey + if args.encryption == 'none': + return PlaintextKey + raise NotImplemented("no encryption %s" % args.encryption) + + +def maccer_creator(args, key_cls): + # args == None is used by unit tests + mac = None if args is None else args.mac + if mac is None: + if key_cls is PlaintextKey: + mac = HASH_DEFAULT + elif key_cls in (KeyfileKey, PassphraseKey): + mac = MAC_DEFAULT + else: + raise NotImplementedError("unknown key class") + maccer = maccer_mapping.get(mac) + if maccer is None: + raise NotImplementedError("no mac %d" % args.mac) + return maccer + + +def cipher_creator(args, key_cls): + # args == None is used by unit tests + cipher = None if args is None else args.cipher + if cipher is None: + if key_cls is PlaintextKey: + cipher = PLAIN_DEFAULT + elif key_cls in (KeyfileKey, PassphraseKey): + cipher = CIPHER_DEFAULT + else: + raise NotImplementedError("unknown key class") + cipher = 
cipher_mapping.get(cipher) + if cipher is None: + raise NotImplementedError("no cipher %d" % args.cipher) + return cipher diff --git a/borg/remote.py b/borg/remote.py index 5d59e14a..ced4895f 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -89,7 +89,7 @@ class RepositoryServer: def negotiate(self, versions): return 1 - def open(self, path, create=False): + def open(self, path, create=False, key_size=None): path = os.fsdecode(path) if path.startswith('/~'): path = path[1:] @@ -100,8 +100,8 @@ class RepositoryServer: break else: raise PathNotAllowed(path) - self.repository = Repository(path, create) - return self.repository.id + self.repository = Repository(path, create, key_size=key_size) + return self.repository.id, self.repository.key_size class RemoteRepository: @@ -112,7 +112,7 @@ class RemoteRepository: def __init__(self, name): self.name = name - def __init__(self, location, create=False): + def __init__(self, location, create=False, key_size=None): self.location = location self.preload_ids = [] self.msgid = 0 @@ -144,7 +144,7 @@ class RemoteRepository: version = self.call('negotiate', 1) if version != 1: raise Exception('Server insisted on using unsupported protocol version %d' % version) - self.id = self.call('open', location.path, create) + self.id, self.key_size = self.call('open', location.path, create, key_size) def __del__(self): self.close() @@ -303,7 +303,8 @@ class RepositoryCache: def initialize(self): self.tmppath = tempfile.mkdtemp() - self.index = NSIndex() + self.key_size = self.repository.key_size + self.index = NSIndex(key_size=self.key_size) self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b') def cleanup(self): diff --git a/borg/repository.py b/borg/repository.py index 9cea1e57..b79d0780 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -47,22 +47,23 @@ class Repository: class ObjectNotFound(Error): """Object with key {} not found in repository {}.""" - def __init__(self, path, create=False, exclusive=False): + def __init__(self, path, create=False, exclusive=False, key_size=None): self.path = path self.io = None self.lock = None self.index = None self._active_txn = False if create: - self.create(path) + self.create(path, key_size) self.open(path, exclusive) def __del__(self): self.close() - def create(self, path): + def create(self, path, key_size): """Create a new empty repository at `path` """ + assert key_size is not None if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)): raise self.AlreadyExists(path) if not os.path.exists(path): @@ -75,6 +76,7 @@ class Repository: config.set('repository', 'version', '1') config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR) config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE) + config.set('repository', 'key_size', key_size) config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii')) with open(os.path.join(path, 'config'), 'w') as fd: config.write(fd) @@ -117,10 +119,12 @@ class Repository: if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: raise self.InvalidRepository(path) self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive) + # legacy attic repositories always have key size 32B (256b) + self.key_size = self.config.getint('repository', 'key_size', fallback=32) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') self.id = unhexlify(self.config.get('repository', 
'id').strip()) - self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir) + self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir, self.key_size) def close(self): if self.lock: @@ -140,8 +144,9 @@ class Repository: def open_index(self, transaction_id): if transaction_id is None: - return NSIndex() - return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8')) + return NSIndex(key_size=self.key_size) + return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'), + key_size=self.key_size) def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -397,8 +402,6 @@ class LoggedIO: header_fmt = struct.Struct('= MIN_BUCKETS idx_name = tempfile.NamedTemporaryFile() - idx = NSIndex() + idx = NSIndex(key_size=32) idx.write(idx_name.name) initial_size = os.path.getsize(idx_name.name) self.assert_equal(len(idx), 0) @@ -70,7 +70,7 @@ class HashIndexTestCase(BaseTestCase): self.assert_equal(initial_size, os.path.getsize(idx_name.name)) def test_iteritems(self): - idx = NSIndex() + idx = NSIndex(key_size=32) for x in range(100): idx[bytes('%-0.32d' % x, 'ascii')] = x, x all = list(idx.iteritems()) diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 2f234dd8..ba06ccf8 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -4,8 +4,7 @@ import shutil import tempfile from binascii import hexlify -from ..crypto import bytes_to_long, num_aes_blocks -from ..key import PlaintextKey, PassphraseKey, KeyfileKey +from ..key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT, increment_iv from ..helpers import Location, unhexlify from . import BaseTestCase @@ -14,22 +13,26 @@ class KeyTestCase(BaseTestCase): class MockArgs: repository = Location(tempfile.mkstemp()[1]) + compression = COMPR_DEFAULT + mac = None + cipher = None keyfile2_key_file = """ - BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000 - hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq - bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l - 1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2 - hQCG2L2L/9PUu4WIuKvGrsXoP7syemujNfcZws5jLp2UPva4PkQhQsrF1RYDEMLh2eF9Ol - rwtkThq1tnh7KjWMG9Ijt7/aoQtq0zDYP/xaFF8XXSJxiyP5zjH5+spB6RL0oQHvbsliSh - /cXJq7jrqmrJ1phd6dg4SHAM/i+hubadZoS6m25OQzYAW09wZD/phG8OVa698Z5ed3HTaT - SmrtgJL3EoOKgUI9d6BLE4dJdBqntifo""".strip() +BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000 +hqRzYWx02gAgA1l4jfyv22y6U/mxxDT8HodSWAcX0g3nOESrQcNnBsundmVyc2lvbgGqaX +RlcmF0aW9uc84AAYagqWFsZ29yaXRobaRnbWFjpGhhc2iw7eaB54JssAOnM1S4S9CeTaRk +YXRh2gDQzmuyg3iYjMeTLObY+ybI+QfngB+5mmHeEAfBa42fuEZgqM3rYyMj2XfgvamF+O +0asvhEyy9om190FaOxQ4RiiTMNqSP0FKLmd1i5ZyDMfRyp7JbscRFs9Ryk28yXWkv0MgQy +EAYlaycY+6lWdRSgEPxidyPl9t9dr2AI/UuiQytwqmcmXgWD6Px6wgpOS/4AcRmEvDqIIl +Rc2xsu+RevGAxk5rnrIIRPr7WB5R2cinzEn9ylDgBDt9LZbq706ELgtwVTnjWB8FBTPwVI +vLTTXQ== +""".strip() keyfile2_cdata = unhexlify(re.sub('\W', '', """ - 0055f161493fcfc16276e8c31493c4641e1eb19a79d0326fad0291e5a9c98e5933 - 00000000000003e8d21eaf9b86c297a8cd56432e1915bb + 0393c4102e5ce8f5e9477c9e4ce2de453121aa139600001402c41000000000000000000000000000000000 + c2c407b0147a64a379d1 """)) - keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314') + keyfile2_id = unhexlify('dd9451069663931c8abd85452d016733') def setUp(self): self.tmppath = tempfile.mkdtemp() @@ -45,25 +48,36 @@ class KeyTestCase(BaseTestCase): 
_location = _Location() id = bytes(32) + def _test_make_testdata(self): + # modify tearDown to not kill the key file first, before using this + os.environ['ATTIC_PASSPHRASE'] = 'passphrase' + key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) + print("keyfile2_key_file: find the it in the filesystem, see location in test log output") + print("keyfile2_cdata:", hexlify(key.encrypt(b'payload'))) + print("keyfile2_id:", hexlify(key.id_hash(b'payload'))) + assert False + def test_plaintext(self): - key = PlaintextKey.create(None, None) + key = PlaintextKey.create(None, self.MockArgs()) data = b'foo' - self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae') + self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb3') self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data))) def test_keyfile(self): os.environ['BORG_PASSPHRASE'] = 'test' key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) + self.assert_equal(key.enc_iv, b'\0'*16) manifest = key.encrypt(b'XXX') - self.assert_equal(key.extract_nonce(manifest), 0) + self.assert_equal(key.extract_iv(manifest), b'\0'*16) manifest2 = key.encrypt(b'XXX') self.assert_not_equal(manifest, manifest2) self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2)) - self.assert_equal(key.extract_nonce(manifest2), 1) - iv = key.extract_nonce(manifest) + self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01') + iv = key.extract_iv(manifest) key2 = KeyfileKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD)) + # we assume that the payload fits into one 16B AES block (which is given for b'XXX'). 
+ iv_plus_1 = increment_iv(iv, 16) + self.assert_equal(key2.enc_iv, iv_plus_1) # Key data sanity check self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3) self.assert_equal(key2.chunk_seed == 0, False) @@ -79,25 +93,28 @@ class KeyTestCase(BaseTestCase): def test_passphrase(self): os.environ['BORG_PASSPHRASE'] = 'test' - key = PassphraseKey.create(self.MockRepository(), None) - self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) + key = PassphraseKey.create(self.MockRepository(), self.MockArgs()) + # XXX self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) + self.assert_equal(key.enc_iv, b'\0'*16) self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6') self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901') self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a') self.assert_equal(key.chunk_seed, -775740477) manifest = key.encrypt(b'XXX') - self.assert_equal(key.extract_nonce(manifest), 0) + self.assert_equal(key.extract_iv(manifest), b'\0'*16) manifest2 = key.encrypt(b'XXX') self.assert_not_equal(manifest, manifest2) self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2)) - self.assert_equal(key.extract_nonce(manifest2), 1) - iv = key.extract_nonce(manifest) + self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01') + iv = key.extract_iv(manifest) key2 = PassphraseKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD)) + # we assume that the payload fits into one 16B AES block (which is given for b'XXX'). + iv_plus_1 = increment_iv(iv, 16) + self.assert_equal(key2.enc_iv, iv_plus_1) self.assert_equal(key.id_key, key2.id_key) self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key) self.assert_equal(key.enc_key, key2.enc_key) self.assert_equal(key.chunk_seed, key2.chunk_seed) data = b'foo' - self.assert_equal(hexlify(key.id_hash(data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990') + self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde0501') self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data))) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 9cc8d242..b2e197a0 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -9,16 +9,15 @@ from ..repository import Repository from . 
import BaseTestCase from .mock import patch - class RepositoryTestCaseBase(BaseTestCase): key_size = 32 - def open(self, create=False): - return Repository(os.path.join(self.tmppath, 'repository'), create=create) + def open(self, create=False, key_size=None): + return Repository(os.path.join(self.tmppath, 'repository'), create=create, key_size=key_size) def setUp(self): self.tmppath = tempfile.mkdtemp() - self.repository = self.open(create=True) + self.repository = self.open(create=True, key_size=self.key_size) def tearDown(self): self.repository.close() @@ -209,7 +208,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1] def open_index(self): - return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head()))) + return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())), + key_size=self.key_size) def corrupt_object(self, id_): idx = self.open_index() @@ -317,8 +317,9 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): class RemoteRepositoryTestCase(RepositoryTestCase): - def open(self, create=False): - return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + def open(self, create=False, key_size=None): + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + create=create, key_size=key_size) def test_invalid_rpc(self): self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None)) @@ -326,5 +327,6 @@ class RemoteRepositoryTestCase(RepositoryTestCase): class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): - def open(self, create=False): - return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + def open(self, create=False, key_size=None): + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + create=create, key_size=key_size)
diff --git a/docs/index.rst b/docs/index.rst
index db4f4928..b4e4217b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -53,6 +53,7 @@ User's Guide
    quickstart
    usage
    faq
+   tuning
    internals
 
 Getting help
diff --git a/docs/tuning.rst b/docs/tuning.rst
new file mode 100644
index 00000000..4f746e95
--- /dev/null
+++ b/docs/tuning.rst
@@ -0,0 +1,147 @@
+.. _tuning:
+.. include:: global.rst.inc
+
+Tuning
+======
+
+General hints
+-------------
+CPU load, backup speed, memory and storage usage are covered below.
+
+As performance and resource usage depend on a lot of factors, you may need to
+tweak the parameters a bit and retry until you find the best ones for your
+setup.
+
+Usually, the default parameters are chosen for good speed on the assumption
+that you run a modern machine with a fast CPU, fast I/O and a good amount of RAM.
+
+If you run an older or low-resource machine, or if your backup target or the
+connection to it is slow, tweaking these parameters can give significant speedups.
+
+Exclude crap data
+-----------------
+Maybe you don't want to back up:
+
+* cache / temporary files (they can be rebuilt / are useless)
+* specific directories / filenames / file extensions you do not need
+* backups (some people make backups of backups...)
+
+You can exclude these, so they don't waste time and space.
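+
+As a rough, self-contained illustration of how such shell-style exclude
+patterns behave (this is only a sketch using Python's stdlib ``fnmatch``, not
+borg's actual pattern-matching code), you can experiment like this::
+
+    import fnmatch
+
+    excludes = ['*.tmp', '*.pyc', '*/cache/*']
+
+    def is_excluded(path):
+        # a path is skipped if any exclude pattern matches it
+        return any(fnmatch.fnmatch(path, pattern) for pattern in excludes)
+
+    print(is_excluded('home/user/cache/page.html'))  # True
+    print(is_excluded('home/user/notes.txt'))        # False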
+
+Avoid scrolling
+---------------
+If you do benchmarks, avoid creating a lot of log output, especially if it
+means scrolling text in a window on a graphical user interface.
+
+Instead, produce less log output or redirect the output to a log file; that is
+also much faster than scrolling.
+
+Speed (in general)
+------------------
+Keep an eye on whether you are CPU bound or I/O bound and try to find the sweet
+spot where the process is neither heavily I/O bound nor heavily CPU bound.
+
+I/O bound
+~~~~~~~~~
+If the CPU load does not add up to one fully loaded core while backing up, the
+process is likely I/O bound (it cannot read or write data fast enough).
+
+In that case, try higher compression so there is less data to write, or get
+faster I/O, if possible.
+
+CPU bound
+~~~~~~~~~
+If one core is fully loaded most of the time but your backup still seems slow,
+the process is likely CPU bound (it cannot compute fast enough).
+
+In that case, try lower compression so there is less to compute. Using a
+faster MAC or cipher method might also be an option. Or get a faster CPU.
+
+I/O speed
+---------
+From fastest to slowest:
+
+* fast local filesystem, SSD or HDD, via PCIe, SATA, USB
+* ssh connection to a remote server's borg instance
+* mounted network filesystems of a remote server
+
+Not only throughput influences timing; latency does, too.
+
+Backup space needed
+-------------------
+If you mostly back up the same data every time, you will often save a lot of
+space due to deduplication - this works independently of compression.
+
+To avoid running out of space, regularly prune your backup archives according
+to your needs. Backups of the same machine that are close in time are usually
+very cheap, because most of the data is identical and gets deduplicated.
+
+Compression
+-----------
+If you have a fast backup source and destination and you are not low on backup
+space: switch off compression; your backup will run faster and with less CPU load.
+
+If you just want to save a bit of space but stay relatively fast:
+try zlib level 1.
+
+If your source or destination is very slow (e.g. a remote backup space reached
+via a network connection that is much slower than your local and remote
+storage): try a higher zlib level or lzma.
+
+Authentication & MAC selection
+------------------------------
+A real MAC (Message Authentication Code) can only be used when a secret key is
+available. It authenticates your backup data and can detect malicious tampering.
+Without a key, a plain hash is used instead; this helps to detect accidental
+data corruption, but cannot detect malicious tampering.
+
+Older or simple 32bit machine architecture
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use sha256 (no key) or hmac-sha256 (key).
+
+64bit architecture, but no AES hardware acceleration in the CPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use sha512-256 (no key) or hmac-sha512-256 (key).
+
+Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use ghash (no key) or gmac (key).
+
+Encryption & Cipher selection
+-----------------------------
+Always encrypt your backups (and keep the passphrase and key file, if any, safe).
+
+The cipher selection chooses between several AEAD constructions (authenticated
+encryption with associated data), used in encrypt-then-MAC (EtM) fashion:
+
+Older or simple 32bit machine architecture
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use aes256-ctr + hmac-sha256.
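+
+For the aes256-ctr + hmac variants, "encrypt-then-MAC" means the MAC is
+computed over the ciphertext and verified before anything is decrypted. A
+minimal sketch of that verification order, using only the stdlib ``hmac``
+module and leaving the actual AES-CTR step abstract (this is an illustration,
+not borg's implementation)::
+
+    import hmac, hashlib
+
+    def seal(mac_key, ciphertext):
+        # MAC is computed over the ciphertext (EtM), then appended
+        tag = hmac.new(mac_key, ciphertext, hashlib.sha256).digest()
+        return ciphertext + tag
+
+    def unseal(mac_key, blob):
+        ciphertext, tag = blob[:-32], blob[-32:]
+        expected = hmac.new(mac_key, ciphertext, hashlib.sha256).digest()
+        if not hmac.compare_digest(tag, expected):
+            raise ValueError('MAC check failed - refusing to decrypt')
+        return ciphertext  # only now would AES-CTR decryption happen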
+
+64bit architecture, but no AES hardware acceleration in the CPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use aes256-ctr + hmac-sha512-256.
+
+Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use aes256-gcm (AEAD 1-pass cipher).
+
+RAM usage
+---------
+Memory usage varies with the number of files and chunks in the repository:
+
+* about 250+ B RAM per file (for the "files" cache)
+* about 44 B RAM per 64 kiB chunk (for the "chunks" cache)
+* about 40 B RAM per 64 kiB chunk (for the repository index; if a remote repo
+  is used, this is allocated on the remote side)
+
+If you run into memory usage issues, your options are:
+
+* get more RAM (or more swap space; speed will be slower)
+* disable the "files" cache (speed will be slower)
+* have fewer files / chunks per repo
+
+Note: RAM compression likely won't help, as a lot of that data is
+msgpack-encoded and thus already rather compact.
diff --git a/setup.py b/setup.py
index 88dc2564..db9d16e6 100644
--- a/setup.py
+++ b/setup.py
@@ -102,6 +102,12 @@ elif sys.platform.startswith('freebsd'):
 elif sys.platform == 'darwin':
     ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source]))
 
+# msgpack pure python data corruption was fixed in 0.4.6.
+# Also, we might use some rather recent API features.
+install_requires=['msgpack-python>=0.4.6', 'blosc>=1.2.5']
+if sys.version_info < (3, 3):
+    install_requires.append('backports.lzma')
+
 setup(
     name='borgbackup',
     version=versioneer.get_version(),
@@ -132,7 +138,5 @@ setup(
     scripts=['scripts/borg'],
     cmdclass=cmdclass,
     ext_modules=ext_modules,
-    # msgpack pure python data corruption was fixed in 0.4.6.
-    # Also, we might use some rather recent API features.
-    install_requires=['msgpack-python>=0.4.6']
+    install_requires=install_requires,
 )
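
The RAM figures in docs/tuning.rst above translate into a quick back-of-the-envelope
estimate. As a standalone sketch (the per-item byte counts are taken from that
section; the function name and the rounding are illustrative, not part of borg):

    def estimate_ram_bytes(num_files, total_data_bytes, chunk_size=64 * 1024,
                           remote_repository=False):
        # rough estimate: ~250 B/file, ~44 B/chunk, ~40 B/chunk for the repo index
        num_chunks = total_data_bytes // chunk_size
        files_cache = num_files * 250
        chunks_cache = num_chunks * 44
        repo_index = 0 if remote_repository else num_chunks * 40
        return files_cache + chunks_cache + repo_index

    # e.g. 1 million files, 1 TiB of data, local repository:
    print(estimate_ram_bytes(10**6, 2**40) / 2**20, "MiB")  # roughly 1582 MiB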