Merge branch 'merge-all' of ../attic into experimental

This commit is contained in:
Thomas Waldmann 2015-06-05 22:58:08 +02:00
commit 3a38457def
21 changed files with 1183 additions and 241 deletions

48
CHANGES-experimental.txt Normal file
View File

@ -0,0 +1,48 @@
Important note about "experimental" branch
==========================================
Goal of the "experimental" branch is to merge all the stuff:
- changesets from master branch
- features that DO IMPACT compatibility
- play with new technologies
- etc.
THERE IS NO GUARANTEE THAT IT IS COMPATIBLE WITH MASTER BRANCH OR PREVIOUS
"experimental" CODE nor THAT YOU CAN SWITCH BACK AND FORTH BETWEEN BRANCHES
WITHIN THE SAME REPOSITORY WITHOUT ENCOUNTERING SEVERE ISSUES.
Please also see the LICENSE for more information.
Stuff in "experimental" that is not in "master" minus minor changes
===================================================================
added tuning docs
attic init --compression NN --cipher NN --mac NN ...
(see attic init --help)
new hashes: sha512-256
sha512
sha1
ghash (default)
new MACs: hmac-sha512-256
hmac-sha512
hmac-sha1
gmac (default)
new ciphers: aes256-ctr + hmac-sha512-256
aes256-gcm (default)
new compression: no compression (default)
zlib level 1..9 (previously, level 6 was hardcoded)
lzma preset 0..9
lz4 (and other) multi-threaded algos from blosc library
source: more flexible type 0x03 header format, allowing to give hash algo,
compression algo and level, encryption algo, key type.
IV is stored in full length, length of stored IV/MAC/hash is flexible.
Indexing key size (key = id_hash()) is flexible and configurable per repo.
source: less hardcoding, numeric offsets / lengths
source: flexible hashing, compression, encryption, key dispatching

View File

@ -34,7 +34,7 @@ Space efficient storage
Optional data encryption
All data can be protected using 256-bit AES encryption and data integrity
and authenticity is verified using HMAC-SHA256.
and authenticity is verified using a MAC (message authentication code).
Off-site backups
Borg can store data on any remote host accessible over SSH. This is
@ -49,6 +49,7 @@ What do I need?
Borg requires Python 3.2 or above to work.
Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
In order to mount archives as filesystems, llfuse is required.
For other python requirements, please see setup.py install_requires.
How do I install it?
--------------------

View File

@ -366,7 +366,7 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
void *key = NULL;
while((key = hashindex_next_key(index, key))) {
values = key + 32;
values = key + index->key_size;
unique_size += values[1];
unique_csize += values[2];
size += values[0] * values[1];

View File

@ -616,7 +616,7 @@ class ArchiveChecker:
self.repository = repository
self.init_chunks()
self.key = self.identify_key(repository)
if Manifest.MANIFEST_ID not in self.chunks:
if Manifest.manifest_id(repository) not in self.chunks:
self.manifest = self.rebuild_manifest()
else:
self.manifest, _ = Manifest.load(repository, key=self.key)
@ -635,7 +635,7 @@ class ArchiveChecker:
# Explicity set the initial hash table capacity to avoid performance issues
# due to hash table "resonance"
capacity = int(len(self.repository) * 1.2)
self.chunks = ChunkIndex(capacity)
self.chunks = ChunkIndex(capacity, key_size=self.repository.key_size)
marker = None
while True:
result = self.repository.list(limit=10000, marker=marker)
@ -687,7 +687,7 @@ class ArchiveChecker:
Missing and/or incorrect data is repaired when detected
"""
# Exclude the manifest from chunks
del self.chunks[Manifest.MANIFEST_ID]
del self.chunks[Manifest.manifest_id(self.repository)]
def mark_as_possibly_superseded(id_):
if self.chunks.get(id_, (0,))[0] == 0:

View File

@ -16,7 +16,7 @@ from . import __version__
from .archive import Archive, ArchiveChecker
from .repository import Repository
from .cache import Cache
from .key import key_creator
from .key import key_creator, maccer_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT
from .helpers import Error, location_validator, format_time, format_file_size, \
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
@ -30,11 +30,11 @@ class Archiver:
def __init__(self):
self.exit_code = 0
def open_repository(self, location, create=False, exclusive=False):
def open_repository(self, location, create=False, exclusive=False, key_size=None):
if location.proto == 'ssh':
repository = RemoteRepository(location, create=create)
repository = RemoteRepository(location, create=create, key_size=key_size)
else:
repository = Repository(location.path, create=create, exclusive=exclusive)
repository = Repository(location.path, create=create, exclusive=exclusive, key_size=key_size)
repository._location = location
return repository
@ -59,10 +59,12 @@ class Archiver:
def do_init(self, args):
"""Initialize an empty repository"""
print('Initializing repository at "%s"' % args.repository.orig)
repository = self.open_repository(args.repository, create=True, exclusive=True)
key = key_creator(repository, args)
key_cls = key_creator(args)
maccer_cls = maccer_creator(args, key_cls)
repository = self.open_repository(args.repository, create=True, exclusive=True,
key_size=maccer_cls.digest_size)
key = key_cls.create(repository, args)
manifest = Manifest(key, repository)
manifest.key = key
manifest.write()
repository.commit()
Cache(repository, key, manifest, warn_if_unencrypted=False)
@ -523,8 +525,39 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
init_epilog = textwrap.dedent("""
This command initializes an empty repository. A repository is a filesystem
directory containing the deduplicated data from zero or more archives.
Encryption can be enabled at repository init time.
""")
Encryption can be enabled, compression, cipher and mac method can be chosen at
repository init time.
--compression METHODs (default: %02d):
- 00 no compression
- 01..09 zlib levels 1..9 (1 means low compression, 9 max. compression)
- 10..19 lzma levels 0..9 (0 means low compression, 9 max. compression)
- 20..29 lz4 (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 30..39 lz4hc (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 40..49 blosclz (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 50..59 snappy (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 60..69 zlib (blosc) levels 0..9 (0 = no, 9 = max. compression)
--cipher METHODs (default: %02d or %02d)
- 00 No encryption
- 01 AEAD: AES-CTR + HMAC-SHA256
- 02 AEAD: AES-GCM
--mac METHODs (default: %02d or %02d):
- 00 sha256 (simple hash, no MAC, faster on 32bit CPU)
- 01 sha512-256 (simple hash, no MAC, faster on 64bit CPU)
- 02 ghash (simple hash, no MAC, fastest on CPUs with AES-GCM support)
- 03 sha1 (simple hash, no MAC, fastest on CPUs without AES-GCM support)
- 04 sha512 (simple hash, no MAC, faster on 64bit CPU)
- 10 hmac-sha256 (MAC, faster on 32bit CPU)
- 11 hmac-sha512-256 (MAC, faster on 64bit CPU)
- 13 hmac-sha1 (MAC, fastest on CPUs without AES-GCM support)
- 14 hmac-sha512 (MAC, faster on 64bit CPU)
- 20 gmac (MAC, fastest on CPUs with AES-GCM support)
""" % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT))
subparser = subparsers.add_parser('init', parents=[common_parser],
description=self.do_init.__doc__, epilog=init_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter)
@ -534,7 +567,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
help='repository to create')
subparser.add_argument('-e', '--encryption', dest='encryption',
choices=('none', 'passphrase', 'keyfile'), default='none',
help='select encryption method')
help='select encryption key method')
subparser.add_argument('-C', '--cipher', dest='cipher',
type=int, default=None, metavar='METHOD',
help='select cipher (0..2)')
subparser.add_argument('-c', '--compression', dest='compression',
type=int, default=COMPR_DEFAULT, metavar='METHOD',
help='select compression method (0..19)')
subparser.add_argument('-m', '--mac', dest='mac',
type=int, default=None, metavar='METHOD',
help='select hash/mac method (0..3)')
check_epilog = textwrap.dedent("""
The check command verifies the consistency of a repository and the corresponding

View File

@ -95,7 +95,7 @@ class Cache:
config.set('cache', 'manifest', '')
with open(os.path.join(self.path, 'config'), 'w') as fd:
config.write(fd)
ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
ChunkIndex(key_size=self.repository.key_size).write(os.path.join(self.path, 'chunks').encode('utf-8'))
with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd:
pass # empty file
with open(os.path.join(self.path, 'files'), 'wb') as fd:
@ -118,7 +118,8 @@ class Cache:
self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
self.key_type = self.config.get('cache', 'key_type', fallback=None)
self.previous_location = self.config.get('cache', 'previous_location', fallback=None)
self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'),
key_size=self.repository.key_size)
self.files = None
def open(self):
@ -272,7 +273,7 @@ class Cache:
return archive_name
def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out):
chunk_idx = ChunkIndex()
chunk_idx = ChunkIndex(key_size=repository.key_size)
cdata = repository.get(archive_id)
data = key.decrypt(archive_id, cdata)
add(chunk_idx, archive_id, len(data), len(cdata))
@ -299,13 +300,13 @@ class Cache:
tf_out.addfile(tarinfo, f)
os.unlink(file_tmp)
def create_master_idx(chunk_idx, tf_in, tmp_dir):
def create_master_idx(chunk_idx, repository, tf_in, tmp_dir):
chunk_idx.clear()
for tarinfo in tf_in:
archive_id_hex = tarinfo.name
tf_in.extract(archive_id_hex, tmp_dir)
chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
archive_chunk_idx = ChunkIndex.read(chunk_idx_path, key_size=repository.key_size)
for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
add(chunk_idx, chunk_id, size, csize, incr=count)
os.unlink(chunk_idx_path)
@ -334,7 +335,7 @@ class Cache:
rename_out_archive()
print('Merging collection into master chunks cache...')
in_archive = open_in_archive()
create_master_idx(self.chunks, in_archive, tmp_dir)
create_master_idx(self.chunks, repository, in_archive, tmp_dir)
close_archive(in_archive)
print('Done.')

View File

@ -7,6 +7,12 @@ from libc.stdlib cimport malloc, free
API_VERSION = 2
AES_CTR_MODE = 1
AES_GCM_MODE = 2
MAC_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below.
IV_SIZE = 16 # bytes; 128 bits
cdef extern from "openssl/rand.h":
int RAND_bytes(unsigned char *buf, int num)
@ -23,6 +29,7 @@ cdef extern from "openssl/evp.h":
pass
const EVP_MD *EVP_sha256()
const EVP_CIPHER *EVP_aes_256_ctr()
const EVP_CIPHER *EVP_aes_256_gcm()
void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
@ -36,20 +43,33 @@ cdef extern from "openssl/evp.h":
const unsigned char *in_, int inl)
int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, unsigned char *ptr)
int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen,
const unsigned char *salt, int saltlen, int iter,
const EVP_MD *digest,
int keylen, unsigned char *out)
int EVP_CTRL_GCM_GET_TAG
int EVP_CTRL_GCM_SET_TAG
int EVP_CTRL_GCM_SET_IVLEN
import struct
_int = struct.Struct('>I')
_long = struct.Struct('>Q')
_2long = struct.Struct('>QQ')
bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
long_to_bytes = lambda x: _long.pack(x)
def bytes16_to_int(b, offset=0):
h, l = _2long.unpack_from(b, offset)
return (h << 64) + l
def int_to_bytes16(i):
max_uint64 = 0xffffffffffffffff
l = i & max_uint64
h = (i >> 64) & max_uint64
return _2long.pack(h, l)
def num_aes_blocks(length):
@ -59,6 +79,22 @@ def num_aes_blocks(length):
return (length + 15) // 16
def increment_iv(iv, amount):
"""
increment the given IV considering that <amount> bytes of data was
encrypted based on it. In CTR / GCM mode, the IV is just a counter and
must never repeat.
:param iv: current IV, 16 bytes (128 bit)
:param amount: amount of data (in bytes) that was encrypted
:return: new IV, 16 bytes (128 bit)
"""
iv = bytes16_to_int(iv)
iv += num_aes_blocks(amount)
iv = int_to_bytes16(iv)
return iv
def pbkdf2_sha256(password, salt, iterations, size):
"""Password based key derivation function 2 (RFC2898)
"""
@ -93,12 +129,19 @@ cdef class AES:
"""
cdef EVP_CIPHER_CTX ctx
cdef int is_encrypt
cdef int mode
def __cinit__(self, is_encrypt, key, iv=None):
def __cinit__(self, mode, is_encrypt, key, iv=None):
EVP_CIPHER_CTX_init(&self.ctx)
self.mode = mode
self.is_encrypt = is_encrypt
# Set cipher type and mode
cipher_mode = EVP_aes_256_ctr()
if mode == AES_CTR_MODE:
cipher_mode = EVP_aes_256_ctr()
elif mode == AES_GCM_MODE:
cipher_mode = EVP_aes_256_gcm()
else:
raise Exception('unknown mode')
if self.is_encrypt:
if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
raise Exception('EVP_EncryptInit_ex failed')
@ -117,6 +160,10 @@ cdef class AES:
key2 = key
if iv:
iv2 = iv
if self.mode == AES_GCM_MODE:
# Set IV length (bytes)
if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL):
raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed')
# Initialise key and IV
if self.is_encrypt:
if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
@ -125,16 +172,26 @@ cdef class AES:
if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
raise Exception('EVP_DecryptInit_ex failed')
@property
def iv(self):
return self.ctx.iv[:16]
def add(self, aad):
cdef int aadl = len(aad)
cdef int outl
if self.mode != AES_GCM_MODE:
raise Exception('additional data only supported for AES GCM mode')
# Zero or more calls to specify any AAD
if self.is_encrypt:
if not EVP_EncryptUpdate(&self.ctx, NULL, &outl, aad, aadl):
raise Exception('EVP_EncryptUpdate failed')
else: # decrypt
if not EVP_DecryptUpdate(&self.ctx, NULL, &outl, aad, aadl):
raise Exception('EVP_DecryptUpdate failed')
def encrypt(self, data):
def compute_mac_and_encrypt(self, data):
cdef int inl = len(data)
cdef int ctl = 0
cdef int outl = 0
# note: modes that use padding, need up to one extra AES block (16b)
# note: modes that use padding, need up to one extra AES block (16B)
cdef unsigned char *out = <unsigned char *>malloc(inl+16)
cdef unsigned char *mac = <unsigned char *>malloc(MAC_SIZE)
if not out:
raise MemoryError
try:
@ -144,15 +201,20 @@ cdef class AES:
if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
raise Exception('EVP_EncryptFinal failed')
ctl += outl
return out[:ctl]
if self.mode == AES_GCM_MODE:
# Get tag (mac) - only GCM mode. for CTR, the returned mac is undefined
if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, MAC_SIZE, mac):
raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed')
return (mac[:MAC_SIZE]), out[:ctl]
finally:
free(mac)
free(out)
def decrypt(self, data):
def check_mac_and_decrypt(self, mac, data):
cdef int inl = len(data)
cdef int ptl = 0
cdef int outl = 0
# note: modes that use padding, need up to one extra AES block (16b).
# note: modes that use padding, need up to one extra AES block (16B).
# This is what the openssl docs say. I am not sure this is correct,
# but OTOH it will not cause any harm if our buffer is a little bigger.
cdef unsigned char *out = <unsigned char *>malloc(inl+16)
@ -162,10 +224,12 @@ cdef class AES:
if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl):
raise Exception('EVP_DecryptUpdate failed')
ptl = outl
if self.mode == AES_GCM_MODE:
# Set expected tag (mac) value.
if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, MAC_SIZE, mac):
raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed')
if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
# this error check is very important for modes with padding or
# authentication. for them, a failure here means corrupted data.
# CTR mode does not use padding nor authentication.
# for GCM mode, a failure here means corrupted / tampered tag (mac) or data
raise Exception('EVP_DecryptFinal failed')
ptl += outl
return out[:ptl]

View File

@ -26,9 +26,11 @@ _NoDefault = object()
cdef class IndexBase:
cdef HashIndex *index
key_size = 32
cdef int key_size
def __cinit__(self, capacity=0, path=None):
def __cinit__(self, capacity=0, path=None, key_size=None):
assert key_size is not None
self.key_size = key_size
if path:
self.index = hashindex_read(<bytes>os.fsencode(path))
if not self.index:
@ -43,8 +45,8 @@ cdef class IndexBase:
hashindex_free(self.index)
@classmethod
def read(cls, path):
return cls(path=path)
def read(cls, path, key_size=None):
return cls(path=path, key_size=key_size)
def write(self, path):
if not hashindex_write(self.index, <bytes>os.fsencode(path)):
@ -61,7 +63,7 @@ cdef class IndexBase:
self[key] = value
def __delitem__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
if not hashindex_delete(self.index, <char *>key):
raise Exception('hashindex_delete failed')
@ -90,14 +92,14 @@ cdef class NSIndex(IndexBase):
value_size = 8
def __getitem__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return _le32toh(data[0]), _le32toh(data[1])
def __setitem__(self, key, value):
assert len(key) == 32
assert len(key) == self.key_size
cdef int[2] data
data[0] = _htole32(value[0])
data[1] = _htole32(value[1])
@ -105,20 +107,20 @@ cdef class NSIndex(IndexBase):
raise Exception('hashindex_set failed')
def __contains__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL
def iteritems(self, marker=None):
cdef const void *key
iter = NSKeyIterator()
iter = NSKeyIterator(self.key_size)
iter.idx = self
iter.index = self.index
if marker:
key = hashindex_get(self.index, <char *>marker)
if marker is None:
raise IndexError
iter.key = key - 32
iter.key = key - self.key_size
return iter
@ -126,9 +128,11 @@ cdef class NSKeyIterator:
cdef NSIndex idx
cdef HashIndex *index
cdef const void *key
cdef int key_size
def __cinit__(self):
def __cinit__(self, key_size):
self.key = NULL
self.key_size = key_size
def __iter__(self):
return self
@ -137,8 +141,8 @@ cdef class NSKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + 32)
return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
cdef class ChunkIndex(IndexBase):
@ -146,14 +150,14 @@ cdef class ChunkIndex(IndexBase):
value_size = 12
def __getitem__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
if not data:
raise KeyError
return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
def __setitem__(self, key, value):
assert len(key) == 32
assert len(key) == self.key_size
cdef int[3] data
data[0] = _htole32(value[0])
data[1] = _htole32(value[1])
@ -162,20 +166,20 @@ cdef class ChunkIndex(IndexBase):
raise Exception('hashindex_set failed')
def __contains__(self, key):
assert len(key) == 32
assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL
def iteritems(self, marker=None):
cdef const void *key
iter = ChunkKeyIterator()
iter = ChunkKeyIterator(self.key_size)
iter.idx = self
iter.index = self.index
if marker:
key = hashindex_get(self.index, <char *>marker)
if marker is None:
raise IndexError
iter.key = key - 32
iter.key = key - self.key_size
return iter
def summarize(self):
@ -188,9 +192,11 @@ cdef class ChunkKeyIterator:
cdef ChunkIndex idx
cdef HashIndex *index
cdef const void *key
cdef int key_size
def __cinit__(self):
def __cinit__(self, key_size):
self.key = NULL
self.key_size = key_size
def __iter__(self):
return self
@ -199,5 +205,5 @@ cdef class ChunkKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key:
raise StopIteration
cdef int *value = <int *>(self.key + 32)
return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))

View File

@ -82,18 +82,20 @@ def check_extension_modules():
class Manifest:
MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository):
self.archives = {}
self.config = {}
self.key = key
self.repository = repository
@classmethod
def manifest_id(cls, repository):
return b'\0' * repository.key_size
@classmethod
def load(cls, repository, key=None):
from .key import key_factory
cdata = repository.get(cls.MANIFEST_ID)
cdata = repository.get(cls.manifest_id(repository))
if not key:
key = key_factory(repository, cdata)
manifest = cls(key, repository)
@ -118,7 +120,7 @@ class Manifest:
'config': self.config,
}))
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
self.repository.put(self.manifest_id(self.repository), self.key.encrypt(data))
def list_archive_infos(self, sort_by=None, reverse=False):
# inexpensive Archive.list_archives replacement if we just need .name, .id, .ts

View File

@ -3,14 +3,33 @@ from getpass import getpass
import os
import msgpack
import textwrap
from collections import namedtuple
import hmac
from hashlib import sha256
from hashlib import sha1, sha256, sha512
import zlib
from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
try:
import lzma # python >= 3.3
except ImportError:
try:
from backports import lzma # backports.lzma from pypi
except ImportError:
lzma = None
try:
import blosc
except ImportError:
blosc = None
from .crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \
bytes_to_int, increment_iv, num_aes_blocks
from .helpers import IntegrityError, get_keys_dir, Error
PREFIX = b'\0' * 8
# TODO fix cyclic import:
#from .archive import CHUNK_MAX
CHUNK_MAX = 10 * 1024 * 1024
Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy')
class UnsupportedPayloadError(Error):
@ -22,47 +41,393 @@ class KeyfileNotFoundError(Error):
"""
class sha512_256(object):  # note: can't subclass sha512
    """sha512 with the digest truncated to 256 bits.

    Faster than sha256 on 64bit platforms. Mimics the hashlib object
    interface (update / digest / hexdigest / copy) so it can be used as a
    drop-in hash function.
    """
    digestsize = digest_size = 32  # truncated digest length, bytes
    block_size = 64  # sha512 input block size, bytes
    # bug fix: name is now a class attribute; previously it was only set in
    # __init__, so objects returned by copy() had no .name attribute.
    name = 'sha512-256'

    def __init__(self, data=None):
        self._h = sha512()
        if data:
            self.update(data)

    def update(self, data):
        self._h.update(data)

    def digest(self):
        # truncate the 64 byte sha512 digest to 32 bytes
        return self._h.digest()[:self.digest_size]

    def hexdigest(self):
        # 2 hex characters per digest byte
        return self._h.hexdigest()[:self.digest_size * 2]

    def copy(self):
        new = sha512_256.__new__(sha512_256)
        new._h = self._h.copy()
        return new
# HASH / MAC stuff below all has a mac-like interface, so it can be used in the same way.
# special case: hashes do not use keys (and thus, do not sign/authenticate)
class HASH:  # note: we can't subclass sha1/sha256/sha512
    """Plain (unkeyed) hash exposing the common hash/MAC interface.

    Subclasses must set TYPE, digest_size and hash_func. The key
    parameter exists only for signature compatibility with the MAC
    classes and must be None.
    """
    TYPE = 0  # override in subclass
    digest_size = 0  # override in subclass
    hash_func = None  # override in subclass

    def __init__(self, key, data=b''):
        # a plain hash takes no key - keyed use needs a real MAC
        if key is None:
            self.h = self.hash_func(data)
        else:
            raise Exception("use a HMAC if you have a key")

    def update(self, data):
        self.h.update(data)

    def digest(self):
        return self.h.digest()

    def hexdigest(self):
        return self.h.hexdigest()
class SHA256(HASH):
    # plain sha256 under the common hash/MAC interface
    TYPE = 0
    digest_size = 32
    hash_func = sha256
class SHA512_256(HASH):
    # sha512 truncated to 256 bits (see the sha512_256 class above)
    TYPE = 1
    digest_size = 32
    hash_func = sha512_256
class GHASH:
    """Unkeyed hash built from AES-GCM's tag computation.

    All data is fed to the cipher as AAD only; the resulting GCM tag is
    the digest. Uses a fixed all-zero key and IV, which degrades the
    (keyed) GMAC construction to a plain hash.
    """
    TYPE = 2
    digest_size = 16  # GCM tag size, bytes
    def __init__(self, key, data=b''):
        # signature is like for a MAC, we ignore the key as this is a simple hash
        if key is not None:
            raise Exception("use a MAC if you have a key")
        self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=b'\0' * 32, iv=b'\0' * 16)
        if data:
            self.update(data)
    def update(self, data):
        # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data
        self.mac_cipher.add(bytes(data))
    def digest(self):
        # encrypting the empty string finalizes the tag over the AAD
        hash, _ = self.mac_cipher.compute_mac_and_encrypt(b'')
        return hash
class SHA1(HASH):
    # plain sha1 under the common hash/MAC interface
    TYPE = 3
    digest_size = 20
    hash_func = sha1
class SHA512(HASH):
    # plain sha512, full 64 byte digest
    TYPE = 4
    digest_size = 64
    hash_func = sha512
class HMAC(hmac.HMAC):
    """Keyed MAC base class on top of hmac.HMAC.

    Subclasses set TYPE, digest_size and hash_func. Also works around a
    bug in Python < 3.4 where hmac.HMAC.update() does not accept
    memoryviews.
    """
    TYPE = 0  # override in subclass
    digest_size = 0  # override in subclass
    hash_func = None  # override in subclass

    def __init__(self, key, data):
        if key is None:
            raise Exception("do not use HMAC if you don't have a key")
        super().__init__(key, data, self.hash_func)

    def update(self, msg):
        # Python < 3.4: hmac.HMAC.update() rejects memoryviews, but
        # feeding the inner hash directly is equivalent and accepts them.
        # bug fix: newer Pythons renamed/removed the .inner attribute
        # (and accept memoryviews natively), so fall back to the base
        # implementation there instead of crashing with AttributeError.
        try:
            inner = self.inner
        except AttributeError:
            super().update(msg)
        else:
            inner.update(msg)
def key_creator(repository, args):
    """Create a new key for *repository* as selected by args.encryption.

    NOTE(review): this looks like the pre-change variant - other hunks in
    this changeset call key_creator(args) with a single argument and then
    use key_cls.create(repository, args); confirm which version is current.
    """
    if args.encryption == 'keyfile':
        return KeyfileKey.create(repository, args)
    elif args.encryption == 'passphrase':
        return PassphraseKey.create(repository, args)
    else:
        return PlaintextKey.create(repository, args)
class HMAC_SHA256(HMAC):
    # HMAC with sha256 (MAC type 10)
    TYPE = 10
    digest_size = 32
    hash_func = sha256
def key_factory(repository, manifest_data):
    """Instantiate the right key class from the type byte of manifest_data.

    Raises UnsupportedPayloadError for an unknown type byte.
    """
    if manifest_data[0] == KeyfileKey.TYPE:
        return KeyfileKey.detect(repository, manifest_data)
    elif manifest_data[0] == PassphraseKey.TYPE:
        return PassphraseKey.detect(repository, manifest_data)
    elif manifest_data[0] == PlaintextKey.TYPE:
        return PlaintextKey.detect(repository, manifest_data)
    else:
        raise UnsupportedPayloadError(manifest_data[0])
class HMAC_SHA512_256(HMAC):
    # HMAC with sha512 truncated to 256 bits (MAC type 11)
    TYPE = 11
    digest_size = 32
    hash_func = sha512_256
# NOTE(review): the bare "class KeyBase:" line below looks like diff
# residue (the real KeyBase definition appears further down); confirm
# and remove it.
class KeyBase:
class HMAC_SHA1(HMAC):
    # HMAC with sha1 (MAC type 13)
    TYPE = 13
    digest_size = 20
    hash_func = sha1
class HMAC_SHA512(HMAC):
    # HMAC with sha512, full 64 byte digest (MAC type 14)
    TYPE = 14
    digest_size = 64
    hash_func = sha512
class GMAC(GHASH):
    """Keyed MAC based on AES-GCM (inherits update/digest from GHASH)."""
    TYPE = 20
    digest_size = 16  # GCM tag size, bytes
    def __init__(self, key, data=b''):
        if key is None:
            raise Exception("do not use GMAC if you don't have a key")
        # NOTE(review): a fixed all-zero IV with a long-lived key is only
        # safe for GMAC if each MAC uses a fresh key/cipher context -
        # confirm how callers manage key lifetime here.
        self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0' * 16)
        if data:
            self.update(data)
# defaults are optimized for speed on modern CPUs with AES hw support
HASH_DEFAULT = GHASH.TYPE
MAC_DEFAULT = GMAC.TYPE
# compressor classes, all same interface
class NullCompressor(object):  # uses 0 in the mapping
    """Identity "compressor": data passes through unchanged."""
    TYPE = 0

    def _passthrough(self, data):
        # normalize to bytes (data may be a memoryview or bytearray)
        return bytes(data)

    compress = _passthrough
    decompress = _passthrough
class ZlibCompressor(object):  # uses 1..9 in the mapping
    """zlib compression; the level is encoded in the subclass' TYPE."""
    TYPE = 0
    LEVELS = range(10)

    def _level(self):
        # the zlib level is the offset of this class' TYPE from the base TYPE
        return self.TYPE - ZlibCompressor.TYPE

    def compress(self, data):
        return zlib.compress(data, self._level())

    def decompress(self, data):
        return zlib.decompress(data)
class LzmaCompressor(object):  # uses 10..19 in the mapping
    """lzma ("xz") compression; the preset is encoded in the subclass' TYPE.

    Raises NotImplementedError when no lzma module is available.
    """
    TYPE = 10
    PRESETS = range(10)
    def __init__(self):
        self.TYPE_STR = bytes([self.TYPE])
        if lzma is None:
            # bug fix: this used to "raise NotImplemented", but
            # NotImplemented is a constant, not an exception class -
            # raising it fails with a TypeError.
            raise NotImplementedError("lzma compression needs Python >= 3.3 or backports.lzma from PyPi")
    def compress(self, data):
        # preset 0..9, derived from the subclass' TYPE offset
        preset = self.TYPE - LzmaCompressor.TYPE
        return lzma.compress(data, preset=preset)
    def decompress(self, data):
        return lzma.decompress(data)
class BLOSCCompressor(object):
    """Base class for compressors from the blosc library.

    Subclasses set TYPE (the base of their level range) and CNAME (the
    blosc codec name). Raises NotImplementedError when blosc or the
    requested codec is unavailable.
    """
    TYPE = 0  # override in subclass
    LEVELS = range(10)
    CNAME = ''  # override in subclass
    def __init__(self):
        # bug fix: these used to "raise NotImplemented", but NotImplemented
        # is a constant, not an exception class - raising it fails with a
        # TypeError instead of the intended error.
        if blosc is None:
            raise NotImplementedError("%s compression needs blosc from PyPi" % self.CNAME)
        if self.CNAME not in blosc.compressor_list():
            raise NotImplementedError("%s compression is not supported by blosc" % self.CNAME)
        blosc.set_blocksize(16384)  # 16kiB is the minimum, so 64kiB are enough for 4 threads
    def _get_level(self):
        # override in subclass: level = TYPE - <subclass base TYPE>
        raise NotImplementedError
    def compress(self, data):
        # blosc wants bytes; typesize 1, level from the subclass
        return blosc.compress(bytes(data), 1, cname=self.CNAME, clevel=self._get_level())
    def decompress(self, data):
        return blosc.decompress(data)
class LZ4Compressor(BLOSCCompressor):
    # lz4 via blosc, mapping types 20..29
    TYPE = 20
    CNAME = 'lz4'
    def _get_level(self):
        return self.TYPE - LZ4Compressor.TYPE
class LZ4HCCompressor(BLOSCCompressor):
    # lz4hc via blosc, mapping types 30..39
    TYPE = 30
    CNAME = 'lz4hc'
    def _get_level(self):
        return self.TYPE - LZ4HCCompressor.TYPE
class BLOSCLZCompressor(BLOSCCompressor):
    # blosclz via blosc, mapping types 40..49
    TYPE = 40
    CNAME = 'blosclz'
    def _get_level(self):
        return self.TYPE - BLOSCLZCompressor.TYPE
class SnappyCompressor(BLOSCCompressor):
    # snappy via blosc, mapping types 50..59
    TYPE = 50
    CNAME = 'snappy'
    def _get_level(self):
        return self.TYPE - SnappyCompressor.TYPE
class BLOSCZlibCompressor(BLOSCCompressor):
    # zlib via blosc, mapping types 60..69
    TYPE = 60
    CNAME = 'zlib'
    def _get_level(self):
        return self.TYPE - BLOSCZlibCompressor.TYPE
# default is optimized for speed
COMPR_DEFAULT = NullCompressor.TYPE  # no compression
# ciphers - AEAD (authenticated encryption with assoc. data) style interface
# special case: PLAIN dummy does not encrypt / authenticate
class PLAIN:
    """Dummy cipher with the AEAD interface: no encryption, no MAC."""
    TYPE = 0
    enc_iv = None  # dummy, kept for interface compatibility

    def __init__(self, **kw):
        # accept and ignore any keyword arguments (keys, IVs, ...)
        pass

    def compute_mac_and_encrypt(self, meta, data):
        # no MAC is computed and the data stays as-is
        return (None, data)

    def check_mac_and_decrypt(self, mac, meta, data):
        # nothing to verify or decrypt
        return data
def get_aad(meta):
    """get additional authenticated data for AEAD ciphers"""
    if meta.legacy:
        # legacy format computed the mac over (iv_last8 + data)
        return meta.iv[8:]
    else:
        # authenticate the whole serialized meta header
        return msgpack.packb(meta)
class AES_CTR_HMAC:
    """AEAD-style cipher: AES-256-CTR encryption + HMAC-SHA256 authentication."""
    TYPE = 1
    def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, enc_hmac_key=b'\0' * 32, **kw):
        self.hmac_key = enc_hmac_key
        self.enc_iv = enc_iv  # next IV to use for encryption
        self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv)
        self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key)
    def compute_mac_and_encrypt(self, meta, data):
        self.enc_cipher.reset(iv=meta.iv)
        _, data = self.enc_cipher.compute_mac_and_encrypt(data)
        # advance the IV so the CTR keystream never repeats
        self.enc_iv = increment_iv(meta.iv, len(data))
        # MAC covers the AAD plus the ciphertext (encrypt-then-MAC)
        aad = get_aad(meta)
        mac = HMAC_SHA256(self.hmac_key, aad + data).digest()  # XXX mac / hash flexibility
        return mac, data
    def check_mac_and_decrypt(self, mac, meta, data):
        aad = get_aad(meta)
        # NOTE(review): non-constant-time MAC comparison; consider
        # hmac.compare_digest here.
        if HMAC_SHA256(self.hmac_key, aad + data).digest() != mac:  # XXX mac / hash flexibility
            raise IntegrityError('Encryption envelope checksum mismatch')
        self.dec_cipher.reset(iv=meta.iv)
        data = self.dec_cipher.check_mac_and_decrypt(None, data)
        return data
class AES_GCM:
    """AEAD cipher: AES-256-GCM (encryption + authentication in one pass)."""
    TYPE = 2
    def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, **kw):
        # note: hmac_key is not used for aes-gcm, it does aes+gmac in 1 pass
        self.enc_iv = enc_iv  # next IV to use for encryption
        self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv)
        self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key)
    def compute_mac_and_encrypt(self, meta, data):
        self.enc_cipher.reset(iv=meta.iv)
        aad = get_aad(meta)
        self.enc_cipher.add(aad)  # authenticated, but not encrypted
        mac, data = self.enc_cipher.compute_mac_and_encrypt(data)
        # advance the IV so the GCM counter never repeats
        self.enc_iv = increment_iv(meta.iv, len(data))
        return mac, data
    def check_mac_and_decrypt(self, mac, meta, data):
        self.dec_cipher.reset(iv=meta.iv)
        aad = get_aad(meta)
        self.dec_cipher.add(aad)
        try:
            data = self.dec_cipher.check_mac_and_decrypt(mac, data)
        except Exception:
            # tag mismatch or corrupted / tampered data
            raise IntegrityError('Encryption envelope checksum mismatch')
        return data
# cipher default is optimized for speed on modern CPUs with AES hw support
PLAIN_DEFAULT = PLAIN.TYPE
CIPHER_DEFAULT = AES_GCM.TYPE
# misc. types of keys
# special case: no keys (thus: no encryption, no signing/authentication)
class KeyBase(object):
    """Base class for key types; ties together compressor, maccer and cipher.

    NOTE(review): this block contains diff residue (a duplicated docstring
    line in id_hash, stray "pass" statements in encrypt/decrypt); kept
    verbatim and flagged inline - confirm against the merged source.
    """
    TYPE = 0x00  # override in derived classes
    def __init__(self, compressor_cls, maccer_cls, cipher_cls):
        self.compressor = compressor_cls()
        self.maccer_cls = maccer_cls  # hasher/maccer used by id_hash
        self.cipher_cls = cipher_cls  # plaintext dummy or AEAD cipher
        self.cipher = cipher_cls()
        self.id_key = None  # key for id_hash; None means unkeyed hashing
    def id_hash(self, data):
        # NOTE(review): the first docstring line below looks like the old
        # (removed) docstring left over from the diff; confirm and drop it.
        """Return HMAC hash using the "id" HMAC key
        """Return a HASH (no id_key) or a MAC (using the "id_key" key)
        XXX do we need a cryptographic hash function here or is a keyed hash
        function like GMAC / GHASH good enough? See NIST SP 800-38D.
        IMPORTANT: in 1 repo, there should be only 1 kind of id_hash, otherwise
        data hashed/maced with one id_hash might result in same ID as already
        exists in the repo for other data created with another id_hash method.
        somehow unlikely considering 128 or 256bits, but still.
        """
        return self.maccer_cls(self.id_key, data).digest()
    def encrypt(self, data):
        # NOTE(review): stray "pass" (old method body) left over from the diff
        pass
        data = self.compressor.compress(data)
        meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE,
                    mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE,
                    iv=self.cipher.enc_iv, legacy=False)
        mac, data = self.cipher.compute_mac_and_encrypt(meta, data)
        return generate(mac, meta, data)
    def decrypt(self, id, data):
        # NOTE(review): stray "pass" (old method body) left over from the diff
        pass
        mac, meta, data = parser(data)
        compressor, keyer, maccer, cipher = get_implementations(meta)
        assert isinstance(self, keyer)
        assert self.maccer_cls is maccer
        assert self.cipher_cls is cipher
        data = self.cipher.check_mac_and_decrypt(mac, meta, data)
        data = self.compressor.decompress(data)
        if id and self.id_hash(data) != id:
            raise IntegrityError('Chunk id verification failed')
        return data
class PlaintextKey(KeyBase):
@ -73,71 +438,34 @@ class PlaintextKey(KeyBase):
@classmethod
def create(cls, repository, args):
print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
return cls()
compressor = compressor_creator(args)
maccer = maccer_creator(args, cls)
cipher = cipher_creator(args, cls)
return cls(compressor, maccer, cipher)
@classmethod
def detect(cls, repository, manifest_data):
return cls()
def id_hash(self, data):
return sha256(data).digest()
def encrypt(self, data):
return b''.join([self.TYPE_STR, zlib.compress(data)])
def decrypt(self, id, data):
if data[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
data = zlib.decompress(memoryview(data)[1:])
if id and sha256(data).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
return cls(compressor, maccer, cipher)
class AESKeyBase(KeyBase):
"""Common base class shared by KeyfileKey and PassphraseKey
Chunks are encrypted using 256bit AES in Counter Mode (CTR)
Chunks are encrypted using 256bit AES in CTR or GCM mode.
Chunks are authenticated by a GCM GMAC or a HMAC.
Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT
Payload layout: TYPE(1) + MAC(32) + NONCE(8) + CIPHERTEXT
To reduce payload size only 8 bytes of the 16 bytes nonce is saved
in the payload, the first 8 bytes are always zeros. This does not
affect security but limits the maximum repository capacity to
only 295 exabytes!
"""
PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE
def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key
"""
return HMAC(self.id_key, data, sha256).digest()
def encrypt(self, data):
data = zlib.compress(data)
self.enc_cipher.reset()
data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
return b''.join((self.TYPE_STR, hmac, data))
def decrypt(self, id, data):
if data[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
hmac = memoryview(data)[1:33]
if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
raise IntegrityError('Encryption envelope checksum mismatch')
self.dec_cipher.reset(iv=PREFIX + data[33:41])
data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview
if id and HMAC(self.id_key, data, sha256).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
def extract_nonce(self, payload):
if payload[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
nonce = bytes_to_long(payload[33:41])
return nonce
def extract_iv(self, payload):
_, meta, _ = parser(payload)
return meta.iv
def init_from_random_data(self, data):
self.enc_key = data[0:32]
@ -148,9 +476,13 @@ class AESKeyBase(KeyBase):
if self.chunk_seed & 0x80000000:
self.chunk_seed = self.chunk_seed - 0xffffffff - 1
def init_ciphers(self, enc_iv=b''):
self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv)
self.dec_cipher = AES(is_encrypt=False, key=self.enc_key)
def init_ciphers(self, enc_iv=b'\0' * 16):
self.cipher = self.cipher_cls(enc_key=self.enc_key, enc_iv=enc_iv,
enc_hmac_key=self.enc_hmac_key)
@property
def enc_iv(self):
return self.cipher.enc_iv
class PassphraseKey(AESKeyBase):
@ -159,7 +491,10 @@ class PassphraseKey(AESKeyBase):
@classmethod
def create(cls, repository, args):
key = cls()
compressor = compressor_creator(args)
maccer = maccer_creator(args, cls)
cipher = cipher_creator(args, cls)
key = cls(compressor, maccer, cipher)
passphrase = os.environ.get('BORG_PASSPHRASE')
if passphrase is not None:
passphrase2 = passphrase
@ -181,7 +516,9 @@ class PassphraseKey(AESKeyBase):
@classmethod
def detect(cls, repository, manifest_data):
prompt = 'Enter passphrase for %s: ' % repository._location.orig
key = cls()
mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
key = cls(compressor, maccer, cipher)
passphrase = os.environ.get('BORG_PASSPHRASE')
if passphrase is None:
passphrase = getpass(prompt)
@ -189,8 +526,7 @@ class PassphraseKey(AESKeyBase):
key.init(repository, passphrase)
try:
key.decrypt(None, manifest_data)
num_blocks = num_aes_blocks(len(manifest_data) - 41)
key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data)))
return key
except IntegrityError:
passphrase = getpass(prompt)
@ -212,14 +548,15 @@ class KeyfileKey(AESKeyBase):
@classmethod
def detect(cls, repository, manifest_data):
key = cls()
mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
key = cls(compressor, maccer, cipher)
path = cls.find_key_file(repository)
prompt = 'Enter passphrase for key file %s: ' % path
passphrase = os.environ.get('BORG_PASSPHRASE', '')
while not key.load(path, passphrase):
passphrase = getpass(prompt)
num_blocks = num_aes_blocks(len(manifest_data) - 41)
key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data)))
return key
@classmethod
@ -254,25 +591,27 @@ class KeyfileKey(AESKeyBase):
def decrypt_key_file(self, data, passphrase):
d = msgpack.unpackb(data)
assert d[b'version'] == 1
assert d[b'algorithm'] == b'sha256'
assert d[b'algorithm'] == b'gmac'
key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
if HMAC(key, data, sha256).digest() != d[b'hash']:
try:
cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=b'\0'*16)
data = cipher.check_mac_and_decrypt(d[b'hash'], d[b'data'])
return data
except Exception:
return None
return data
def encrypt_key_file(self, data, passphrase):
salt = get_random_bytes(32)
iterations = 100000
key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
hash = HMAC(key, data, sha256).digest()
cdata = AES(is_encrypt=True, key=key).encrypt(data)
cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0'*16)
mac, cdata = cipher.compute_mac_and_encrypt(data)
d = {
'version': 1,
'salt': salt,
'iterations': iterations,
'algorithm': 'sha256',
'hash': hash,
'algorithm': 'gmac',
'hash': mac,
'data': cdata,
}
return msgpack.packb(d)
@ -321,7 +660,10 @@ class KeyfileKey(AESKeyBase):
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print('Passphrases do not match')
key = cls()
compressor = compressor_creator(args)
maccer = maccer_creator(args, cls)
cipher = cipher_creator(args, cls)
key = cls(compressor, maccer, cipher)
key.repository_id = repository.id
key.init_from_random_data(get_random_bytes(100))
key.init_ciphers()
@ -329,3 +671,213 @@ class KeyfileKey(AESKeyBase):
print('Key file "%s" created.' % key.path)
print('Keep this file safe. Your data will be inaccessible without it.')
return key
# note: key 0 nicely maps to a zlib compressor with level 0 which means "no compression"
compressor_mapping = {}


def _register_compressors(base_cls, offsets, name_fmt):
    # Create one concrete subclass per level/preset, with the final TYPE byte
    # (base TYPE + offset) baked in, and register it under that byte.
    for offset in offsets:
        type_byte = base_cls.TYPE + offset
        compressor_mapping[type_byte] = type(name_fmt % offset, (base_cls, ),
                                             dict(TYPE=type_byte))


_register_compressors(ZlibCompressor, ZlibCompressor.LEVELS, 'ZlibCompressorLevel%d')
_register_compressors(LzmaCompressor, LzmaCompressor.PRESETS, 'LzmaCompressorPreset%d')
_register_compressors(LZ4Compressor, LZ4Compressor.LEVELS, 'LZ4CompressorLevel%d')
_register_compressors(LZ4HCCompressor, LZ4HCCompressor.LEVELS, 'LZ4HCCompressorLevel%d')
_register_compressors(BLOSCLZCompressor, BLOSCLZCompressor.LEVELS, 'BLOSCLZCompressorLevel%d')
_register_compressors(SnappyCompressor, SnappyCompressor.LEVELS, 'SnappyCompressorLevel%d')
_register_compressors(BLOSCZlibCompressor, BLOSCZlibCompressor.LEVELS, 'BLOSCZlibCompressorLevel%d')

# overwrite 0 with NullCompressor
compressor_mapping[NullCompressor.TYPE] = NullCompressor
# maps the Meta.key_type byte to the key implementation class
keyer_mapping = {
    KeyfileKey.TYPE: KeyfileKey,
    PassphraseKey.TYPE: PassphraseKey,
    PlaintextKey.TYPE: PlaintextKey,
}

# maps the Meta.mac_type byte to the hash/MAC class used by id_hash
maccer_mapping = {
    # simple hashes, not MACs (but MAC-like class __init__ method signature):
    SHA1.TYPE: SHA1,
    SHA256.TYPE: SHA256,
    SHA512_256.TYPE: SHA512_256,
    SHA512.TYPE: SHA512,
    GHASH.TYPE: GHASH,
    # MACs:
    HMAC_SHA1.TYPE: HMAC_SHA1,
    HMAC_SHA256.TYPE: HMAC_SHA256,
    HMAC_SHA512_256.TYPE: HMAC_SHA512_256,
    HMAC_SHA512.TYPE: HMAC_SHA512,
    GMAC.TYPE: GMAC,
}

# maps the Meta.cipher_type byte to the cipher (or plaintext dummy) class
cipher_mapping = {
    # no cipher (but cipher-like class __init__ method signature):
    PLAIN.TYPE: PLAIN,
    # AEAD cipher implementations
    AES_CTR_HMAC.TYPE: AES_CTR_HMAC,
    AES_GCM.TYPE: AES_GCM,
}
def get_implementations(meta):
    """Resolve the type bytes in *meta* to their implementation classes.

    :returns: (compressor, keyer, maccer, cipher) classes
    :raises UnsupportedPayloadError: if any type byte is unknown
    """
    try:
        return (compressor_mapping[meta.compr_type],
                keyer_mapping[meta.key_type],
                maccer_mapping[meta.mac_type],
                cipher_mapping[meta.cipher_type])
    except KeyError:
        raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x cipher_type %x" % (
            meta.compr_type, meta.key_type, meta.mac_type, meta.cipher_type))
def legacy_parser(all_data, key_type):  # all rather hardcoded
    """Parse a legacy attic payload into (mac, meta, data).

    Payload layout:
    no encryption: TYPE(1) + data
    with encryption: TYPE(1) + HMAC(32) + NONCE(8) + data

    data is compressed with zlib level 6 and (in the 2nd case) encrypted.

    To reduce payload size only 8 bytes of the 16 bytes nonce is saved
    in the payload, the first 8 bytes are always zeros. This does not
    affect security but limits the maximum repository capacity to
    only 295 exabytes!
    """
    if key_type == PlaintextKey.TYPE:
        mac, mac_type = None, SHA256.TYPE
        iv, cipher_type = None, PLAIN.TYPE
        data = all_data[1:]
    else:
        mac, mac_type = all_data[1:33], HMAC_SHA256.TYPE
        cipher_type = AES_CTR_HMAC.TYPE
        # legacy attic did not store the full IV on disk, as the upper 8 bytes
        # are expected to be zero anyway as the full IV is a 128bit counter.
        iv = b'\0' * 8 + all_data[33:41]
        data = all_data[41:]
    meta = Meta(compr_type=6, key_type=key_type, mac_type=mac_type,
                cipher_type=cipher_type, iv=iv, legacy=True)
    return mac, meta, data
def parser00(all_data):
    """Parse a legacy type 0x00 payload (KeyfileKey)."""
    return legacy_parser(all_data, key_type=KeyfileKey.TYPE)


def parser01(all_data):
    """Parse a legacy type 0x01 payload (PassphraseKey)."""
    return legacy_parser(all_data, key_type=PassphraseKey.TYPE)


def parser02(all_data):
    """Parse a legacy type 0x02 payload (PlaintextKey)."""
    return legacy_parser(all_data, key_type=PlaintextKey.TYPE)
def parser03(all_data):  # new & flexible
    """Parse a new-style type 0x03 payload into (mac, meta, data).

    Payload layout:
    always: TYPE(1) + MSGPACK((mac, meta, data))

    meta is a Meta namedtuple and contains all required information about data.
    data is maybe compressed (see meta) and maybe encrypted (see meta).
    """
    # hard limits on the unpacker avoid memory allocation issues caused by
    # tampered input data
    unpacker = msgpack.Unpacker(
        use_list=False,
        max_buffer_size=CHUNK_MAX + 1000,  # does not work in 0.4.6 unpackb C implementation
        max_array_len=10,  # meta_tuple
        max_bin_len=CHUNK_MAX,  # data
        max_str_len=0,  # not used yet
        max_map_len=0,  # not used yet
        max_ext_len=0,  # not used yet
    )
    unpacker.feed(all_data[1:])
    mac, meta_tuple, data = unpacker.unpack()
    return mac, Meta(*meta_tuple), data
def parser(data):
    """Dispatch on the leading TYPE byte and parse the payload.

    :returns: (mac, meta, data)
    :raises KeyError: for an unknown TYPE byte
    """
    dispatch = {
        0x00: parser00,
        0x01: parser01,
        0x02: parser02,
        0x03: parser03,
    }
    return dispatch[data[0]](data)
def key_factory(repository, manifest_data):
    """Detect the key class from the manifest payload and return its instance."""
    _, meta, _ = parser(manifest_data)
    _, keyer, _, _ = get_implementations(meta)
    return keyer.detect(repository, manifest_data)
def generate(mac, meta, data):
    """Serialize (mac, meta, data) as a payload; always creates new-style 0x03 format."""
    packed = msgpack.packb((mac, meta, data), use_bin_type=True)
    return b'\x03' + packed
def compressor_creator(args):
    """Return the compressor class selected by args.compression.

    args == None is used by unit tests (falls back to COMPR_DEFAULT).

    :raises NotImplementedError: for an unknown compression type
    """
    compression = COMPR_DEFAULT if args is None else args.compression
    compressor = compressor_mapping.get(compression)
    if compressor is None:
        # use the local variable in the message: args may be None on this path
        raise NotImplementedError("no compression %d" % compression)
    return compressor
def key_creator(args):
    """Return the key class selected by args.encryption.

    :raises NotImplementedError: for an unknown encryption mode
    """
    if args.encryption == 'keyfile':
        return KeyfileKey
    if args.encryption == 'passphrase':
        return PassphraseKey
    if args.encryption == 'none':
        return PlaintextKey
    # was `raise NotImplemented(...)`: NotImplemented is a sentinel, not an
    # exception class, so that raised TypeError instead of the intended error
    raise NotImplementedError("no encryption %s" % args.encryption)
def maccer_creator(args, key_cls):
    """Return the hash/MAC class used for id_hash.

    Selected by args.mac; falls back to the default for the key class
    (HASH_DEFAULT for plaintext, MAC_DEFAULT for encrypting keys).
    args == None is used by unit tests.

    :raises NotImplementedError: for an unknown key class or mac type
    """
    mac = None if args is None else args.mac
    if mac is None:
        if key_cls is PlaintextKey:
            mac = HASH_DEFAULT
        elif key_cls in (KeyfileKey, PassphraseKey):
            mac = MAC_DEFAULT
        else:
            raise NotImplementedError("unknown key class")
    maccer = maccer_mapping.get(mac)
    if maccer is None:
        # use the local variable in the message: args may be None on this path
        raise NotImplementedError("no mac %d" % mac)
    return maccer
def cipher_creator(args, key_cls):
    """Return the cipher class (or plaintext dummy).

    Selected by args.cipher; falls back to the default for the key class
    (PLAIN_DEFAULT for plaintext, CIPHER_DEFAULT for encrypting keys).
    args == None is used by unit tests.

    :raises NotImplementedError: for an unknown key class or cipher type
    """
    cipher_type = None if args is None else args.cipher
    if cipher_type is None:
        if key_cls is PlaintextKey:
            cipher_type = PLAIN_DEFAULT
        elif key_cls in (KeyfileKey, PassphraseKey):
            cipher_type = CIPHER_DEFAULT
        else:
            raise NotImplementedError("unknown key class")
    # distinct names: the original reused `cipher` for both the type byte and
    # the class, and its error message dereferenced args which may be None here
    cipher = cipher_mapping.get(cipher_type)
    if cipher is None:
        raise NotImplementedError("no cipher %d" % cipher_type)
    return cipher

View File

@ -89,7 +89,7 @@ class RepositoryServer:
def negotiate(self, versions):
return 1
def open(self, path, create=False):
def open(self, path, create=False, key_size=None):
path = os.fsdecode(path)
if path.startswith('/~'):
path = path[1:]
@ -100,8 +100,8 @@ class RepositoryServer:
break
else:
raise PathNotAllowed(path)
self.repository = Repository(path, create)
return self.repository.id
self.repository = Repository(path, create, key_size=key_size)
return self.repository.id, self.repository.key_size
class RemoteRepository:
@ -112,7 +112,7 @@ class RemoteRepository:
def __init__(self, name):
self.name = name
def __init__(self, location, create=False):
def __init__(self, location, create=False, key_size=None):
self.location = location
self.preload_ids = []
self.msgid = 0
@ -144,7 +144,7 @@ class RemoteRepository:
version = self.call('negotiate', 1)
if version != 1:
raise Exception('Server insisted on using unsupported protocol version %d' % version)
self.id = self.call('open', location.path, create)
self.id, self.key_size = self.call('open', location.path, create, key_size)
def __del__(self):
self.close()
@ -303,7 +303,8 @@ class RepositoryCache:
def initialize(self):
self.tmppath = tempfile.mkdtemp()
self.index = NSIndex()
self.key_size = self.repository.key_size
self.index = NSIndex(key_size=self.key_size)
self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
def cleanup(self):

View File

@ -47,22 +47,23 @@ class Repository:
class ObjectNotFound(Error):
"""Object with key {} not found in repository {}."""
def __init__(self, path, create=False, exclusive=False):
def __init__(self, path, create=False, exclusive=False, key_size=None):
self.path = path
self.io = None
self.lock = None
self.index = None
self._active_txn = False
if create:
self.create(path)
self.create(path, key_size)
self.open(path, exclusive)
def __del__(self):
self.close()
def create(self, path):
def create(self, path, key_size):
"""Create a new empty repository at `path`
"""
assert key_size is not None
if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
raise self.AlreadyExists(path)
if not os.path.exists(path):
@ -75,6 +76,7 @@ class Repository:
config.set('repository', 'version', '1')
config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
config.set('repository', 'key_size', key_size)
config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
with open(os.path.join(path, 'config'), 'w') as fd:
config.write(fd)
@ -117,10 +119,12 @@ class Repository:
if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
raise self.InvalidRepository(path)
self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
# legacy attic repositories always have key size 32B (256b)
self.key_size = self.config.getint('repository', 'key_size', fallback=32)
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
self.id = unhexlify(self.config.get('repository', 'id').strip())
self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir, self.key_size)
def close(self):
if self.lock:
@ -140,8 +144,9 @@ class Repository:
def open_index(self, transaction_id):
if transaction_id is None:
return NSIndex()
return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
return NSIndex(key_size=self.key_size)
return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'),
key_size=self.key_size)
def prepare_txn(self, transaction_id, do_cleanup=True):
self._active_txn = True
@ -397,8 +402,6 @@ class LoggedIO:
header_fmt = struct.Struct('<IIB')
assert header_fmt.size == 9
put_header_fmt = struct.Struct('<IIB32s')
assert put_header_fmt.size == 41
header_no_crc_fmt = struct.Struct('<IB')
assert header_no_crc_fmt.size == 5
crc_fmt = struct.Struct('<I')
@ -407,13 +410,16 @@ class LoggedIO:
_commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
COMMIT = crc_fmt.pack(crc32(_commit)) + _commit
def __init__(self, path, limit, segments_per_dir, capacity=90):
def __init__(self, path, limit, segments_per_dir, key_size, capacity=90):
self.path = path
self.fds = LRUCache(capacity)
self.segment = 0
self.limit = limit
self.segments_per_dir = segments_per_dir
self.key_size = key_size
self.offset = 0
self.put_header_fmt = struct.Struct('<IIB%ds' % key_size)
assert self.put_header_fmt.size == self.header_fmt.size + key_size
self._write_fd = None
def close(self):
@ -519,9 +525,9 @@ class LoggedIO:
raise IntegrityError('Invalid segment entry header')
key = None
if tag in (TAG_PUT, TAG_DELETE):
key = rest[:32]
key = rest[:self.key_size]
if include_data:
yield tag, key, offset, rest[32:]
yield tag, key, offset, rest[self.key_size:]
else:
yield tag, key, offset
offset += size

View File

@ -3,7 +3,7 @@ from datetime import datetime, timezone
import msgpack
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
from ..key import PlaintextKey
from ..key import PlaintextKey, COMPR_DEFAULT
from ..helpers import Manifest
from . import BaseTestCase
from .mock import Mock
@ -21,9 +21,15 @@ class MockCache:
class ArchiveTimestampTestCase(BaseTestCase):
class MockArgs(object):
repository = None
compression = COMPR_DEFAULT
mac = None
cipher = None
def _test_timestamp_parsing(self, isoformat, expected):
repository = Mock()
key = PlaintextKey()
key = PlaintextKey.create(None, self.MockArgs())
manifest = Manifest(repository, key)
a = Archive(repository, key, manifest, 'test', create=True)
a.metadata = {b'time': isoformat}
@ -42,10 +48,16 @@ class ArchiveTimestampTestCase(BaseTestCase):
class ChunkBufferTestCase(BaseTestCase):
class MockArgs(object):
repository = None
compression = COMPR_DEFAULT
mac = None
cipher = None
def test(self):
data = [{b'foo': 1}, {b'bar': 2}]
cache = MockCache()
key = PlaintextKey()
key = PlaintextKey.create(None, self.MockArgs())
chunks = CacheChunkBuffer(cache, key, None)
for d in data:
chunks.add(d)

View File

@ -15,8 +15,9 @@ from .. import xattr
from ..archive import Archive, ChunkBuffer, CHUNK_MAX
from ..archiver import Archiver
from ..cache import Cache
from ..crypto import bytes_to_long, num_aes_blocks
from ..crypto import bytes16_to_int, num_aes_blocks
from ..helpers import Manifest
from ..key import parser
from ..remote import RemoteRepository, PathNotAllowed
from ..repository import Repository
from . import BaseTestCase
@ -496,8 +497,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
hash = sha256(data).digest()
if hash not in seen:
seen.add(hash)
num_blocks = num_aes_blocks(len(data) - 41)
nonce = bytes_to_long(data[33:41])
mac, meta, data = parser(data)
num_blocks = num_aes_blocks(len(data))
nonce = bytes16_to_int(meta.iv)
for counter in range(nonce, nonce + num_blocks):
self.assert_not_in(counter, used)
used.add(counter)
@ -576,7 +578,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
def test_missing_manifest(self):
archive, repository = self.open_archive('archive1')
repository.delete(Manifest.MANIFEST_ID)
repository.delete(Manifest.manifest_id(repository))
repository.commit()
self.cmd('check', self.repository_location, exit_code=1)
output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
@ -587,7 +589,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
def test_extra_chunks(self):
self.cmd('check', self.repository_location, exit_code=0)
repository = Repository(self.repository_location)
repository.put(b'01234567890123456789012345678901', b'xxxx')
repository.put(b'0123456789012345', b'xxxx')
repository.commit()
repository.close()
self.cmd('check', self.repository_location, exit_code=1)

View File

@ -1,6 +1,7 @@
from binascii import hexlify
from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes
from ..crypto import AES, AES_GCM_MODE, AES_CTR_MODE, pbkdf2_sha256, get_random_bytes, \
bytes_to_int, bytes16_to_int, int_to_bytes16, increment_iv
from . import BaseTestCase
@ -9,9 +10,27 @@ class CryptoTestCase(BaseTestCase):
def test_bytes_to_int(self):
self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1)
def test_bytes_to_long(self):
self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1)
self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1')
def test_bytes16_to_int(self):
i, b = 1, b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'
self.assert_equal(bytes16_to_int(b), i)
self.assert_equal(int_to_bytes16(i), b)
i, b = (1 << 64) + 2, b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\2'
self.assert_equal(bytes16_to_int(b), i)
self.assert_equal(int_to_bytes16(i), b)
def test_increment_iv(self):
tests = [
# iv, amount, iv_expected
(0, 0, 0),
(0, 15, 1),
(0, 16, 1),
(0, 17, 2),
(0xffffffffffffffff, 32, 0x10000000000000001),
]
for iv, amount, iv_expected in tests:
iv = int_to_bytes16(iv)
iv_expected = int_to_bytes16(iv_expected)
self.assert_equal(increment_iv(iv, amount), iv_expected)
def test_pbkdf2_sha256(self):
self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)),
@ -28,18 +47,33 @@ class CryptoTestCase(BaseTestCase):
self.assert_equal(len(bytes2), 10)
self.assert_not_equal(bytes, bytes2)
def test_aes(self):
def test_aes_ctr(self):
key = b'X' * 32
iv = b'\0' * 16
data = b'foo' * 10
# encrypt
aes = AES(is_encrypt=True, key=key)
self.assert_equal(bytes_to_long(aes.iv, 8), 0)
cdata = aes.encrypt(data)
aes = AES(mode=AES_CTR_MODE, is_encrypt=True, key=key, iv=iv)
_, cdata = aes.compute_mac_and_encrypt(data)
self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466')
self.assert_equal(bytes_to_long(aes.iv, 8), 2)
# decrypt
aes = AES(is_encrypt=False, key=key)
self.assert_equal(bytes_to_long(aes.iv, 8), 0)
pdata = aes.decrypt(cdata)
# decrypt (correct mac/cdata)
aes = AES(mode=AES_CTR_MODE, is_encrypt=False, key=key, iv=iv)
pdata = aes.check_mac_and_decrypt(None, cdata)
self.assert_equal(data, pdata)
self.assert_equal(bytes_to_long(aes.iv, 8), 2)
def test_aes_gcm(self):
key = b'X' * 32
iv = b'A' * 16
data = b'foo' * 10
# encrypt
aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv)
mac, cdata = aes.compute_mac_and_encrypt(data)
self.assert_equal(hexlify(mac), b'c98aa10eb6b7031bcc2160878d9438fb')
self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741')
# decrypt (correct mac/cdata)
aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
pdata = aes.check_mac_and_decrypt(mac, cdata)
self.assert_equal(data, pdata)
# decrypt (incorrect mac/cdata)
aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
cdata = b'x' + cdata[1:] # corrupt cdata
self.assertRaises(Exception, aes.check_mac_and_decrypt, mac, cdata)

View File

@ -9,7 +9,7 @@ from . import BaseTestCase
class HashIndexTestCase(BaseTestCase):
def _generic_test(self, cls, make_value, sha):
idx = cls()
idx = cls(key_size=32)
self.assert_equal(len(idx), 0)
# Test set
for x in range(100):
@ -34,7 +34,7 @@ class HashIndexTestCase(BaseTestCase):
with open(idx_name.name, 'rb') as fd:
self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
# Make sure we can open the file
idx = cls.read(idx_name.name)
idx = cls.read(idx_name.name, key_size=32)
self.assert_equal(len(idx), 50)
for x in range(50, 100):
self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
@ -42,7 +42,7 @@ class HashIndexTestCase(BaseTestCase):
self.assert_equal(len(idx), 0)
idx.write(idx_name.name)
del idx
self.assert_equal(len(cls.read(idx_name.name)), 0)
self.assert_equal(len(cls.read(idx_name.name, key_size=32)), 0)
def test_nsindex(self):
self._generic_test(NSIndex, lambda x: (x, x),
@ -55,7 +55,7 @@ class HashIndexTestCase(BaseTestCase):
def test_resize(self):
n = 2000 # Must be >= MIN_BUCKETS
idx_name = tempfile.NamedTemporaryFile()
idx = NSIndex()
idx = NSIndex(key_size=32)
idx.write(idx_name.name)
initial_size = os.path.getsize(idx_name.name)
self.assert_equal(len(idx), 0)
@ -70,7 +70,7 @@ class HashIndexTestCase(BaseTestCase):
self.assert_equal(initial_size, os.path.getsize(idx_name.name))
def test_iteritems(self):
idx = NSIndex()
idx = NSIndex(key_size=32)
for x in range(100):
idx[bytes('%-0.32d' % x, 'ascii')] = x, x
all = list(idx.iteritems())

View File

@ -4,8 +4,7 @@ import shutil
import tempfile
from binascii import hexlify
from ..crypto import bytes_to_long, num_aes_blocks
from ..key import PlaintextKey, PassphraseKey, KeyfileKey
from ..key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT, increment_iv
from ..helpers import Location, unhexlify
from . import BaseTestCase
@ -14,22 +13,26 @@ class KeyTestCase(BaseTestCase):
class MockArgs:
repository = Location(tempfile.mkstemp()[1])
compression = COMPR_DEFAULT
mac = None
cipher = None
keyfile2_key_file = """
BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000
hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq
bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l
1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2
hQCG2L2L/9PUu4WIuKvGrsXoP7syemujNfcZws5jLp2UPva4PkQhQsrF1RYDEMLh2eF9Ol
rwtkThq1tnh7KjWMG9Ijt7/aoQtq0zDYP/xaFF8XXSJxiyP5zjH5+spB6RL0oQHvbsliSh
/cXJq7jrqmrJ1phd6dg4SHAM/i+hubadZoS6m25OQzYAW09wZD/phG8OVa698Z5ed3HTaT
SmrtgJL3EoOKgUI9d6BLE4dJdBqntifo""".strip()
BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000
hqRzYWx02gAgA1l4jfyv22y6U/mxxDT8HodSWAcX0g3nOESrQcNnBsundmVyc2lvbgGqaX
RlcmF0aW9uc84AAYagqWFsZ29yaXRobaRnbWFjpGhhc2iw7eaB54JssAOnM1S4S9CeTaRk
YXRh2gDQzmuyg3iYjMeTLObY+ybI+QfngB+5mmHeEAfBa42fuEZgqM3rYyMj2XfgvamF+O
0asvhEyy9om190FaOxQ4RiiTMNqSP0FKLmd1i5ZyDMfRyp7JbscRFs9Ryk28yXWkv0MgQy
EAYlaycY+6lWdRSgEPxidyPl9t9dr2AI/UuiQytwqmcmXgWD6Px6wgpOS/4AcRmEvDqIIl
Rc2xsu+RevGAxk5rnrIIRPr7WB5R2cinzEn9ylDgBDt9LZbq706ELgtwVTnjWB8FBTPwVI
vLTTXQ==
""".strip()
keyfile2_cdata = unhexlify(re.sub('\W', '', """
0055f161493fcfc16276e8c31493c4641e1eb19a79d0326fad0291e5a9c98e5933
00000000000003e8d21eaf9b86c297a8cd56432e1915bb
0393c4102e5ce8f5e9477c9e4ce2de453121aa139600001402c41000000000000000000000000000000000
c2c407b0147a64a379d1
"""))
keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314')
keyfile2_id = unhexlify('dd9451069663931c8abd85452d016733')
def setUp(self):
self.tmppath = tempfile.mkdtemp()
@ -45,25 +48,36 @@ class KeyTestCase(BaseTestCase):
_location = _Location()
id = bytes(32)
def _test_make_testdata(self):
# modify tearDown to not kill the key file first, before using this
os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
print("keyfile2_key_file: find the it in the filesystem, see location in test log output")
print("keyfile2_cdata:", hexlify(key.encrypt(b'payload')))
print("keyfile2_id:", hexlify(key.id_hash(b'payload')))
assert False
def test_plaintext(self):
key = PlaintextKey.create(None, None)
key = PlaintextKey.create(None, self.MockArgs())
data = b'foo'
self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb3')
self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
def test_keyfile(self):
os.environ['BORG_PASSPHRASE'] = 'test'
key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
self.assert_equal(key.enc_iv, b'\0'*16)
manifest = key.encrypt(b'XXX')
self.assert_equal(key.extract_nonce(manifest), 0)
self.assert_equal(key.extract_iv(manifest), b'\0'*16)
manifest2 = key.encrypt(b'XXX')
self.assert_not_equal(manifest, manifest2)
self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
self.assert_equal(key.extract_nonce(manifest2), 1)
iv = key.extract_nonce(manifest)
self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01')
iv = key.extract_iv(manifest)
key2 = KeyfileKey.detect(self.MockRepository(), manifest)
self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD))
# we assume that the payload fits into one 16B AES block (which is given for b'XXX').
iv_plus_1 = increment_iv(iv, 16)
self.assert_equal(key2.enc_iv, iv_plus_1)
# Key data sanity check
self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3)
self.assert_equal(key2.chunk_seed == 0, False)
@ -79,25 +93,28 @@ class KeyTestCase(BaseTestCase):
def test_passphrase(self):
    """Create a passphrase-derived key, check deterministic key material,
    IV handling, key re-detection, and an id_hash/decrypt round-trip.

    NOTE(review): flattened merge-diff residue -- old (nonce based) and new
    (full-IV based) variants of several statements appear back to back,
    including two contradictory id_hash assertions near the end.  Confirm
    against version control which variant is current.
    """
    os.environ['BORG_PASSPHRASE'] = 'test'
    key = PassphraseKey.create(self.MockRepository(), None)     # old variant (rebound below)
    self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)   # old API
    key = PassphraseKey.create(self.MockRepository(), self.MockArgs())  # new variant; rebinds key
    # XXX self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
    self.assert_equal(key.enc_iv, b'\0'*16)
    # fixed expected values -- presumably derived deterministically from the
    # passphrase 'test' by the KDF (regression values); TODO confirm
    self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6')
    self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901')
    self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a')
    self.assert_equal(key.chunk_seed, -775740477)
    manifest = key.encrypt(b'XXX')
    self.assert_equal(key.extract_nonce(manifest), 0)           # old API
    self.assert_equal(key.extract_iv(manifest), b'\0'*16)       # new API
    manifest2 = key.encrypt(b'XXX')
    # same plaintext, different ciphertext (IV advanced) ...
    self.assert_not_equal(manifest, manifest2)
    # ... but identical plaintext after decryption
    self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
    self.assert_equal(key.extract_nonce(manifest2), 1)          # old API
    iv = key.extract_nonce(manifest)                            # old API (rebound just below)
    self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01')  # new API
    iv = key.extract_iv(manifest)                               # new API; rebinds iv
    # re-derive the key from the passphrase via detect() and check that
    # the IV and all key material match the original key
    key2 = PassphraseKey.detect(self.MockRepository(), manifest)
    self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD))  # old API
    # we assume that the payload fits into one 16B AES block (which is given for b'XXX').
    iv_plus_1 = increment_iv(iv, 16)
    self.assert_equal(key2.enc_iv, iv_plus_1)  # new API
    self.assert_equal(key.id_key, key2.id_key)
    self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key)
    self.assert_equal(key.enc_key, key2.enc_key)
    self.assert_equal(key.chunk_seed, key2.chunk_seed)
    data = b'foo'
    # old expected digest (32 bytes); contradicts the 16-byte assertion below
    self.assert_equal(hexlify(key.id_hash(data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990')
    # new expected digest (16 bytes)
    self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde0501')
    # cross-check: key2 must be able to decrypt what key encrypted
    self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))

View File

@ -9,16 +9,15 @@ from ..repository import Repository
from . import BaseTestCase
from .mock import patch
class RepositoryTestCaseBase(BaseTestCase):
    """Common fixture for repository tests: one fresh temp-dir repository per test.

    Fix (review): the flattened merge diff had left a dead, shadowed duplicate
    ``open()`` (the pre-``key_size`` signature) and a duplicate repository-open
    in ``setUp`` whose first ``Repository`` was never closed (resource leak).
    Only the newer ``key_size``-aware variants -- which match the ``key_size``
    class attribute introduced with them -- are kept; the effective interface
    (``open(create=False, key_size=None)``) is unchanged.
    """

    # index key size in bytes used when creating the test repository
    key_size = 32

    def open(self, create=False, key_size=None):
        """Open (optionally create) the repository under this test's temp dir."""
        return Repository(os.path.join(self.tmppath, 'repository'), create=create, key_size=key_size)

    def setUp(self):
        # fresh temporary directory and repository for every test
        self.tmppath = tempfile.mkdtemp()
        self.repository = self.open(create=True, key_size=self.key_size)

    def tearDown(self):
        self.repository.close()
@ -209,7 +208,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
def open_index(self):
    """Read and return the NSIndex belonging to the newest committed segment.

    Fix (review): the flattened merge diff had left the stale call (without
    ``key_size``) as the first ``return``, making the updated ``key_size``-aware
    call unreachable dead code; only the updated call is kept so the index is
    read with the same key size the test repository was created with.
    """
    return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())),
                        key_size=self.key_size)
def corrupt_object(self, id_):
idx = self.open_index()
@ -317,8 +317,9 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
class RemoteRepositoryTestCase(RepositoryTestCase):
    """Re-run the RepositoryTestCase suite against a RemoteRepository.

    Fix (review): the flattened merge diff had left a dead, shadowed duplicate
    ``open()`` without the ``key_size`` parameter; only the newer variant is
    kept.  The effective interface (``open(create=False, key_size=None)``) is
    unchanged.
    """

    def open(self, create=False, key_size=None):
        # '__testsuite__:' presumably routes to an in-process test transport
        # instead of a real ssh connection -- confirm against Location/RemoteRepository
        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
                                create=create, key_size=key_size)

    def test_invalid_rpc(self):
        # calling a method that is not exposed over RPC must raise InvalidRPCMethod
        self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None))
@ -326,5 +327,6 @@ class RemoteRepositoryTestCase(RepositoryTestCase):
class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
    """Re-run the RepositoryCheckTestCase suite against a RemoteRepository.

    Fix (review): the flattened merge diff had left a dead, shadowed duplicate
    ``open()`` without the ``key_size`` parameter; only the newer variant is
    kept.  The effective interface (``open(create=False, key_size=None)``) is
    unchanged.
    """

    def open(self, create=False, key_size=None):
        # '__testsuite__:' presumably routes to an in-process test transport
        # instead of a real ssh connection -- confirm against Location/RemoteRepository
        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
                                create=create, key_size=key_size)

View File

@ -53,6 +53,7 @@ User's Guide
quickstart
usage
faq
tuning
internals
Getting help

147
docs/tuning.rst Normal file
View File

@ -0,0 +1,147 @@
.. _tuning:
.. include:: global.rst.inc
Tuning
======
General hints
-------------
CPU load, backup speed, memory and storage usage are covered below.
As performance and resource usage depend on a lot of factors, you may need to
tweak the parameters a bit and retry until you find the best ones for your
setup.
Usually, the default parameters are selected for best speed under the assumption
that you run a modern machine with fast CPU, fast I/O and a good amount of RAM.
If you run an older or low-resource machine or your backup target or connection
to it is slow, tweaking parameters might give significant speedups.
Exclude crap data
-----------------
Maybe you don't want to backup:
* cache / temporary files (they can be rebuilt / are useless)
* specific directories / filenames / file extensions you do not need
* backups (some people make backups of backups...)
You can exclude these, so they don't waste time and space.
Avoid scrolling
---------------
If you do benchmarks, avoid creating a lot of log output, especially if it
means scrolling text in a window on a graphical user interface.
Rather use much less log output or at least redirect the output to a log file,
which is also much faster than scrolling.
Speed (in general)
------------------
Keep an eye on CPU and I/O bounds. Try to find the sweet spot in the middle
where it is not too much I/O bound and not too much CPU bound.
I/O bound
~~~~~~~~~
If CPU load does not sum up to 1 core fully loaded while backing up, the
process is likely I/O bound (can't read or write data fast enough).
Maybe you want to try higher compression then so it has less data to write.
Or get faster I/O, if possible.
CPU bound
~~~~~~~~~
If you have 1 core fully loaded most of the time, but your backup seems slow,
the process is likely CPU bound (can't compute fast enough).
Maybe you want to try lower compression then so it has less to compute.
Using a faster MAC or cipher method might also be an option.
Or get a faster CPU.
I/O speed
---------
From fast to slower:
* fast local filesystem, SSD or HDD, via PCIe, SATA, USB
* ssh connection to a remote server's borg instance
* mounted network filesystems of a remote server
Not only throughput influences timing, latency does also.
Backup space needed
-------------------
If you always backup the same data mostly, you will often save a lot of space
due to deduplication - this works independently from compression.
To avoid running out of space, regularly prune your backup archives according
to your needs. Backups of the same machine which are close in time are usually
very cheap (because most data is the same and deduplicated).
Compression
-----------
If you have a fast backup source and destination and you are not low on backup space:
Switch off compression, your backup will run faster and with less cpu load.
If you just want to save a bit space, but stay relatively fast:
Try zlib level 1.
If you have very slow source or destination (e.g. a remote backup space via a
network connection that is quite slower than your local and remote storage):
Try a higher zlib or lzma compression level.
Authentication & MAC selection
------------------------------
Real MACs (Message Authentication Codes) can only be used when a secret key is
available. A MAC authenticates your backup data, so malicious tampering can be detected.
Without a key, a simple hash will be used (which helps to detect accidental
data corruption, but cannot detect malicious data tampering).
Older or simple 32bit machine architecture
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use sha256 (no key) or hmac-sha256 (key).
64bit architecture, but no AES hardware acceleration in the CPU
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use sha512-256 (no key) or hmac-sha512-256 (key).
Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use ghash (no key) or gmac (key).
Encryption & Cipher selection
-----------------------------
Always encrypt your backups (and keep passphrase and key file [if any] safe).
The cipher selection chooses between miscellaneous AEAD ciphers (authenticated
encryption with associated data); the construction is EtM (encrypt-then-MAC):
Older or simple 32bit machine architecture
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use aes256-ctr + hmac-sha256.
64bit architecture, but no AES hardware acceleration in the CPU
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use aes256-ctr + hmac-sha512-256.
Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use aes256-gcm (AEAD 1-pass cipher).
RAM usage
---------
Depending on the amount of files and chunks in the repository, memory usage
varies:
* about 250+B RAM per file (for "files" cache)
* about 44B RAM per 64kiB chunk (for "chunks" cache)
* about 40B RAM per 64kiB chunk (for repository index, if remote repo is used,
this will be allocated on remote side)
If you run into memory usage issues, your options are:
* get more RAM (or more swap space; speed will be slower)
* disable the "files" cache, speed will be slower
* have less files / chunks per repo
Note: RAM compression likely won't help as a lot of that data is using
msgpack, which is already rather efficient.

View File

@ -102,6 +102,12 @@ elif sys.platform.startswith('freebsd'):
elif sys.platform == 'darwin':
ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source]))
# msgpack pure python data corruption was fixed in 0.4.6.
# Also, we might use some rather recent API features.
# blosc provides the multi-threaded compression algorithms (lz4 etc.).
install_requires=['msgpack-python>=0.4.6', 'blosc>=1.2.5']
# the lzma module is in the stdlib only since Python 3.3; older
# interpreters need the backport package for lzma compression support
if sys.version_info < (3, 3):
    install_requires.append('backports.lzma')
setup(
name='borgbackup',
version=versioneer.get_version(),
@ -132,7 +138,5 @@ setup(
scripts=['scripts/borg'],
cmdclass=cmdclass,
ext_modules=ext_modules,
# msgpack pure python data corruption was fixed in 0.4.6.
# Also, we might use some rather recent API features.
install_requires=['msgpack-python>=0.4.6']
install_requires=install_requires,
)