Merge branch 'merge-all' of ../attic into experimental

Thomas Waldmann 2015-06-05 22:58:08 +02:00
commit 3a38457def
21 changed files with 1183 additions and 241 deletions

48
CHANGES-experimental.txt Normal file
View File

@ -0,0 +1,48 @@
Important note about "experimental" branch
==========================================
The goal of the "experimental" branch is to merge all of this:
- changesets from the master branch
- features that DO IMPACT compatibility
- play with new technologies
- etc.
THERE IS NO GUARANTEE THAT IT IS COMPATIBLE WITH MASTER BRANCH OR PREVIOUS
"experimental" CODE nor THAT YOU CAN SWITCH BACK AND FORTH BETWEEN BRANCHES
WITHIN THE SAME REPOSITORY WITHOUT ENCOUNTERING SEVERE ISSUES.
Please also see the LICENSE file for more information.
Stuff in "experimental" that is not in "master" minus minor changes
===================================================================
added tuning docs
attic init --compression NN --cipher NN --mac NN ...
(see attic init --help)
new hashes: sha512-256
sha512
sha1
ghash (default)
new MACs: hmac-sha512-256
hmac-sha512
hmac-sha1
gmac (default)
new ciphers: aes256-ctr + hmac-sha512-256
aes256-gcm (default)
new compression: no compression (default)
zlib level 1..9 (previously, level 6 was hardcoded)
lzma preset 0..9
lz4 (and other) multi-threaded algorithms from the blosc library
source: more flexible type 0x03 header format, allowing the hash algorithm,
compression algorithm and level, encryption algorithm and key type to be given.
The IV is stored at full length; the length of the stored IV/MAC/hash is flexible.
Indexing key size (key = id_hash()) is flexible and configurable per repo.
source: less hardcoding, numeric offsets / lengths
source: flexible hashing, compression, encryption, key dispatching
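To illustrate the type 0x03 header format mentioned above: a stored object is one
type byte followed by a msgpack-encoded (mac, meta, data) tuple, where meta carries
the compression, key, mac and cipher types plus the full IV (compare parser03 and
generate in key.py further down). A minimal reader sketch, assuming msgpack is
available (read_type03 is a hypothetical helper name):

    import msgpack
    from collections import namedtuple

    Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy')

    def read_type03(all_data):
        # the first byte selects the parser; 0x03 is the new flexible format
        assert all_data[0] == 0x03
        mac, meta_tuple, data = msgpack.unpackb(all_data[1:], use_list=False)
        return mac, Meta(*meta_tuple), data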

View File

@ -34,7 +34,7 @@ Space efficient storage
Optional data encryption Optional data encryption
All data can be protected using 256-bit AES encryption and data integrity All data can be protected using 256-bit AES encryption and data integrity
and authenticity is verified using HMAC-SHA256. and authenticity is verified using a MAC (message authentication code).
Off-site backups Off-site backups
Borg can store data on any remote host accessible over SSH. This is Borg can store data on any remote host accessible over SSH. This is
@ -49,6 +49,7 @@ What do I need?
Borg requires Python 3.2 or above to work. Borg requires Python 3.2 or above to work.
Borg also requires a sufficiently recent OpenSSL (>= 1.0.0). Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
In order to mount archives as filesystems, llfuse is required. In order to mount archives as filesystems, llfuse is required.
For other Python requirements, please see setup.py install_requires.
How do I install it? How do I install it?
-------------------- --------------------

View File

@ -366,7 +366,7 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
void *key = NULL; void *key = NULL;
while((key = hashindex_next_key(index, key))) { while((key = hashindex_next_key(index, key))) {
values = key + 32; values = key + index->key_size;
unique_size += values[1]; unique_size += values[1];
unique_csize += values[2]; unique_csize += values[2];
size += values[0] * values[1]; size += values[0] * values[1];

View File

@ -616,7 +616,7 @@ class ArchiveChecker:
self.repository = repository self.repository = repository
self.init_chunks() self.init_chunks()
self.key = self.identify_key(repository) self.key = self.identify_key(repository)
if Manifest.MANIFEST_ID not in self.chunks: if Manifest.manifest_id(repository) not in self.chunks:
self.manifest = self.rebuild_manifest() self.manifest = self.rebuild_manifest()
else: else:
self.manifest, _ = Manifest.load(repository, key=self.key) self.manifest, _ = Manifest.load(repository, key=self.key)
@ -635,7 +635,7 @@ class ArchiveChecker:
# Explicity set the initial hash table capacity to avoid performance issues # Explicity set the initial hash table capacity to avoid performance issues
# due to hash table "resonance" # due to hash table "resonance"
capacity = int(len(self.repository) * 1.2) capacity = int(len(self.repository) * 1.2)
self.chunks = ChunkIndex(capacity) self.chunks = ChunkIndex(capacity, key_size=self.repository.key_size)
marker = None marker = None
while True: while True:
result = self.repository.list(limit=10000, marker=marker) result = self.repository.list(limit=10000, marker=marker)
@ -687,7 +687,7 @@ class ArchiveChecker:
Missing and/or incorrect data is repaired when detected Missing and/or incorrect data is repaired when detected
""" """
# Exclude the manifest from chunks # Exclude the manifest from chunks
del self.chunks[Manifest.MANIFEST_ID] del self.chunks[Manifest.manifest_id(self.repository)]
def mark_as_possibly_superseded(id_): def mark_as_possibly_superseded(id_):
if self.chunks.get(id_, (0,))[0] == 0: if self.chunks.get(id_, (0,))[0] == 0:

View File

@ -16,7 +16,7 @@ from . import __version__
from .archive import Archive, ArchiveChecker from .archive import Archive, ArchiveChecker
from .repository import Repository from .repository import Repository
from .cache import Cache from .cache import Cache
from .key import key_creator from .key import key_creator, maccer_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT
from .helpers import Error, location_validator, format_time, format_file_size, \ from .helpers import Error, location_validator, format_time, format_file_size, \
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
@ -30,11 +30,11 @@ class Archiver:
def __init__(self): def __init__(self):
self.exit_code = 0 self.exit_code = 0
def open_repository(self, location, create=False, exclusive=False): def open_repository(self, location, create=False, exclusive=False, key_size=None):
if location.proto == 'ssh': if location.proto == 'ssh':
repository = RemoteRepository(location, create=create) repository = RemoteRepository(location, create=create, key_size=key_size)
else: else:
repository = Repository(location.path, create=create, exclusive=exclusive) repository = Repository(location.path, create=create, exclusive=exclusive, key_size=key_size)
repository._location = location repository._location = location
return repository return repository
@ -59,10 +59,12 @@ class Archiver:
def do_init(self, args): def do_init(self, args):
"""Initialize an empty repository""" """Initialize an empty repository"""
print('Initializing repository at "%s"' % args.repository.orig) print('Initializing repository at "%s"' % args.repository.orig)
repository = self.open_repository(args.repository, create=True, exclusive=True) key_cls = key_creator(args)
key = key_creator(repository, args) maccer_cls = maccer_creator(args, key_cls)
repository = self.open_repository(args.repository, create=True, exclusive=True,
key_size=maccer_cls.digest_size)
key = key_cls.create(repository, args)
manifest = Manifest(key, repository) manifest = Manifest(key, repository)
manifest.key = key
manifest.write() manifest.write()
repository.commit() repository.commit()
Cache(repository, key, manifest, warn_if_unencrypted=False) Cache(repository, key, manifest, warn_if_unencrypted=False)
@ -523,8 +525,39 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
init_epilog = textwrap.dedent(""" init_epilog = textwrap.dedent("""
This command initializes an empty repository. A repository is a filesystem This command initializes an empty repository. A repository is a filesystem
directory containing the deduplicated data from zero or more archives. directory containing the deduplicated data from zero or more archives.
Encryption can be enabled at repository init time. Encryption can be enabled, and the compression, cipher and MAC methods can be
""") chosen at repository init time.
--compression METHODs (default: %02d):
- 00 no compression
- 01..09 zlib levels 1..9 (1 means low compression, 9 max. compression)
- 10..19 lzma levels 0..9 (0 means low compression, 9 max. compression)
- 20..29 lz4 (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 30..39 lz4hc (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 40..49 blosclz (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 50..59 snappy (blosc) levels 0..9 (0 = no, 9 = max. compression)
- 60..69 zlib (blosc) levels 0..9 (0 = no, 9 = max. compression)
--cipher METHODs (default: %02d or %02d)
- 00 No encryption
- 01 AEAD: AES-CTR + HMAC-SHA256
- 02 AEAD: AES-GCM
--mac METHODs (default: %02d or %02d):
- 00 sha256 (simple hash, no MAC, faster on 32bit CPU)
- 01 sha512-256 (simple hash, no MAC, faster on 64bit CPU)
- 02 ghash (simple hash, no MAC, fastest on CPUs with AES-GCM support)
- 03 sha1 (simple hash, no MAC, fastest on CPUs without AES-GCM support)
- 04 sha512 (simple hash, no MAC, faster on 64bit CPU)
- 10 hmac-sha256 (MAC, faster on 32bit CPU)
- 11 hmac-sha512-256 (MAC, faster on 64bit CPU)
- 13 hmac-sha1 (MAC, fastest on CPUs without AES-GCM support)
- 14 hmac-sha512 (MAC, faster on 64bit CPU)
- 20 gmac (MAC, fastest on CPUs with AES-GCM support)
""" % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT))
subparser = subparsers.add_parser('init', parents=[common_parser], subparser = subparsers.add_parser('init', parents=[common_parser],
description=self.do_init.__doc__, epilog=init_epilog, description=self.do_init.__doc__, epilog=init_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter) formatter_class=argparse.RawDescriptionHelpFormatter)
@ -534,7 +567,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
help='repository to create') help='repository to create')
subparser.add_argument('-e', '--encryption', dest='encryption', subparser.add_argument('-e', '--encryption', dest='encryption',
choices=('none', 'passphrase', 'keyfile'), default='none', choices=('none', 'passphrase', 'keyfile'), default='none',
help='select encryption method') help='select encryption key method')
subparser.add_argument('-C', '--cipher', dest='cipher',
type=int, default=None, metavar='METHOD',
help='select cipher (0..2)')
subparser.add_argument('-c', '--compression', dest='compression',
type=int, default=COMPR_DEFAULT, metavar='METHOD',
help='select compression method (0..69)')
subparser.add_argument('-m', '--mac', dest='mac',
type=int, default=None, metavar='METHOD',
help='select hash/mac method (0..20)')
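For illustration, a hypothetical invocation using the method numbers documented in
the epilog above (command name as used in CHANGES-experimental.txt, repository path
is a placeholder); it selects zlib level 6 compression, AES-GCM and GMAC:

    attic init --encryption keyfile --compression 06 --cipher 02 --mac 20 /path/to/repo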
check_epilog = textwrap.dedent(""" check_epilog = textwrap.dedent("""
The check command verifies the consistency of a repository and the corresponding The check command verifies the consistency of a repository and the corresponding

View File

@ -95,7 +95,7 @@ class Cache:
config.set('cache', 'manifest', '') config.set('cache', 'manifest', '')
with open(os.path.join(self.path, 'config'), 'w') as fd: with open(os.path.join(self.path, 'config'), 'w') as fd:
config.write(fd) config.write(fd)
ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) ChunkIndex(key_size=self.repository.key_size).write(os.path.join(self.path, 'chunks').encode('utf-8'))
with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd: with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd:
pass # empty file pass # empty file
with open(os.path.join(self.path, 'files'), 'wb') as fd: with open(os.path.join(self.path, 'files'), 'wb') as fd:
@ -118,7 +118,8 @@ class Cache:
self.timestamp = self.config.get('cache', 'timestamp', fallback=None) self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
self.key_type = self.config.get('cache', 'key_type', fallback=None) self.key_type = self.config.get('cache', 'key_type', fallback=None)
self.previous_location = self.config.get('cache', 'previous_location', fallback=None) self.previous_location = self.config.get('cache', 'previous_location', fallback=None)
self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8')) self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'),
key_size=self.repository.key_size)
self.files = None self.files = None
def open(self): def open(self):
@ -272,7 +273,7 @@ class Cache:
return archive_name return archive_name
def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out): def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out):
chunk_idx = ChunkIndex() chunk_idx = ChunkIndex(key_size=repository.key_size)
cdata = repository.get(archive_id) cdata = repository.get(archive_id)
data = key.decrypt(archive_id, cdata) data = key.decrypt(archive_id, cdata)
add(chunk_idx, archive_id, len(data), len(cdata)) add(chunk_idx, archive_id, len(data), len(cdata))
@ -299,13 +300,13 @@ class Cache:
tf_out.addfile(tarinfo, f) tf_out.addfile(tarinfo, f)
os.unlink(file_tmp) os.unlink(file_tmp)
def create_master_idx(chunk_idx, tf_in, tmp_dir): def create_master_idx(chunk_idx, repository, tf_in, tmp_dir):
chunk_idx.clear() chunk_idx.clear()
for tarinfo in tf_in: for tarinfo in tf_in:
archive_id_hex = tarinfo.name archive_id_hex = tarinfo.name
tf_in.extract(archive_id_hex, tmp_dir) tf_in.extract(archive_id_hex, tmp_dir)
chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
archive_chunk_idx = ChunkIndex.read(chunk_idx_path) archive_chunk_idx = ChunkIndex.read(chunk_idx_path, key_size=repository.key_size)
for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems(): for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
add(chunk_idx, chunk_id, size, csize, incr=count) add(chunk_idx, chunk_id, size, csize, incr=count)
os.unlink(chunk_idx_path) os.unlink(chunk_idx_path)
@ -334,7 +335,7 @@ class Cache:
rename_out_archive() rename_out_archive()
print('Merging collection into master chunks cache...') print('Merging collection into master chunks cache...')
in_archive = open_in_archive() in_archive = open_in_archive()
create_master_idx(self.chunks, in_archive, tmp_dir) create_master_idx(self.chunks, repository, in_archive, tmp_dir)
close_archive(in_archive) close_archive(in_archive)
print('Done.') print('Done.')

View File

@ -7,6 +7,12 @@ from libc.stdlib cimport malloc, free
API_VERSION = 2 API_VERSION = 2
AES_CTR_MODE = 1
AES_GCM_MODE = 2
MAC_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below.
IV_SIZE = 16 # bytes; 128 bits
cdef extern from "openssl/rand.h": cdef extern from "openssl/rand.h":
int RAND_bytes(unsigned char *buf, int num) int RAND_bytes(unsigned char *buf, int num)
@ -23,6 +29,7 @@ cdef extern from "openssl/evp.h":
pass pass
const EVP_MD *EVP_sha256() const EVP_MD *EVP_sha256()
const EVP_CIPHER *EVP_aes_256_ctr() const EVP_CIPHER *EVP_aes_256_ctr()
const EVP_CIPHER *EVP_aes_256_gcm()
void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a) void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a) void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
@ -36,20 +43,33 @@ cdef extern from "openssl/evp.h":
const unsigned char *in_, int inl) const unsigned char *in_, int inl)
int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, unsigned char *ptr)
int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen, int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen,
const unsigned char *salt, int saltlen, int iter, const unsigned char *salt, int saltlen, int iter,
const EVP_MD *digest, const EVP_MD *digest,
int keylen, unsigned char *out) int keylen, unsigned char *out)
int EVP_CTRL_GCM_GET_TAG
int EVP_CTRL_GCM_SET_TAG
int EVP_CTRL_GCM_SET_IVLEN
import struct import struct
_int = struct.Struct('>I') _int = struct.Struct('>I')
_long = struct.Struct('>Q') _2long = struct.Struct('>QQ')
bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0] bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
long_to_bytes = lambda x: _long.pack(x)
def bytes16_to_int(b, offset=0):
h, l = _2long.unpack_from(b, offset)
return (h << 64) + l
def int_to_bytes16(i):
max_uint64 = 0xffffffffffffffff
l = i & max_uint64
h = (i >> 64) & max_uint64
return _2long.pack(h, l)
def num_aes_blocks(length): def num_aes_blocks(length):
@ -59,6 +79,22 @@ def num_aes_blocks(length):
return (length + 15) // 16 return (length + 15) // 16
def increment_iv(iv, amount):
"""
Increment the given IV, considering that <amount> bytes of data were
encrypted based on it. In CTR / GCM mode, the IV is just a counter and
must never repeat.
:param iv: current IV, 16 bytes (128 bit)
:param amount: amount of data (in bytes) that was encrypted
:return: new IV, 16 bytes (128 bit)
"""
iv = bytes16_to_int(iv)
iv += num_aes_blocks(amount)
iv = int_to_bytes16(iv)
return iv
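A small worked example, assuming the helpers defined above: encrypting 32 bytes
consumes two 16-byte AES blocks, so the 128-bit counter advances by two:

    iv = b'\x00' * 16
    new_iv = increment_iv(iv, 32)        # num_aes_blocks(32) == 2
    assert bytes16_to_int(new_iv) == 2   # counter advanced by two blocks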
def pbkdf2_sha256(password, salt, iterations, size): def pbkdf2_sha256(password, salt, iterations, size):
"""Password based key derivation function 2 (RFC2898) """Password based key derivation function 2 (RFC2898)
""" """
@ -93,12 +129,19 @@ cdef class AES:
""" """
cdef EVP_CIPHER_CTX ctx cdef EVP_CIPHER_CTX ctx
cdef int is_encrypt cdef int is_encrypt
cdef int mode
def __cinit__(self, is_encrypt, key, iv=None): def __cinit__(self, mode, is_encrypt, key, iv=None):
EVP_CIPHER_CTX_init(&self.ctx) EVP_CIPHER_CTX_init(&self.ctx)
self.mode = mode
self.is_encrypt = is_encrypt self.is_encrypt = is_encrypt
# Set cipher type and mode # Set cipher type and mode
cipher_mode = EVP_aes_256_ctr() if mode == AES_CTR_MODE:
cipher_mode = EVP_aes_256_ctr()
elif mode == AES_GCM_MODE:
cipher_mode = EVP_aes_256_gcm()
else:
raise Exception('unknown mode')
if self.is_encrypt: if self.is_encrypt:
if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
raise Exception('EVP_EncryptInit_ex failed') raise Exception('EVP_EncryptInit_ex failed')
@ -117,6 +160,10 @@ cdef class AES:
key2 = key key2 = key
if iv: if iv:
iv2 = iv iv2 = iv
if self.mode == AES_GCM_MODE:
# Set IV length (bytes)
if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL):
raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed')
# Initialise key and IV # Initialise key and IV
if self.is_encrypt: if self.is_encrypt:
if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
@ -125,16 +172,26 @@ cdef class AES:
if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
raise Exception('EVP_DecryptInit_ex failed') raise Exception('EVP_DecryptInit_ex failed')
@property def add(self, aad):
def iv(self): cdef int aadl = len(aad)
return self.ctx.iv[:16] cdef int outl
if self.mode != AES_GCM_MODE:
raise Exception('additional data only supported for AES GCM mode')
# Zero or more calls to specify any AAD
if self.is_encrypt:
if not EVP_EncryptUpdate(&self.ctx, NULL, &outl, aad, aadl):
raise Exception('EVP_EncryptUpdate failed')
else: # decrypt
if not EVP_DecryptUpdate(&self.ctx, NULL, &outl, aad, aadl):
raise Exception('EVP_DecryptUpdate failed')
def encrypt(self, data): def compute_mac_and_encrypt(self, data):
cdef int inl = len(data) cdef int inl = len(data)
cdef int ctl = 0 cdef int ctl = 0
cdef int outl = 0 cdef int outl = 0
# note: modes that use padding, need up to one extra AES block (16b) # note: modes that use padding, need up to one extra AES block (16B)
cdef unsigned char *out = <unsigned char *>malloc(inl+16) cdef unsigned char *out = <unsigned char *>malloc(inl+16)
cdef unsigned char *mac = <unsigned char *>malloc(MAC_SIZE)
if not out: if not out:
raise MemoryError raise MemoryError
try: try:
@ -144,15 +201,20 @@ cdef class AES:
if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
raise Exception('EVP_EncryptFinal failed') raise Exception('EVP_EncryptFinal failed')
ctl += outl ctl += outl
return out[:ctl] if self.mode == AES_GCM_MODE:
# Get tag (mac) - only GCM mode. for CTR, the returned mac is undefined
if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, MAC_SIZE, mac):
raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed')
return (mac[:MAC_SIZE]), out[:ctl]
finally: finally:
free(mac)
free(out) free(out)
def decrypt(self, data): def check_mac_and_decrypt(self, mac, data):
cdef int inl = len(data) cdef int inl = len(data)
cdef int ptl = 0 cdef int ptl = 0
cdef int outl = 0 cdef int outl = 0
# note: modes that use padding, need up to one extra AES block (16b). # note: modes that use padding, need up to one extra AES block (16B).
# This is what the openssl docs say. I am not sure this is correct, # This is what the openssl docs say. I am not sure this is correct,
# but OTOH it will not cause any harm if our buffer is a little bigger. # but OTOH it will not cause any harm if our buffer is a little bigger.
cdef unsigned char *out = <unsigned char *>malloc(inl+16) cdef unsigned char *out = <unsigned char *>malloc(inl+16)
@ -162,10 +224,12 @@ cdef class AES:
if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl):
raise Exception('EVP_DecryptUpdate failed') raise Exception('EVP_DecryptUpdate failed')
ptl = outl ptl = outl
if self.mode == AES_GCM_MODE:
# Set expected tag (mac) value.
if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, MAC_SIZE, mac):
raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed')
if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
# this error check is very important for modes with padding or # for GCM mode, a failure here means corrupted / tampered tag (mac) or data
# authentication. for them, a failure here means corrupted data.
# CTR mode does not use padding nor authentication.
raise Exception('EVP_DecryptFinal failed') raise Exception('EVP_DecryptFinal failed')
ptl += outl ptl += outl
return out[:ptl] return out[:ptl]

View File

@ -26,9 +26,11 @@ _NoDefault = object()
cdef class IndexBase: cdef class IndexBase:
cdef HashIndex *index cdef HashIndex *index
key_size = 32 cdef int key_size
def __cinit__(self, capacity=0, path=None): def __cinit__(self, capacity=0, path=None, key_size=None):
assert key_size is not None
self.key_size = key_size
if path: if path:
self.index = hashindex_read(<bytes>os.fsencode(path)) self.index = hashindex_read(<bytes>os.fsencode(path))
if not self.index: if not self.index:
@ -43,8 +45,8 @@ cdef class IndexBase:
hashindex_free(self.index) hashindex_free(self.index)
@classmethod @classmethod
def read(cls, path): def read(cls, path, key_size=None):
return cls(path=path) return cls(path=path, key_size=key_size)
def write(self, path): def write(self, path):
if not hashindex_write(self.index, <bytes>os.fsencode(path)): if not hashindex_write(self.index, <bytes>os.fsencode(path)):
@ -61,7 +63,7 @@ cdef class IndexBase:
self[key] = value self[key] = value
def __delitem__(self, key): def __delitem__(self, key):
assert len(key) == 32 assert len(key) == self.key_size
if not hashindex_delete(self.index, <char *>key): if not hashindex_delete(self.index, <char *>key):
raise Exception('hashindex_delete failed') raise Exception('hashindex_delete failed')
@ -90,14 +92,14 @@ cdef class NSIndex(IndexBase):
value_size = 8 value_size = 8
def __getitem__(self, key): def __getitem__(self, key):
assert len(key) == 32 assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key) data = <int *>hashindex_get(self.index, <char *>key)
if not data: if not data:
raise KeyError raise KeyError
return _le32toh(data[0]), _le32toh(data[1]) return _le32toh(data[0]), _le32toh(data[1])
def __setitem__(self, key, value): def __setitem__(self, key, value):
assert len(key) == 32 assert len(key) == self.key_size
cdef int[2] data cdef int[2] data
data[0] = _htole32(value[0]) data[0] = _htole32(value[0])
data[1] = _htole32(value[1]) data[1] = _htole32(value[1])
@ -105,20 +107,20 @@ cdef class NSIndex(IndexBase):
raise Exception('hashindex_set failed') raise Exception('hashindex_set failed')
def __contains__(self, key): def __contains__(self, key):
assert len(key) == 32 assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key) data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL return data != NULL
def iteritems(self, marker=None): def iteritems(self, marker=None):
cdef const void *key cdef const void *key
iter = NSKeyIterator() iter = NSKeyIterator(self.key_size)
iter.idx = self iter.idx = self
iter.index = self.index iter.index = self.index
if marker: if marker:
key = hashindex_get(self.index, <char *>marker) key = hashindex_get(self.index, <char *>marker)
if marker is None: if marker is None:
raise IndexError raise IndexError
iter.key = key - 32 iter.key = key - self.key_size
return iter return iter
@ -126,9 +128,11 @@ cdef class NSKeyIterator:
cdef NSIndex idx cdef NSIndex idx
cdef HashIndex *index cdef HashIndex *index
cdef const void *key cdef const void *key
cdef int key_size
def __cinit__(self): def __cinit__(self, key_size):
self.key = NULL self.key = NULL
self.key_size = key_size
def __iter__(self): def __iter__(self):
return self return self
@ -137,8 +141,8 @@ cdef class NSKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key) self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key: if not self.key:
raise StopIteration raise StopIteration
cdef int *value = <int *>(self.key + 32) cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1])) return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
cdef class ChunkIndex(IndexBase): cdef class ChunkIndex(IndexBase):
@ -146,14 +150,14 @@ cdef class ChunkIndex(IndexBase):
value_size = 12 value_size = 12
def __getitem__(self, key): def __getitem__(self, key):
assert len(key) == 32 assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key) data = <int *>hashindex_get(self.index, <char *>key)
if not data: if not data:
raise KeyError raise KeyError
return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2]) return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
def __setitem__(self, key, value): def __setitem__(self, key, value):
assert len(key) == 32 assert len(key) == self.key_size
cdef int[3] data cdef int[3] data
data[0] = _htole32(value[0]) data[0] = _htole32(value[0])
data[1] = _htole32(value[1]) data[1] = _htole32(value[1])
@ -162,20 +166,20 @@ cdef class ChunkIndex(IndexBase):
raise Exception('hashindex_set failed') raise Exception('hashindex_set failed')
def __contains__(self, key): def __contains__(self, key):
assert len(key) == 32 assert len(key) == self.key_size
data = <int *>hashindex_get(self.index, <char *>key) data = <int *>hashindex_get(self.index, <char *>key)
return data != NULL return data != NULL
def iteritems(self, marker=None): def iteritems(self, marker=None):
cdef const void *key cdef const void *key
iter = ChunkKeyIterator() iter = ChunkKeyIterator(self.key_size)
iter.idx = self iter.idx = self
iter.index = self.index iter.index = self.index
if marker: if marker:
key = hashindex_get(self.index, <char *>marker) key = hashindex_get(self.index, <char *>marker)
if marker is None: if marker is None:
raise IndexError raise IndexError
iter.key = key - 32 iter.key = key - self.key_size
return iter return iter
def summarize(self): def summarize(self):
@ -188,9 +192,11 @@ cdef class ChunkKeyIterator:
cdef ChunkIndex idx cdef ChunkIndex idx
cdef HashIndex *index cdef HashIndex *index
cdef const void *key cdef const void *key
cdef int key_size
def __cinit__(self): def __cinit__(self, key_size):
self.key = NULL self.key = NULL
self.key_size = key_size
def __iter__(self): def __iter__(self):
return self return self
@ -199,5 +205,5 @@ cdef class ChunkKeyIterator:
self.key = hashindex_next_key(self.index, <char *>self.key) self.key = hashindex_next_key(self.index, <char *>self.key)
if not self.key: if not self.key:
raise StopIteration raise StopIteration
cdef int *value = <int *>(self.key + 32) cdef int *value = <int *>(self.key + self.key_size)
return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))

View File

@ -82,18 +82,20 @@ def check_extension_modules():
class Manifest: class Manifest:
MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository): def __init__(self, key, repository):
self.archives = {} self.archives = {}
self.config = {} self.config = {}
self.key = key self.key = key
self.repository = repository self.repository = repository
@classmethod
def manifest_id(cls, repository):
return b'\0' * repository.key_size
@classmethod @classmethod
def load(cls, repository, key=None): def load(cls, repository, key=None):
from .key import key_factory from .key import key_factory
cdata = repository.get(cls.MANIFEST_ID) cdata = repository.get(cls.manifest_id(repository))
if not key: if not key:
key = key_factory(repository, cdata) key = key_factory(repository, cdata)
manifest = cls(key, repository) manifest = cls(key, repository)
@ -118,7 +120,7 @@ class Manifest:
'config': self.config, 'config': self.config,
})) }))
self.id = self.key.id_hash(data) self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) self.repository.put(self.manifest_id(self.repository), self.key.encrypt(data))
def list_archive_infos(self, sort_by=None, reverse=False): def list_archive_infos(self, sort_by=None, reverse=False):
# inexpensive Archive.list_archives replacement if we just need .name, .id, .ts # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts

View File

@ -3,14 +3,33 @@ from getpass import getpass
import os import os
import msgpack import msgpack
import textwrap import textwrap
from collections import namedtuple
import hmac import hmac
from hashlib import sha256 from hashlib import sha1, sha256, sha512
import zlib import zlib
from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks try:
import lzma # python >= 3.3
except ImportError:
try:
from backports import lzma # backports.lzma from pypi
except ImportError:
lzma = None
try:
import blosc
except ImportError:
blosc = None
from .crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \
bytes_to_int, increment_iv, num_aes_blocks
from .helpers import IntegrityError, get_keys_dir, Error from .helpers import IntegrityError, get_keys_dir, Error
PREFIX = b'\0' * 8 # TODO fix cyclic import:
#from .archive import CHUNK_MAX
CHUNK_MAX = 10 * 1024 * 1024
Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy')
class UnsupportedPayloadError(Error): class UnsupportedPayloadError(Error):
@ -22,47 +41,393 @@ class KeyfileNotFoundError(Error):
""" """
class sha512_256(object): # note: can't subclass sha512
"""sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms"""
digestsize = digest_size = 32
block_size = 64
def __init__(self, data=None):
self.name = 'sha512-256'
self._h = sha512()
if data:
self.update(data)
def update(self, data):
self._h.update(data)
def digest(self):
return self._h.digest()[:self.digest_size]
def hexdigest(self):
return self._h.hexdigest()[:self.digest_size * 2]
def copy(self):
new = sha512_256.__new__(sha512_256)
new._h = self._h.copy()
return new
# HASH / MAC stuff below all has a mac-like interface, so it can be used in the same way.
# special case: hashes do not use keys (and thus, do not sign/authenticate)
class HASH: # note: we can't subclass sha1/sha256/sha512
TYPE = 0 # override in subclass
digest_size = 0 # override in subclass
hash_func = None # override in subclass
def __init__(self, key, data=b''):
# signature is like for a MAC, we ignore the key as this is a simple hash
if key is not None:
raise Exception("use a HMAC if you have a key")
self.h = self.hash_func(data)
def update(self, data):
self.h.update(data)
def digest(self):
return self.h.digest()
def hexdigest(self):
return self.h.hexdigest()
class SHA256(HASH):
TYPE = 0
digest_size = 32
hash_func = sha256
class SHA512_256(HASH):
TYPE = 1
digest_size = 32
hash_func = sha512_256
class GHASH:
TYPE = 2
digest_size = 16
def __init__(self, key, data=b''):
# signature is like for a MAC, we ignore the key as this is a simple hash
if key is not None:
raise Exception("use a MAC if you have a key")
self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=b'\0' * 32, iv=b'\0' * 16)
if data:
self.update(data)
def update(self, data):
# GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data
self.mac_cipher.add(bytes(data))
def digest(self):
hash, _ = self.mac_cipher.compute_mac_and_encrypt(b'')
return hash
class SHA1(HASH):
TYPE = 3
digest_size = 20
hash_func = sha1
class SHA512(HASH):
TYPE = 4
digest_size = 64
hash_func = sha512
class HMAC(hmac.HMAC): class HMAC(hmac.HMAC):
"""Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews TYPE = 0 # override in subclass
""" digest_size = 0 # override in subclass
hash_func = None # override in subclass
def __init__(self, key, data):
if key is None:
raise Exception("do not use HMAC if you don't have a key")
super().__init__(key, data, self.hash_func)
def update(self, msg): def update(self, msg):
# Workaround a bug in Python < 3.4 where HMAC does not accept memoryviews
self.inner.update(msg) self.inner.update(msg)
def key_creator(repository, args): class HMAC_SHA256(HMAC):
if args.encryption == 'keyfile': TYPE = 10
return KeyfileKey.create(repository, args) digest_size = 32
elif args.encryption == 'passphrase': hash_func = sha256
return PassphraseKey.create(repository, args)
else:
return PlaintextKey.create(repository, args)
def key_factory(repository, manifest_data): class HMAC_SHA512_256(HMAC):
if manifest_data[0] == KeyfileKey.TYPE: TYPE = 11
return KeyfileKey.detect(repository, manifest_data) digest_size = 32
elif manifest_data[0] == PassphraseKey.TYPE: hash_func = sha512_256
return PassphraseKey.detect(repository, manifest_data)
elif manifest_data[0] == PlaintextKey.TYPE:
return PlaintextKey.detect(repository, manifest_data)
else:
raise UnsupportedPayloadError(manifest_data[0])
class KeyBase: class HMAC_SHA1(HMAC):
TYPE = 13
digest_size = 20
hash_func = sha1
class HMAC_SHA512(HMAC):
TYPE = 14
digest_size = 64
hash_func = sha512
class GMAC(GHASH):
TYPE = 20
digest_size = 16
def __init__(self, key, data=b''):
if key is None:
raise Exception("do not use GMAC if you don't have a key")
self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0' * 16)
if data:
self.update(data)
# defaults are optimized for speed on modern CPUs with AES hw support
HASH_DEFAULT = GHASH.TYPE
MAC_DEFAULT = GMAC.TYPE
# compressor classes, all same interface
class NullCompressor(object): # uses 0 in the mapping
TYPE = 0
def compress(self, data):
return bytes(data)
def decompress(self, data):
return bytes(data)
class ZlibCompressor(object): # uses 1..9 in the mapping
TYPE = 0
LEVELS = range(10)
def compress(self, data):
level = self.TYPE - ZlibCompressor.TYPE
return zlib.compress(data, level)
def decompress(self, data):
return zlib.decompress(data)
class LzmaCompressor(object): # uses 10..19 in the mapping
TYPE = 10
PRESETS = range(10)
def __init__(self): def __init__(self):
self.TYPE_STR = bytes([self.TYPE]) if lzma is None:
raise NotImplementedError("lzma compression needs Python >= 3.3 or backports.lzma from PyPI")
def compress(self, data):
preset = self.TYPE - LzmaCompressor.TYPE
return lzma.compress(data, preset=preset)
def decompress(self, data):
return lzma.decompress(data)
class BLOSCCompressor(object):
TYPE = 0 # override in subclass
LEVELS = range(10)
CNAME = '' # override in subclass
def __init__(self):
if blosc is None:
raise NotImplementedError("%s compression needs blosc from PyPI" % self.CNAME)
if self.CNAME not in blosc.compressor_list():
raise NotImplementedError("%s compression is not supported by blosc" % self.CNAME)
blosc.set_blocksize(16384) # 16kiB is the minimum, so 64kiB are enough for 4 threads
def _get_level(self):
raise NotImplementedError
def compress(self, data):
return blosc.compress(bytes(data), 1, cname=self.CNAME, clevel=self._get_level())
def decompress(self, data):
return blosc.decompress(data)
class LZ4Compressor(BLOSCCompressor):
TYPE = 20
CNAME = 'lz4'
def _get_level(self):
return self.TYPE - LZ4Compressor.TYPE
class LZ4HCCompressor(BLOSCCompressor):
TYPE = 30
CNAME = 'lz4hc'
def _get_level(self):
return self.TYPE - LZ4HCCompressor.TYPE
class BLOSCLZCompressor(BLOSCCompressor):
TYPE = 40
CNAME = 'blosclz'
def _get_level(self):
return self.TYPE - BLOSCLZCompressor.TYPE
class SnappyCompressor(BLOSCCompressor):
TYPE = 50
CNAME = 'snappy'
def _get_level(self):
return self.TYPE - SnappyCompressor.TYPE
class BLOSCZlibCompressor(BLOSCCompressor):
TYPE = 60
CNAME = 'zlib'
def _get_level(self):
return self.TYPE - BLOSCZlibCompressor.TYPE
# default is optimized for speed
COMPR_DEFAULT = NullCompressor.TYPE # no compression
# ciphers - AEAD (authenticated encryption with assoc. data) style interface
# special case: PLAIN dummy does not encrypt / authenticate
class PLAIN:
TYPE = 0
enc_iv = None # dummy
def __init__(self, **kw):
pass
def compute_mac_and_encrypt(self, meta, data):
return None, data
def check_mac_and_decrypt(self, mac, meta, data):
return data
def get_aad(meta):
"""get additional authenticated data for AEAD ciphers"""
if meta.legacy:
# legacy format computed the mac over (iv_last8 + data)
return meta.iv[8:]
else:
return msgpack.packb(meta)
class AES_CTR_HMAC:
TYPE = 1
def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, enc_hmac_key=b'\0' * 32, **kw):
self.hmac_key = enc_hmac_key
self.enc_iv = enc_iv
self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv)
self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key)
def compute_mac_and_encrypt(self, meta, data):
self.enc_cipher.reset(iv=meta.iv)
_, data = self.enc_cipher.compute_mac_and_encrypt(data)
self.enc_iv = increment_iv(meta.iv, len(data))
aad = get_aad(meta)
mac = HMAC_SHA256(self.hmac_key, aad + data).digest() # XXX mac / hash flexibility
return mac, data
def check_mac_and_decrypt(self, mac, meta, data):
aad = get_aad(meta)
if HMAC_SHA256(self.hmac_key, aad + data).digest() != mac: # XXX mac / hash flexibility
raise IntegrityError('Encryption envelope checksum mismatch')
self.dec_cipher.reset(iv=meta.iv)
data = self.dec_cipher.check_mac_and_decrypt(None, data)
return data
class AES_GCM:
TYPE = 2
def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, **kw):
# note: hmac_key is not used for aes-gcm, it does aes+gmac in 1 pass
self.enc_iv = enc_iv
self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv)
self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key)
def compute_mac_and_encrypt(self, meta, data):
self.enc_cipher.reset(iv=meta.iv)
aad = get_aad(meta)
self.enc_cipher.add(aad)
mac, data = self.enc_cipher.compute_mac_and_encrypt(data)
self.enc_iv = increment_iv(meta.iv, len(data))
return mac, data
def check_mac_and_decrypt(self, mac, meta, data):
self.dec_cipher.reset(iv=meta.iv)
aad = get_aad(meta)
self.dec_cipher.add(aad)
try:
data = self.dec_cipher.check_mac_and_decrypt(mac, data)
except Exception:
raise IntegrityError('Encryption envelope checksum mismatch')
return data
# cipher default is optimized for speed on modern CPUs with AES hw support
PLAIN_DEFAULT = PLAIN.TYPE
CIPHER_DEFAULT = AES_GCM.TYPE
# misc. types of keys
# special case: no keys (thus: no encryption, no signing/authentication)
class KeyBase(object):
TYPE = 0x00 # override in derived classes
def __init__(self, compressor_cls, maccer_cls, cipher_cls):
self.compressor = compressor_cls()
self.maccer_cls = maccer_cls # hasher/maccer used by id_hash
self.cipher_cls = cipher_cls # plaintext dummy or AEAD cipher
self.cipher = cipher_cls()
self.id_key = None
def id_hash(self, data): def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key """Return a HASH (no id_key) or a MAC (using the "id_key" key)
XXX do we need a cryptographic hash function here or is a keyed hash
function like GMAC / GHASH good enough? See NIST SP 800-38D.
IMPORTANT: in 1 repo, there should be only 1 kind of id_hash, otherwise
data hashed/MACed with one id_hash might result in the same ID that already
exists in the repo for other data created with another id_hash method.
That is somewhat unlikely considering 128 or 256 bits, but still.
""" """
return self.maccer_cls(self.id_key, data).digest()
def encrypt(self, data): def encrypt(self, data):
pass data = self.compressor.compress(data)
meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE,
mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE,
iv=self.cipher.enc_iv, legacy=False)
mac, data = self.cipher.compute_mac_and_encrypt(meta, data)
return generate(mac, meta, data)
def decrypt(self, id, data): def decrypt(self, id, data):
pass mac, meta, data = parser(data)
compressor, keyer, maccer, cipher = get_implementations(meta)
assert isinstance(self, keyer)
assert self.maccer_cls is maccer
assert self.cipher_cls is cipher
data = self.cipher.check_mac_and_decrypt(mac, meta, data)
data = self.compressor.decompress(data)
if id and self.id_hash(data) != id:
raise IntegrityError('Chunk id verification failed')
return data
class PlaintextKey(KeyBase): class PlaintextKey(KeyBase):
@ -73,71 +438,34 @@ class PlaintextKey(KeyBase):
@classmethod @classmethod
def create(cls, repository, args): def create(cls, repository, args):
print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.') print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
return cls() compressor = compressor_creator(args)
maccer = maccer_creator(args, cls)
cipher = cipher_creator(args, cls)
return cls(compressor, maccer, cipher)
@classmethod @classmethod
def detect(cls, repository, manifest_data): def detect(cls, repository, manifest_data):
return cls() mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
def id_hash(self, data): return cls(compressor, maccer, cipher)
return sha256(data).digest()
def encrypt(self, data):
return b''.join([self.TYPE_STR, zlib.compress(data)])
def decrypt(self, id, data):
if data[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
data = zlib.decompress(memoryview(data)[1:])
if id and sha256(data).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
class AESKeyBase(KeyBase): class AESKeyBase(KeyBase):
"""Common base class shared by KeyfileKey and PassphraseKey """Common base class shared by KeyfileKey and PassphraseKey
Chunks are encrypted using 256bit AES in Counter Mode (CTR) Chunks are encrypted using 256bit AES in CTR or GCM mode.
Chunks are authenticated by a GCM GMAC or a HMAC.
Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT Payload layout: TYPE(1) + MAC(32) + NONCE(8) + CIPHERTEXT
To reduce payload size only 8 bytes of the 16 bytes nonce is saved To reduce payload size only 8 bytes of the 16 bytes nonce is saved
in the payload, the first 8 bytes are always zeros. This does not in the payload, the first 8 bytes are always zeros. This does not
affect security but limits the maximum repository capacity to affect security but limits the maximum repository capacity to
only 295 exabytes! only 295 exabytes!
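(That capacity figure follows from the layout: only the low 8 bytes of the 16-byte
nonce are stored, so the block counter is limited to 2^64 values, and 2^64 AES
blocks * 16 bytes per block = 2^68 bytes, roughly 295 exabytes.)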
""" """
def extract_iv(self, payload):
PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE _, meta, _ = parser(payload)
return meta.iv
def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key
"""
return HMAC(self.id_key, data, sha256).digest()
def encrypt(self, data):
data = zlib.compress(data)
self.enc_cipher.reset()
data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
return b''.join((self.TYPE_STR, hmac, data))
def decrypt(self, id, data):
if data[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
hmac = memoryview(data)[1:33]
if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
raise IntegrityError('Encryption envelope checksum mismatch')
self.dec_cipher.reset(iv=PREFIX + data[33:41])
data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview
if id and HMAC(self.id_key, data, sha256).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
def extract_nonce(self, payload):
if payload[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
nonce = bytes_to_long(payload[33:41])
return nonce
def init_from_random_data(self, data): def init_from_random_data(self, data):
self.enc_key = data[0:32] self.enc_key = data[0:32]
@ -148,9 +476,13 @@ class AESKeyBase(KeyBase):
if self.chunk_seed & 0x80000000: if self.chunk_seed & 0x80000000:
self.chunk_seed = self.chunk_seed - 0xffffffff - 1 self.chunk_seed = self.chunk_seed - 0xffffffff - 1
def init_ciphers(self, enc_iv=b''): def init_ciphers(self, enc_iv=b'\0' * 16):
self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv) self.cipher = self.cipher_cls(enc_key=self.enc_key, enc_iv=enc_iv,
self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) enc_hmac_key=self.enc_hmac_key)
@property
def enc_iv(self):
return self.cipher.enc_iv
class PassphraseKey(AESKeyBase): class PassphraseKey(AESKeyBase):
@ -159,7 +491,10 @@ class PassphraseKey(AESKeyBase):
@classmethod @classmethod
def create(cls, repository, args): def create(cls, repository, args):
key = cls() compressor = compressor_creator(args)
maccer = maccer_creator(args, cls)
cipher = cipher_creator(args, cls)
key = cls(compressor, maccer, cipher)
passphrase = os.environ.get('BORG_PASSPHRASE') passphrase = os.environ.get('BORG_PASSPHRASE')
if passphrase is not None: if passphrase is not None:
passphrase2 = passphrase passphrase2 = passphrase
@ -181,7 +516,9 @@ class PassphraseKey(AESKeyBase):
@classmethod @classmethod
def detect(cls, repository, manifest_data): def detect(cls, repository, manifest_data):
prompt = 'Enter passphrase for %s: ' % repository._location.orig prompt = 'Enter passphrase for %s: ' % repository._location.orig
key = cls() mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
key = cls(compressor, maccer, cipher)
passphrase = os.environ.get('BORG_PASSPHRASE') passphrase = os.environ.get('BORG_PASSPHRASE')
if passphrase is None: if passphrase is None:
passphrase = getpass(prompt) passphrase = getpass(prompt)
@ -189,8 +526,7 @@ class PassphraseKey(AESKeyBase):
key.init(repository, passphrase) key.init(repository, passphrase)
try: try:
key.decrypt(None, manifest_data) key.decrypt(None, manifest_data)
num_blocks = num_aes_blocks(len(manifest_data) - 41) key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data)))
key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
return key return key
except IntegrityError: except IntegrityError:
passphrase = getpass(prompt) passphrase = getpass(prompt)
@ -212,14 +548,15 @@ class KeyfileKey(AESKeyBase):
@classmethod @classmethod
def detect(cls, repository, manifest_data): def detect(cls, repository, manifest_data):
key = cls() mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
key = cls(compressor, maccer, cipher)
path = cls.find_key_file(repository) path = cls.find_key_file(repository)
prompt = 'Enter passphrase for key file %s: ' % path prompt = 'Enter passphrase for key file %s: ' % path
passphrase = os.environ.get('BORG_PASSPHRASE', '') passphrase = os.environ.get('BORG_PASSPHRASE', '')
while not key.load(path, passphrase): while not key.load(path, passphrase):
passphrase = getpass(prompt) passphrase = getpass(prompt)
num_blocks = num_aes_blocks(len(manifest_data) - 41) key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data)))
key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
return key return key
@classmethod @classmethod
@ -254,25 +591,27 @@ class KeyfileKey(AESKeyBase):
def decrypt_key_file(self, data, passphrase): def decrypt_key_file(self, data, passphrase):
d = msgpack.unpackb(data) d = msgpack.unpackb(data)
assert d[b'version'] == 1 assert d[b'version'] == 1
assert d[b'algorithm'] == b'sha256' assert d[b'algorithm'] == b'gmac'
key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
data = AES(is_encrypt=False, key=key).decrypt(d[b'data']) try:
if HMAC(key, data, sha256).digest() != d[b'hash']: cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=b'\0'*16)
data = cipher.check_mac_and_decrypt(d[b'hash'], d[b'data'])
return data
except Exception:
return None return None
return data
def encrypt_key_file(self, data, passphrase): def encrypt_key_file(self, data, passphrase):
salt = get_random_bytes(32) salt = get_random_bytes(32)
iterations = 100000 iterations = 100000
key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
hash = HMAC(key, data, sha256).digest() cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0'*16)
cdata = AES(is_encrypt=True, key=key).encrypt(data) mac, cdata = cipher.compute_mac_and_encrypt(data)
d = { d = {
'version': 1, 'version': 1,
'salt': salt, 'salt': salt,
'iterations': iterations, 'iterations': iterations,
'algorithm': 'sha256', 'algorithm': 'gmac',
'hash': hash, 'hash': mac,
'data': cdata, 'data': cdata,
} }
return msgpack.packb(d) return msgpack.packb(d)
@ -321,7 +660,10 @@ class KeyfileKey(AESKeyBase):
passphrase2 = getpass('Enter same passphrase again: ') passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2: if passphrase != passphrase2:
print('Passphrases do not match') print('Passphrases do not match')
key = cls() compressor = compressor_creator(args)
maccer = maccer_creator(args, cls)
cipher = cipher_creator(args, cls)
key = cls(compressor, maccer, cipher)
key.repository_id = repository.id key.repository_id = repository.id
key.init_from_random_data(get_random_bytes(100)) key.init_from_random_data(get_random_bytes(100))
key.init_ciphers() key.init_ciphers()
@ -329,3 +671,213 @@ class KeyfileKey(AESKeyBase):
print('Key file "%s" created.' % key.path) print('Key file "%s" created.' % key.path)
print('Keep this file safe. Your data will be inaccessible without it.') print('Keep this file safe. Your data will be inaccessible without it.')
return key return key
# note: key 0 nicely maps to a zlib compressor with level 0 which means "no compression"
compressor_mapping = {}
for level in ZlibCompressor.LEVELS:
compressor_mapping[ZlibCompressor.TYPE + level] = \
type('ZlibCompressorLevel%d' % level, (ZlibCompressor, ), dict(TYPE=ZlibCompressor.TYPE + level))
for preset in LzmaCompressor.PRESETS:
compressor_mapping[LzmaCompressor.TYPE + preset] = \
type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset))
for level in LZ4Compressor.LEVELS:
compressor_mapping[LZ4Compressor.TYPE + level] = \
type('LZ4CompressorLevel%d' % level, (LZ4Compressor, ), dict(TYPE=LZ4Compressor.TYPE + level))
for level in LZ4HCCompressor.LEVELS:
compressor_mapping[LZ4HCCompressor.TYPE + level] = \
type('LZ4HCCompressorLevel%d' % level, (LZ4HCCompressor, ), dict(TYPE=LZ4HCCompressor.TYPE + level))
for level in BLOSCLZCompressor.LEVELS:
compressor_mapping[BLOSCLZCompressor.TYPE + level] = \
type('BLOSCLZCompressorLevel%d' % level, (BLOSCLZCompressor, ), dict(TYPE=BLOSCLZCompressor.TYPE + level))
for level in SnappyCompressor.LEVELS:
compressor_mapping[SnappyCompressor.TYPE + level] = \
type('SnappyCompressorLevel%d' % level, (SnappyCompressor, ), dict(TYPE=SnappyCompressor.TYPE + level))
for level in BLOSCZlibCompressor.LEVELS:
compressor_mapping[BLOSCZlibCompressor.TYPE + level] = \
type('BLOSCZlibCompressorLevel%d' % level, (BLOSCZlibCompressor, ), dict(TYPE=BLOSCZlibCompressor.TYPE + level))
# overwrite 0 with NullCompressor
compressor_mapping[NullCompressor.TYPE] = NullCompressor
keyer_mapping = {
KeyfileKey.TYPE: KeyfileKey,
PassphraseKey.TYPE: PassphraseKey,
PlaintextKey.TYPE: PlaintextKey,
}
maccer_mapping = {
# simple hashes, not MACs (but MAC-like class __init__ method signature):
SHA1.TYPE: SHA1,
SHA256.TYPE: SHA256,
SHA512_256.TYPE: SHA512_256,
SHA512.TYPE: SHA512,
GHASH.TYPE: GHASH,
# MACs:
HMAC_SHA1.TYPE: HMAC_SHA1,
HMAC_SHA256.TYPE: HMAC_SHA256,
HMAC_SHA512_256.TYPE: HMAC_SHA512_256,
HMAC_SHA512.TYPE: HMAC_SHA512,
GMAC.TYPE: GMAC,
}
cipher_mapping = {
# no cipher (but cipher-like class __init__ method signature):
PLAIN.TYPE: PLAIN,
# AEAD cipher implementations
AES_CTR_HMAC.TYPE: AES_CTR_HMAC,
AES_GCM.TYPE: AES_GCM,
}
def get_implementations(meta):
try:
compressor = compressor_mapping[meta.compr_type]
keyer = keyer_mapping[meta.key_type]
maccer = maccer_mapping[meta.mac_type]
cipher = cipher_mapping[meta.cipher_type]
except KeyError:
raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x cipher_type %x" % (
meta.compr_type, meta.key_type, meta.mac_type, meta.cipher_type))
return compressor, keyer, maccer, cipher
def legacy_parser(all_data, key_type): # all rather hardcoded
"""
Payload layout:
no encryption: TYPE(1) + data
with encryption: TYPE(1) + HMAC(32) + NONCE(8) + data
data is compressed with zlib level 6 and (in the 2nd case) encrypted.
To reduce payload size only 8 bytes of the 16 bytes nonce is saved
in the payload, the first 8 bytes are always zeros. This does not
affect security but limits the maximum repository capacity to
only 295 exabytes!
"""
offset = 1
if key_type == PlaintextKey.TYPE:
mac_type = SHA256.TYPE
mac = None
cipher_type = PLAIN.TYPE
iv = None
data = all_data[offset:]
else:
mac_type = HMAC_SHA256.TYPE
mac = all_data[offset:offset+32]
cipher_type = AES_CTR_HMAC.TYPE
# legacy attic did not store the full IV on disk, as the upper 8 bytes
# are expected to be zero anyway as the full IV is a 128bit counter.
iv = b'\0' * 8 + all_data[offset+32:offset+40]
data = all_data[offset+40:]
meta = Meta(compr_type=6, key_type=key_type, mac_type=mac_type,
cipher_type=cipher_type, iv=iv, legacy=True)
return mac, meta, data
def parser00(all_data):
return legacy_parser(all_data, KeyfileKey.TYPE)
def parser01(all_data):
return legacy_parser(all_data, PassphraseKey.TYPE)
def parser02(all_data):
return legacy_parser(all_data, PlaintextKey.TYPE)
def parser03(all_data): # new & flexible
"""
Payload layout:
always: TYPE(1) + MSGPACK((mac, meta, data))
meta is a Meta namedtuple and contains all required information about data.
data is maybe compressed (see meta) and maybe encrypted (see meta).
"""
unpacker = msgpack.Unpacker(
use_list=False,
# avoid memory allocation issues caused by tampered input data.
max_buffer_size=CHUNK_MAX + 1000, # does not work in 0.4.6 unpackb C implementation
max_array_len=10, # meta_tuple
max_bin_len=CHUNK_MAX, # data
max_str_len=0, # not used yet
max_map_len=0, # not used yet
max_ext_len=0, # not used yet
)
unpacker.feed(all_data[1:])
mac, meta_tuple, data = unpacker.unpack()
meta = Meta(*meta_tuple)
return mac, meta, data
def parser(data):
parser_mapping = {
0x00: parser00,
0x01: parser01,
0x02: parser02,
0x03: parser03,
}
header_type = data[0]
parser_func = parser_mapping[header_type]
return parser_func(data)
def key_factory(repository, manifest_data):
mac, meta, data = parser(manifest_data)
compressor, keyer, maccer, cipher = get_implementations(meta)
return keyer.detect(repository, manifest_data)
def generate(mac, meta, data):
# always create new-style 0x03 format
return b'\x03' + msgpack.packb((mac, meta, data), use_bin_type=True)
def compressor_creator(args):
# args == None is used by unit tests
compression = COMPR_DEFAULT if args is None else args.compression
compressor = compressor_mapping.get(compression)
if compressor is None:
raise NotImplementedError("no compression %d" % args.compression)
return compressor
def key_creator(args):
if args.encryption == 'keyfile':
return KeyfileKey
if args.encryption == 'passphrase':
return PassphraseKey
if args.encryption == 'none':
return PlaintextKey
raise NotImplementedError("no encryption %s" % args.encryption)
def maccer_creator(args, key_cls):
# args == None is used by unit tests
mac = None if args is None else args.mac
if mac is None:
if key_cls is PlaintextKey:
mac = HASH_DEFAULT
elif key_cls in (KeyfileKey, PassphraseKey):
mac = MAC_DEFAULT
else:
raise NotImplementedError("unknown key class")
maccer = maccer_mapping.get(mac)
if maccer is None:
raise NotImplementedError("no mac %d" % args.mac)
return maccer
def cipher_creator(args, key_cls):
# args == None is used by unit tests
cipher = None if args is None else args.cipher
if cipher is None:
if key_cls is PlaintextKey:
cipher = PLAIN_DEFAULT
elif key_cls in (KeyfileKey, PassphraseKey):
cipher = CIPHER_DEFAULT
else:
raise NotImplementedError("unknown key class")
cipher = cipher_mapping.get(cipher)
if cipher is None:
raise NotImplementedError("no cipher %d" % args.cipher)
return cipher
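Note: the type 0x03 framing above is just a one-byte header followed by a
msgpack-encoded (mac, meta, data) tuple. A standalone round-trip sketch
(only the msgpack module; the meta values and the CHUNK_MAX limit below are
made up for illustration, the real constants live elsewhere in the source)::

    import msgpack

    CHUNK_MAX = 10 * 1024 * 1024  # hypothetical limit, for illustration only

    def pack_0x03(mac, meta_tuple, data):
        # same framing as generate() above: TYPE(1) + MSGPACK((mac, meta, data))
        return b'\x03' + msgpack.packb((mac, meta_tuple, data), use_bin_type=True)

    def unpack_0x03(all_data):
        # mirrors parser03(): a bounded Unpacker so tampered input cannot force huge allocations
        unpacker = msgpack.Unpacker(use_list=False,
                                    max_buffer_size=CHUNK_MAX + 1000,
                                    max_array_len=10,
                                    max_bin_len=CHUNK_MAX,
                                    max_str_len=0,
                                    max_map_len=0,
                                    max_ext_len=0)
        unpacker.feed(all_data[1:])
        return unpacker.unpack()

    # purely illustrative meta values; the real Meta namedtuple is defined earlier in key.py
    blob = pack_0x03(b'\x00' * 32, (6, 0x00, 0x02, 0x00, b'\x00' * 16, False), b'payload')
    mac, meta_tuple, data = unpack_0x03(blob)
    assert blob[0] == 0x03 and data == b'payload'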

View File

@@ -89,7 +89,7 @@ class RepositoryServer:
def negotiate(self, versions):
return 1
-def open(self, path, create=False):
+def open(self, path, create=False, key_size=None):
path = os.fsdecode(path)
if path.startswith('/~'):
path = path[1:]
@@ -100,8 +100,8 @@ class RepositoryServer:
break
else:
raise PathNotAllowed(path)
-self.repository = Repository(path, create)
+self.repository = Repository(path, create, key_size=key_size)
-return self.repository.id
+return self.repository.id, self.repository.key_size
class RemoteRepository:
@@ -112,7 +112,7 @@ class RemoteRepository:
def __init__(self, name):
self.name = name
-def __init__(self, location, create=False):
+def __init__(self, location, create=False, key_size=None):
self.location = location
self.preload_ids = []
self.msgid = 0
@@ -144,7 +144,7 @@ class RemoteRepository:
version = self.call('negotiate', 1)
if version != 1:
raise Exception('Server insisted on using unsupported protocol version %d' % version)
-self.id = self.call('open', location.path, create)
+self.id, self.key_size = self.call('open', location.path, create, key_size)
def __del__(self):
self.close()
@@ -303,7 +303,8 @@ class RepositoryCache:
def initialize(self):
self.tmppath = tempfile.mkdtemp()
-self.index = NSIndex()
+self.key_size = self.repository.key_size
+self.index = NSIndex(key_size=self.key_size)
self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
def cleanup(self):

View File

@@ -47,22 +47,23 @@ class Repository:
class ObjectNotFound(Error):
"""Object with key {} not found in repository {}."""
-def __init__(self, path, create=False, exclusive=False):
+def __init__(self, path, create=False, exclusive=False, key_size=None):
self.path = path
self.io = None
self.lock = None
self.index = None
self._active_txn = False
if create:
-self.create(path)
+self.create(path, key_size)
self.open(path, exclusive)
def __del__(self):
self.close()
-def create(self, path):
+def create(self, path, key_size):
"""Create a new empty repository at `path`
"""
+assert key_size is not None
if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
raise self.AlreadyExists(path)
if not os.path.exists(path):
@@ -75,6 +76,7 @@ class Repository:
config.set('repository', 'version', '1')
config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
+config.set('repository', 'key_size', key_size)
config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
with open(os.path.join(path, 'config'), 'w') as fd:
config.write(fd)
@@ -117,10 +119,12 @@ class Repository:
if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
raise self.InvalidRepository(path)
self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
+# legacy attic repositories always have key size 32B (256b)
+self.key_size = self.config.getint('repository', 'key_size', fallback=32)
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
self.id = unhexlify(self.config.get('repository', 'id').strip())
-self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
+self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir, self.key_size)
def close(self):
if self.lock:
@@ -140,8 +144,9 @@ class Repository:
def open_index(self, transaction_id):
if transaction_id is None:
-return NSIndex()
+return NSIndex(key_size=self.key_size)
-return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
+return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'),
+key_size=self.key_size)
def prepare_txn(self, transaction_id, do_cleanup=True):
self._active_txn = True
@@ -397,8 +402,6 @@ class LoggedIO:
header_fmt = struct.Struct('<IIB')
assert header_fmt.size == 9
-put_header_fmt = struct.Struct('<IIB32s')
-assert put_header_fmt.size == 41
header_no_crc_fmt = struct.Struct('<IB')
assert header_no_crc_fmt.size == 5
crc_fmt = struct.Struct('<I')
@@ -407,13 +410,16 @@ class LoggedIO:
_commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
COMMIT = crc_fmt.pack(crc32(_commit)) + _commit
-def __init__(self, path, limit, segments_per_dir, capacity=90):
+def __init__(self, path, limit, segments_per_dir, key_size, capacity=90):
self.path = path
self.fds = LRUCache(capacity)
self.segment = 0
self.limit = limit
self.segments_per_dir = segments_per_dir
+self.key_size = key_size
self.offset = 0
+self.put_header_fmt = struct.Struct('<IIB%ds' % key_size)
+assert self.put_header_fmt.size == self.header_fmt.size + key_size
self._write_fd = None
def close(self):
@@ -519,9 +525,9 @@ class LoggedIO:
raise IntegrityError('Invalid segment entry header')
key = None
if tag in (TAG_PUT, TAG_DELETE):
-key = rest[:32]
+key = rest[:self.key_size]
if include_data:
-yield tag, key, offset, rest[32:]
+yield tag, key, offset, rest[self.key_size:]
else:
yield tag, key, offset
offset += size
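Note: the key_size taken from the repository config directly determines the
size of a PUT/DELETE entry header. A quick sketch of the struct arithmetic
(plain Python, no borg imports)::

    import struct

    header_fmt = struct.Struct('<IIB')          # crc32, size, tag
    assert header_fmt.size == 9

    for key_size in (16, 32):                   # e.g. 128-bit vs. legacy 256-bit ids
        put_header_fmt = struct.Struct('<IIB%ds' % key_size)
        # the previously hardcoded '<IIB32s' (41 bytes) is just the key_size == 32 case
        assert put_header_fmt.size == header_fmt.size + key_size
    print('ok')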

View File

@@ -3,7 +3,7 @@ from datetime import datetime, timezone
import msgpack
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
-from ..key import PlaintextKey
+from ..key import PlaintextKey, COMPR_DEFAULT
from ..helpers import Manifest
from . import BaseTestCase
from .mock import Mock
@@ -21,9 +21,15 @@ class MockCache:
class ArchiveTimestampTestCase(BaseTestCase):
+class MockArgs(object):
+repository = None
+compression = COMPR_DEFAULT
+mac = None
+cipher = None
def _test_timestamp_parsing(self, isoformat, expected):
repository = Mock()
-key = PlaintextKey()
+key = PlaintextKey.create(None, self.MockArgs())
manifest = Manifest(repository, key)
a = Archive(repository, key, manifest, 'test', create=True)
a.metadata = {b'time': isoformat}
@@ -42,10 +48,16 @@ class ArchiveTimestampTestCase(BaseTestCase):
class ChunkBufferTestCase(BaseTestCase):
+class MockArgs(object):
+repository = None
+compression = COMPR_DEFAULT
+mac = None
+cipher = None
def test(self):
data = [{b'foo': 1}, {b'bar': 2}]
cache = MockCache()
-key = PlaintextKey()
+key = PlaintextKey.create(None, self.MockArgs())
chunks = CacheChunkBuffer(cache, key, None)
for d in data:
chunks.add(d)

View File

@@ -15,8 +15,9 @@ from .. import xattr
from ..archive import Archive, ChunkBuffer, CHUNK_MAX
from ..archiver import Archiver
from ..cache import Cache
-from ..crypto import bytes_to_long, num_aes_blocks
+from ..crypto import bytes16_to_int, num_aes_blocks
from ..helpers import Manifest
+from ..key import parser
from ..remote import RemoteRepository, PathNotAllowed
from ..repository import Repository
from . import BaseTestCase
@@ -496,8 +497,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
hash = sha256(data).digest()
if hash not in seen:
seen.add(hash)
-num_blocks = num_aes_blocks(len(data) - 41)
-nonce = bytes_to_long(data[33:41])
+mac, meta, data = parser(data)
+num_blocks = num_aes_blocks(len(data))
+nonce = bytes16_to_int(meta.iv)
for counter in range(nonce, nonce + num_blocks):
self.assert_not_in(counter, used)
used.add(counter)
@@ -576,7 +578,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
def test_missing_manifest(self):
archive, repository = self.open_archive('archive1')
-repository.delete(Manifest.MANIFEST_ID)
+repository.delete(Manifest.manifest_id(repository))
repository.commit()
self.cmd('check', self.repository_location, exit_code=1)
output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
@@ -587,7 +589,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
def test_extra_chunks(self):
self.cmd('check', self.repository_location, exit_code=0)
repository = Repository(self.repository_location)
-repository.put(b'01234567890123456789012345678901', b'xxxx')
+repository.put(b'0123456789012345', b'xxxx')
repository.commit()
repository.close()
self.cmd('check', self.repository_location, exit_code=1)

View File

@@ -1,6 +1,7 @@
from binascii import hexlify
-from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes
+from ..crypto import AES, AES_GCM_MODE, AES_CTR_MODE, pbkdf2_sha256, get_random_bytes, \
+    bytes_to_int, bytes16_to_int, int_to_bytes16, increment_iv
from . import BaseTestCase
@@ -9,9 +10,27 @@ class CryptoTestCase(BaseTestCase):
def test_bytes_to_int(self):
self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1)
-def test_bytes_to_long(self):
-self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1)
-self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1')
+def test_bytes16_to_int(self):
+i, b = 1, b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'
+self.assert_equal(bytes16_to_int(b), i)
+self.assert_equal(int_to_bytes16(i), b)
+i, b = (1 << 64) + 2, b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\2'
+self.assert_equal(bytes16_to_int(b), i)
+self.assert_equal(int_to_bytes16(i), b)
+def test_increment_iv(self):
+tests = [
+# iv, amount, iv_expected
+(0, 0, 0),
+(0, 15, 1),
+(0, 16, 1),
+(0, 17, 2),
+(0xffffffffffffffff, 32, 0x10000000000000001),
+]
+for iv, amount, iv_expected in tests:
+iv = int_to_bytes16(iv)
+iv_expected = int_to_bytes16(iv_expected)
+self.assert_equal(increment_iv(iv, amount), iv_expected)
def test_pbkdf2_sha256(self):
self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)),
@@ -28,18 +47,33 @@ class CryptoTestCase(BaseTestCase):
self.assert_equal(len(bytes2), 10)
self.assert_not_equal(bytes, bytes2)
-def test_aes(self):
+def test_aes_ctr(self):
key = b'X' * 32
+iv = b'\0' * 16
data = b'foo' * 10
# encrypt
-aes = AES(is_encrypt=True, key=key)
-self.assert_equal(bytes_to_long(aes.iv, 8), 0)
-cdata = aes.encrypt(data)
+aes = AES(mode=AES_CTR_MODE, is_encrypt=True, key=key, iv=iv)
+_, cdata = aes.compute_mac_and_encrypt(data)
self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466')
-self.assert_equal(bytes_to_long(aes.iv, 8), 2)
-# decrypt
-aes = AES(is_encrypt=False, key=key)
-self.assert_equal(bytes_to_long(aes.iv, 8), 0)
-pdata = aes.decrypt(cdata)
+# decrypt (correct mac/cdata)
+aes = AES(mode=AES_CTR_MODE, is_encrypt=False, key=key, iv=iv)
+pdata = aes.check_mac_and_decrypt(None, cdata)
self.assert_equal(data, pdata)
-self.assert_equal(bytes_to_long(aes.iv, 8), 2)
+def test_aes_gcm(self):
+key = b'X' * 32
+iv = b'A' * 16
+data = b'foo' * 10
+# encrypt
+aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv)
+mac, cdata = aes.compute_mac_and_encrypt(data)
+self.assert_equal(hexlify(mac), b'c98aa10eb6b7031bcc2160878d9438fb')
+self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741')
+# decrypt (correct mac/cdata)
+aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
+pdata = aes.check_mac_and_decrypt(mac, cdata)
+self.assert_equal(data, pdata)
+# decrypt (incorrect mac/cdata)
+aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
+cdata = b'x' + cdata[1:]  # corrupt cdata
+self.assertRaises(Exception, aes.check_mac_and_decrypt, mac, cdata)
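Note: the new tests pin down the IV helper semantics: the IV is a 128-bit
big-endian counter that advances by one per started 16-byte AES block. A
pure-Python sketch that satisfies exactly the vectors above (not the actual
crypto module implementation)::

    def bytes16_to_int(b, offset=0):
        # 16 bytes, big-endian -> int
        return int.from_bytes(b[offset:offset + 16], 'big')

    def int_to_bytes16(i):
        # int -> 16 bytes, big-endian
        return i.to_bytes(16, 'big')

    def increment_iv(iv, amount):
        # advance the 128-bit counter by one per started 16-byte AES block
        blocks = (amount + 15) // 16
        return int_to_bytes16(bytes16_to_int(iv) + blocks)

    assert increment_iv(int_to_bytes16(0), 17) == int_to_bytes16(2)
    assert increment_iv(int_to_bytes16(0xffffffffffffffff), 32) == int_to_bytes16(0x10000000000000001)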

View File

@@ -9,7 +9,7 @@ from . import BaseTestCase
class HashIndexTestCase(BaseTestCase):
def _generic_test(self, cls, make_value, sha):
-idx = cls()
+idx = cls(key_size=32)
self.assert_equal(len(idx), 0)
# Test set
for x in range(100):
@@ -34,7 +34,7 @@ class HashIndexTestCase(BaseTestCase):
with open(idx_name.name, 'rb') as fd:
self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
# Make sure we can open the file
-idx = cls.read(idx_name.name)
+idx = cls.read(idx_name.name, key_size=32)
self.assert_equal(len(idx), 50)
for x in range(50, 100):
self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
@@ -42,7 +42,7 @@ class HashIndexTestCase(BaseTestCase):
self.assert_equal(len(idx), 0)
idx.write(idx_name.name)
del idx
-self.assert_equal(len(cls.read(idx_name.name)), 0)
+self.assert_equal(len(cls.read(idx_name.name, key_size=32)), 0)
def test_nsindex(self):
self._generic_test(NSIndex, lambda x: (x, x),
@@ -55,7 +55,7 @@ class HashIndexTestCase(BaseTestCase):
def test_resize(self):
n = 2000  # Must be >= MIN_BUCKETS
idx_name = tempfile.NamedTemporaryFile()
-idx = NSIndex()
+idx = NSIndex(key_size=32)
idx.write(idx_name.name)
initial_size = os.path.getsize(idx_name.name)
self.assert_equal(len(idx), 0)
@@ -70,7 +70,7 @@ class HashIndexTestCase(BaseTestCase):
self.assert_equal(initial_size, os.path.getsize(idx_name.name))
def test_iteritems(self):
-idx = NSIndex()
+idx = NSIndex(key_size=32)
for x in range(100):
idx[bytes('%-0.32d' % x, 'ascii')] = x, x
all = list(idx.iteritems())

View File

@@ -4,8 +4,7 @@ import shutil
import tempfile
from binascii import hexlify
-from ..crypto import bytes_to_long, num_aes_blocks
-from ..key import PlaintextKey, PassphraseKey, KeyfileKey
+from ..key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT, increment_iv
from ..helpers import Location, unhexlify
from . import BaseTestCase
@@ -14,22 +13,26 @@ class KeyTestCase(BaseTestCase):
class MockArgs:
repository = Location(tempfile.mkstemp()[1])
+compression = COMPR_DEFAULT
+mac = None
+cipher = None
keyfile2_key_file = """
BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000
-hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq
-bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l
-1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2
-hQCG2L2L/9PUu4WIuKvGrsXoP7syemujNfcZws5jLp2UPva4PkQhQsrF1RYDEMLh2eF9Ol
-rwtkThq1tnh7KjWMG9Ijt7/aoQtq0zDYP/xaFF8XXSJxiyP5zjH5+spB6RL0oQHvbsliSh
-/cXJq7jrqmrJ1phd6dg4SHAM/i+hubadZoS6m25OQzYAW09wZD/phG8OVa698Z5ed3HTaT
-SmrtgJL3EoOKgUI9d6BLE4dJdBqntifo""".strip()
+hqRzYWx02gAgA1l4jfyv22y6U/mxxDT8HodSWAcX0g3nOESrQcNnBsundmVyc2lvbgGqaX
+RlcmF0aW9uc84AAYagqWFsZ29yaXRobaRnbWFjpGhhc2iw7eaB54JssAOnM1S4S9CeTaRk
+YXRh2gDQzmuyg3iYjMeTLObY+ybI+QfngB+5mmHeEAfBa42fuEZgqM3rYyMj2XfgvamF+O
+0asvhEyy9om190FaOxQ4RiiTMNqSP0FKLmd1i5ZyDMfRyp7JbscRFs9Ryk28yXWkv0MgQy
+EAYlaycY+6lWdRSgEPxidyPl9t9dr2AI/UuiQytwqmcmXgWD6Px6wgpOS/4AcRmEvDqIIl
+Rc2xsu+RevGAxk5rnrIIRPr7WB5R2cinzEn9ylDgBDt9LZbq706ELgtwVTnjWB8FBTPwVI
+vLTTXQ==
+""".strip()
keyfile2_cdata = unhexlify(re.sub('\W', '', """
-0055f161493fcfc16276e8c31493c4641e1eb19a79d0326fad0291e5a9c98e5933
-00000000000003e8d21eaf9b86c297a8cd56432e1915bb
+0393c4102e5ce8f5e9477c9e4ce2de453121aa139600001402c41000000000000000000000000000000000
+c2c407b0147a64a379d1
"""))
-keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314')
+keyfile2_id = unhexlify('dd9451069663931c8abd85452d016733')
def setUp(self):
self.tmppath = tempfile.mkdtemp()
@@ -45,25 +48,36 @@ class KeyTestCase(BaseTestCase):
_location = _Location()
id = bytes(32)
+def _test_make_testdata(self):
+# modify tearDown to not kill the key file first, before using this
+os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
+key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
+print("keyfile2_key_file: find the it in the filesystem, see location in test log output")
+print("keyfile2_cdata:", hexlify(key.encrypt(b'payload')))
+print("keyfile2_id:", hexlify(key.id_hash(b'payload')))
+assert False
def test_plaintext(self):
-key = PlaintextKey.create(None, None)
+key = PlaintextKey.create(None, self.MockArgs())
data = b'foo'
-self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
+self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb3')
self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
def test_keyfile(self):
os.environ['BORG_PASSPHRASE'] = 'test'
key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
-self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
+self.assert_equal(key.enc_iv, b'\0'*16)
manifest = key.encrypt(b'XXX')
-self.assert_equal(key.extract_nonce(manifest), 0)
+self.assert_equal(key.extract_iv(manifest), b'\0'*16)
manifest2 = key.encrypt(b'XXX')
self.assert_not_equal(manifest, manifest2)
self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
-self.assert_equal(key.extract_nonce(manifest2), 1)
+self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01')
-iv = key.extract_nonce(manifest)
+iv = key.extract_iv(manifest)
key2 = KeyfileKey.detect(self.MockRepository(), manifest)
-self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD))
+# we assume that the payload fits into one 16B AES block (which is given for b'XXX').
+iv_plus_1 = increment_iv(iv, 16)
+self.assert_equal(key2.enc_iv, iv_plus_1)
# Key data sanity check
self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3)
self.assert_equal(key2.chunk_seed == 0, False)
@@ -79,25 +93,28 @@ class KeyTestCase(BaseTestCase):
def test_passphrase(self):
os.environ['BORG_PASSPHRASE'] = 'test'
-key = PassphraseKey.create(self.MockRepository(), None)
+key = PassphraseKey.create(self.MockRepository(), self.MockArgs())
-self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
+# XXX self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
+self.assert_equal(key.enc_iv, b'\0'*16)
self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6')
self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901')
self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a')
self.assert_equal(key.chunk_seed, -775740477)
manifest = key.encrypt(b'XXX')
-self.assert_equal(key.extract_nonce(manifest), 0)
+self.assert_equal(key.extract_iv(manifest), b'\0'*16)
manifest2 = key.encrypt(b'XXX')
self.assert_not_equal(manifest, manifest2)
self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
-self.assert_equal(key.extract_nonce(manifest2), 1)
+self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01')
-iv = key.extract_nonce(manifest)
+iv = key.extract_iv(manifest)
key2 = PassphraseKey.detect(self.MockRepository(), manifest)
-self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD))
+# we assume that the payload fits into one 16B AES block (which is given for b'XXX').
+iv_plus_1 = increment_iv(iv, 16)
+self.assert_equal(key2.enc_iv, iv_plus_1)
self.assert_equal(key.id_key, key2.id_key)
self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key)
self.assert_equal(key.enc_key, key2.enc_key)
self.assert_equal(key.chunk_seed, key2.chunk_seed)
data = b'foo'
-self.assert_equal(hexlify(key.id_hash(data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990')
+self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde0501')
self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))

View File

@@ -9,16 +9,15 @@ from ..repository import Repository
from . import BaseTestCase
from .mock import patch
class RepositoryTestCaseBase(BaseTestCase):
key_size = 32
-def open(self, create=False):
+def open(self, create=False, key_size=None):
-return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+return Repository(os.path.join(self.tmppath, 'repository'), create=create, key_size=key_size)
def setUp(self):
self.tmppath = tempfile.mkdtemp()
-self.repository = self.open(create=True)
+self.repository = self.open(create=True, key_size=self.key_size)
def tearDown(self):
self.repository.close()
@@ -209,7 +208,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
def open_index(self):
-return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())),
+key_size=self.key_size)
def corrupt_object(self, id_):
idx = self.open_index()
@@ -317,8 +317,9 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
class RemoteRepositoryTestCase(RepositoryTestCase):
-def open(self, create=False):
+def open(self, create=False, key_size=None):
-return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
+create=create, key_size=key_size)
def test_invalid_rpc(self):
self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None))
@@ -326,5 +327,6 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
-def open(self, create=False):
+def open(self, create=False, key_size=None):
-return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
+create=create, key_size=key_size)

View File

@@ -53,6 +53,7 @@ User's Guide
quickstart
usage
faq
+tuning
internals
Getting help

147
docs/tuning.rst Normal file
View File

@@ -0,0 +1,147 @@
.. _tuning:
.. include:: global.rst.inc
Tuning
======
General hints
-------------
CPU load, backup speed, memory and storage usage are covered below.
As performance and resource usage depend on a lot of factors, you may need to
tweak the parameters a bit and retry until you find the best ones for your
setup.
Usually, the default parameters are selected for best speed under the assumption
that you run a modern machine with fast CPU, fast I/O and a good amount of RAM.
If you run an older or low-resource machine or your backup target or connection
to it is slow, tweaking parameters might give significant speedups.
Exclude crap data
-----------------
Maybe you don't want to back up:
* cache / temporary files (they can be rebuilt / are useless)
* specific directories / filenames / file extensions you do not need
* backups (some people make backups of backups...)
You can exclude these, so they don't waste time and space.
Avoid scrolling
---------------
If you do benchmarks, avoid creating a lot of log output, especially if it
means scrolling text in a window on a graphical user interface.
Rather, use much less log output or at least redirect the output to a log file;
that is also much faster than scrolling.
Speed (in general)
------------------
Keep an eye on CPU and I/O bounds. Try to find the sweet spot in the middle
where it is not too much I/O bound and not too much CPU bound.
I/O bound
~~~~~~~~~
If CPU load does not sum up to 1 core fully loaded while backing up, the
process is likely I/O bound (can't read or write data fast enough).
In that case, you may want to try higher compression, so there is less data to write.
Or get faster I/O, if possible.
CPU bound
~~~~~~~~~
If you have 1 core fully loaded most of the time, but your backup seems slow,
the process is likely CPU bound (can't compute fast enough).
In that case, you may want to try lower compression, so there is less to compute.
Using a faster MAC or cipher method might also be an option.
Or get a faster CPU.
I/O speed
---------
From fast to slower:
* fast local filesystem, SSD or HDD, via PCIe, SATA, USB
* ssh connection to a remote server's borg instance
* mounted network filesystems of a remote server
Not only throughput influences timing; latency does, too.
Backup space needed
-------------------
If you always backup the same data mostly, you will often save a lot of space
due to deduplication - this works independently from compression.
To avoid running out of space, regularly prune your backup archives according
to your needs. Backups of the same machine which are close in time are usually
very cheap (because most of the data is unchanged and gets deduplicated).
Compression
-----------
If you have a fast backup source and destination and you are not low on backup space:
Switch off compression, your backup will run faster and with less cpu load.
If you just want to save a bit space, but stay relatively fast:
Try zlib level 1.
If you have a very slow source or destination (e.g. a remote backup space via a
network connection that is much slower than your local and remote storage):
Try a higher zlib level or an lzma preset.
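To get a feeling for the speed/size trade-off on your own data, you can time the
underlying compressors directly (standard-library sketch, independent of borg's
own compressor classes; lzma needs Python 3.3+ or backports.lzma; pick any file
that is typical for your data)::

    import lzma, time, zlib

    data = open('some-typical-input-file', 'rb').read()   # choose a representative file
    for name, compress in [('zlib-1', lambda d: zlib.compress(d, 1)),
                           ('zlib-6', lambda d: zlib.compress(d, 6)),
                           ('lzma-0', lambda d: lzma.compress(d, preset=0))]:
        start = time.time()
        size = len(compress(data))
        print('%s: %d bytes, %.2fs' % (name, size, time.time() - start))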
Authentication & MAC selection
------------------------------
Real MACs (Message Authentication Codes) can only be used when a secret key is
available. A MAC authenticates your backup data and can detect malicious tampering.
Without a key, a simple hash will be used (which helps to detect accidental
data corruption, but cannot detect malicious tampering).
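The difference is easy to see with the standard library (this only illustrates
keyed vs. unkeyed; it is not how borg wires up its hash/MAC classes)::

    import hashlib, hmac

    data = b'backup chunk'
    print(hashlib.sha256(data).hexdigest())                  # unkeyed hash: anyone can recompute it
    key = b'\0' * 32                                         # borg derives a real secret key instead
    print(hmac.new(key, data, hashlib.sha256).hexdigest())   # keyed MAC: requires the secret key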
Older or simple 32bit machine architecture
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use sha256 (no key) or hmac-sha256 (key).
64bit architecture, but no AES hardware acceleration in the CPU
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use sha512-256 (no key) or hmac-sha512-256 (key).
Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use ghash (no key) or gmac (key).
Encryption & Cipher selection
-----------------------------
Always encrypt your backups (and keep passphrase and key file [if any] safe).
The cipher selection chooses between various AEAD ciphers (authenticated
encryption with associated data); the construction is EtM (encrypt-then-MAC):
Older or simple 32bit machine architecture
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use aes256-ctr + hmac-sha256.
64bit architecture, but no AES hardware acceleration in the CPU
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use aes256-ctr + hmac-sha512-256.
Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use aes256-gcm (AEAD 1-pass cipher).
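For reference, the AES-GCM mode is exercised by the test suite roughly like this
(a sketch mirroring borg/testsuite/crypto.py; you normally never call this yourself)::

    from borg.crypto import AES, AES_GCM_MODE

    key, iv = b'X' * 32, b'A' * 16
    aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv)
    mac, cdata = aes.compute_mac_and_encrypt(b'foo' * 10)
    aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
    assert aes.check_mac_and_decrypt(mac, cdata) == b'foo' * 10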
RAM usage
---------
Depending on the amount of files and chunks in the repository, memory usage
varies:
* about 250+B RAM per file (for "files" cache)
* about 44B RAM per 64kiB chunk (for "chunks" cache)
* about 40B RAM per 64kiB chunk (for repository index, if remote repo is used,
this will be allocated on remote side)
If you run into memory usage issues, your options are:
* get more RAM (or more swap space; speed will be slower)
* disable the "files" cache, speed will be slower
* have less files / chunks per repo
Note: RAM compression likely won't help as a lot of that data is using
msgpack, which is already rather efficient.
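As a rough back-of-the-envelope estimate using the numbers from the list above
(assuming ~64 kiB average chunk size; for a remote repository the index part
lives on the remote side)::

    data_size = 500 * 1024 ** 3            # total backup source size, e.g. 500 GiB
    files = 1000000                        # number of files in the backup source
    chunks = data_size // (64 * 1024)      # assuming ~64 kiB average chunk size

    ram = files * 250 + chunks * (44 + 40)     # files cache + chunks cache + repo index
    print('roughly %.1f GiB RAM' % (ram / 1024 ** 3))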

View File

@@ -102,6 +102,12 @@ elif sys.platform.startswith('freebsd'):
elif sys.platform == 'darwin':
ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source]))
+# msgpack pure python data corruption was fixed in 0.4.6.
+# Also, we might use some rather recent API features.
+install_requires=['msgpack-python>=0.4.6', 'blosc>=1.2.5']
+if sys.version_info < (3, 3):
+install_requires.append('backports.lzma')
setup(
name='borgbackup',
version=versioneer.get_version(),
@@ -132,7 +138,5 @@ setup(
scripts=['scripts/borg'],
cmdclass=cmdclass,
ext_modules=ext_modules,
-# msgpack pure python data corruption was fixed in 0.4.6.
-# Also, we might use some rather recent API features.
-install_requires=['msgpack-python>=0.4.6']
+install_requires=install_requires,
)